1 files changed, 95 insertions, 40 deletions
diff --git a/vpx_dsp/psnrhvs.c b/vpx_dsp/psnrhvs.c
index 300170579..095ba5d13 100644
--- a/vpx_dsp/psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -10,6 +10,7 @@
  *  This code was originally written by: Gregory Maxwell, at the Daala
  *  project.
  */
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
@@ -18,6 +19,7 @@
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/ssim.h"
 #include "vpx_ports/system_state.h"
+#include "vpx_dsp/psnr.h"
 
 #if !defined(M_PI)
 # define M_PI (3.141592653589793238462643)
@@ -26,14 +28,29 @@
 
 static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
                            int xstride) {
+  int i, j;
   (void) xstride;
   vpx_fdct8x8(x, y, ystride);
+  for (i = 0; i < 8; i++)
+    for (j = 0; j< 8; j++)
+      *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3;
 }
+#if CONFIG_VP9_HIGHBITDEPTH
+static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
+                           int xstride) {
+  int i, j;
+  (void) xstride;
+  vpx_highbd_fdct8x8(x, y, ystride);
+  for (i = 0; i < 8; i++)
+    for (j = 0; j< 8; j++)
+      *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3;
+}
+#endif
 
 /* Normalized inverse quantization matrix for 8x8 DCT at the point of
  * transparency. This is not the JPEG based matrix from the paper,
  this one gives a slightly higher MOS agreement.*/
-static const float csf_y[8][8] = {
+static const double csf_y[8][8] = {
     {1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
      0.678296995242, 0.466224900598, 0.3265091542},
     {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
@@ -50,7 +67,7 @@ static const float csf_y[8][8] = {
      0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},
     {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
      0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};
-static const float csf_cb420[8][8] = {
+static const double csf_cb420[8][8] = {
     {1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
      0.898018824055, 0.74725392039, 0.615105596242},
     {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
@@ -67,7 +84,7 @@ static const float csf_cb420[8][8] = {
      0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},
     {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
      0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};
-static const float csf_cr420[8][8] = {
+static const double csf_cr420[8][8] = {
     {2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
      0.867069376285, 0.721500455585, 0.593906509971},
     {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
@@ -85,23 +102,38 @@ static const float csf_cr420[8][8] = {
     {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
      0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};
 
-static double convert_score_db(double _score, double _weight) {
-  return 10 * (log10(255 * 255) - log10(_weight * _score));
+static double convert_score_db(double _score, double _weight, int bit_depth) {
+  int16_t pix_max = 255;
+  assert(_score * _weight >= 0.0);
+  if (bit_depth == 10)
+    pix_max = 1023;
+  else if (bit_depth == 12)
+    pix_max = 4095;
+
+  if (_weight * _score < pix_max * pix_max * 1e-10)
+    return MAX_PSNR;
+  return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));
 }
 
-static double calc_psnrhvs(const unsigned char *_src, int _systride,
-                           const unsigned char *_dst, int _dystride,
-                           double _par, int _w, int _h, int _step,
-                           const float _csf[8][8]) {
-  float ret;
+static double calc_psnrhvs(const unsigned char *src, int _systride,
+                               const unsigned char *dst, int _dystride,
+                               double _par, int _w, int _h, int _step,
+                               const double _csf[8][8], uint32_t bit_depth,
+                               uint32_t _shift) {
+  double ret;
+  const uint8_t *_src8 = src;
+  const uint8_t *_dst8 = dst;
+  const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
+  const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
   int16_t dct_s[8 * 8], dct_d[8 * 8];
   tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];
-  float mask[8][8];
+  double mask[8][8];
   int pixels;
   int x;
   int y;
   (void) _par;
   ret = pixels = 0;
+
   /*In the PSNR-HVS-M paper[1] the authors describe the construction of
    their masking table as "we have used the quantization table for the
    color component Y of JPEG [6] that has been also obtained on the
@@ -126,23 +158,28 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
     for (x = 0; x < _w - 7; x += _step) {
       int i;
       int j;
-      float s_means[4];
-      float d_means[4];
-      float s_vars[4];
-      float d_vars[4];
-      float s_gmean = 0;
-      float d_gmean = 0;
-      float s_gvar = 0;
-      float d_gvar = 0;
-      float s_mask = 0;
-      float d_mask = 0;
+      double s_means[4];
+      double d_means[4];
+      double s_vars[4];
+      double d_vars[4];
+      double s_gmean = 0;
+      double d_gmean = 0;
+      double s_gvar = 0;
+      double d_gvar = 0;
+      double s_mask = 0;
+      double d_mask = 0;
       for (i = 0; i < 4; i++)
         s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;
       for (i = 0; i < 8; i++) {
         for (j = 0; j < 8; j++) {
           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
-          dct_s[i * 8 + j] = _src[(y + i) * _systride + (j + x)];
-          dct_d[i * 8 + j] = _dst[(y + i) * _dystride + (j + x)];
+          if (bit_depth == 8 && _shift == 0) {
+            dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];
+            dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];
+          } else if (bit_depth == 10 || bit_depth == 12) {
+            dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift;
+            dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift;
+          }
           s_gmean += dct_s[i * 8 + j];
           d_gmean += dct_d[i * 8 + j];
           s_means[sub] += dct_s[i * 8 + j];
@@ -176,8 +213,16 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
         s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
       if (d_gvar > 0)
         d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
-      od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
-      od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (bit_depth == 10 || bit_depth == 12) {
+        hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+        hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+      }
+#endif
+      if (bit_depth == 8) {
+        od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+        od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
+      }
       for (i = 0; i < 8; i++)
         for (j = (i == 0); j < 8; j++)
           s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
@@ -190,8 +235,8 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
         s_mask = d_mask;
       for (i = 0; i < 8; i++) {
         for (j = 0; j < 8; j++) {
-          float err;
-          err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
+          double err;
+          err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
           if (i != 0 || j != 0)
             err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
           ret += (err * _csf[i][j]) * (err * _csf[i][j]);
@@ -203,25 +248,35 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
   ret /= pixels;
   return ret;
 }
-double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
+
+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *src,
                    const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,
-                   double *u_psnrhvs, double *v_psnrhvs) {
+                   double *u_psnrhvs, double *v_psnrhvs,
+                   uint32_t bd, uint32_t in_bd) {
   double psnrhvs;
   const double par = 1.0;
   const int step = 7;
+  uint32_t bd_shift = 0;
   vpx_clear_system_state();
-  *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,
-                            dest->y_stride, par, source->y_crop_width,
-                            source->y_crop_height, step, csf_y);
 
-  *u_psnrhvs = calc_psnrhvs(source->u_buffer, source->uv_stride, dest->u_buffer,
-                            dest->uv_stride, par, source->uv_crop_width,
-                            source->uv_crop_height, step, csf_cb420);
+  assert(bd == 8 || bd == 10 || bd == 12);
+  assert(bd >= in_bd);
 
-  *v_psnrhvs = calc_psnrhvs(source->v_buffer, source->uv_stride, dest->v_buffer,
-                            dest->uv_stride, par, source->uv_crop_width,
-                            source->uv_crop_height, step, csf_cr420);
-  psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
+  bd_shift = bd - in_bd;
 
-  return convert_score_db(psnrhvs, 1.0);
+  *y_psnrhvs = calc_psnrhvs(src->y_buffer, src->y_stride, dest->y_buffer,
+                            dest->y_stride, par, src->y_crop_width,
+                            src->y_crop_height, step, csf_y, bd,
+                            bd_shift);
+  *u_psnrhvs = calc_psnrhvs(src->u_buffer, src->uv_stride, dest->u_buffer,
+                            dest->uv_stride, par, src->uv_crop_width,
+                            src->uv_crop_height, step, csf_cb420, bd,
+                            bd_shift);
+  *v_psnrhvs = calc_psnrhvs(src->v_buffer, src->uv_stride, dest->v_buffer,
+                            dest->uv_stride, par, src->uv_crop_width,
+                            src->uv_crop_height, step, csf_cr420, bd,
+                            bd_shift);
+  psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
+  return convert_score_db(psnrhvs, 1.0, in_bd);
 }
+