summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_skin_detection.c
blob: 0ca1665365d2416bdb601488a0b0ec513ee070b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <math.h>

#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_skin_detection.h"

// Fixed-point skin color model parameters, consumed by
// evaluate_skin_color_difference() and vp9_skin_pixel(). The "qN" tag on each
// constant is the number of fractional bits it carries.
//  - skin_mean:      model mean, ordered {cb, cr}; it is subtracted from the
//                    chroma samples after they are scaled up by 6 bits.
//  - skin_inv_cov:   weights of the quadratic form, ordered
//                    {cb*cb, cb*cr, cr*cb, cr*cr} (presumably the inverse
//                    covariance of the model, as the name suggests).
//  - skin_threshold: a pixel is reported as skin only when the evaluated
//                    distance is strictly below this value.
static const int skin_mean[2] = {7463, 9614};                 // q6
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157};  // q16
static const int skin_threshold = 1570636;                    // q18

// Thresholds on luminance: vp9_skin_pixel() never reports skin for a pixel
// whose luma is below y_low or above y_high.
static const int y_low = 20;
static const int y_high = 220;

// Evaluates the Mahalanobis distance measure for the input CbCr values.
// The chroma samples are promoted to q6, offset by the model mean, and the
// resulting quadratic form is returned in q18 (q2 products times q16 weights).
static int evaluate_skin_color_difference(int cb, int cr) {
  // Rounding term for the q12 -> q2 reduction (drops 10 bits).
  const int half_q12_to_q2 = 1 << 9;

  // Distance of each chroma component from the model mean, in q6.
  const int delta_cb_q6 = (cb << 6) - skin_mean[0];
  const int delta_cr_q6 = (cr << 6) - skin_mean[1];

  // Second-order terms: q6 * q6 = q12, then rounded down to q2.
  const int cb_cb_q2 = (delta_cb_q6 * delta_cb_q6 + half_q12_to_q2) >> 10;
  const int cb_cr_q2 = (delta_cb_q6 * delta_cr_q6 + half_q12_to_q2) >> 10;
  const int cr_cr_q2 = (delta_cr_q6 * delta_cr_q6 + half_q12_to_q2) >> 10;

  // Weighted sum; the cross term appears once per off-diagonal weight.
  return skin_inv_cov[0] * cb_cb_q2 +
         skin_inv_cov[1] * cb_cr_q2 +
         skin_inv_cov[2] * cb_cr_q2 +
         skin_inv_cov[3] * cr_cr_q2;
}

// Classifies a single pixel. Returns 1 when the luma lies within
// [y_low, y_high] and the chroma distance from the skin model is strictly
// below skin_threshold; returns 0 otherwise.
int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
  // Only pixels with acceptable luminance get the chroma test at all.
  if (y >= y_low && y <= y_high) {
    const int distance = evaluate_skin_color_difference(cb, cr);
    return distance < skin_threshold;
  }
  return 0;
}

// Decides whether a block is skin by classifying a single sample taken at the
// block's center.
//   y, u, v:  pointers to the block's first sample in each plane.
//   stride:   row stride of the luma plane.
//   strideuv: row stride of both chroma planes.
//   bsize:    index into b_width_log2_lookup / b_height_log2_lookup.
// Returns the result of vp9_skin_pixel() for the center sample.
int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
                           int stride, int strideuv, int bsize) {
  // Center position in luma samples: half of (4 << log2 lookup value).
  const int luma_col = (4 << b_width_log2_lookup[bsize]) >> 1;
  const int luma_row = (4 << b_height_log2_lookup[bsize]) >> 1;
  // Chroma position is the luma position halved in each direction.
  const int chroma_col = luma_col >> 1;
  const int chroma_row = luma_row >> 1;

  const int luma_offset = luma_row * stride + luma_col;
  const int chroma_offset = chroma_row * strideuv + chroma_col;

  return vp9_skin_pixel(y[luma_offset], u[chroma_offset], v[chroma_offset]);
}


#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
// Builds a visualization of the skin detector's decisions for the current
// source frame and writes it out with vp9_write_yuv_frame_420(): every block
// classified as skin has its luma set to 255 (white), every other processed
// block keeps the source luma, and all remaining bytes of the temporary frame
// keep the value 128 they were filled with.
//   cpi:              encoder instance; cpi->Source supplies the input planes
//                     and strides, cpi->common the frame geometry.
//   yuv_skinmap_file: output handle forwarded to vp9_write_yuv_frame_420().
// If the temporary frame buffer cannot be allocated, nothing is written.
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
  int i, j, mi_row, mi_col, num_bl;
  VP9_COMMON *const cm = &cpi->common;
  uint8_t *y;
  int dst_ystride;
  const uint8_t *src_y = cpi->Source->y_buffer;
  const uint8_t *src_u = cpi->Source->u_buffer;
  const uint8_t *src_v = cpi->Source->v_buffer;
  const int src_ystride = cpi->Source->y_stride;
  const int src_uvstride = cpi->Source->uv_stride;
  int y_bsize = 16;  // Use 8x8 or 16x16.
  int uv_bsize = y_bsize >> 1;
  // Offsets of the block center within the luma / chroma block.
  int ypos = y_bsize >> 1;
  int uvpos = uv_bsize >> 1;
  // log2 of the luma / chroma block size, used to advance by whole block rows.
  int shy = (y_bsize == 8) ? 3 : 4;
  int shuv = shy - 1;
  // Loop step in mi units per block (assumes one mi unit spans 8 luma
  // pixels -- TODO confirm against the mi grid definition).
  int fac = y_bsize / 8;
  // Use center pixel or average of center 2x2 pixels.
  int mode_filter = 1;
  YV12_BUFFER_CONFIG skinmap;
  memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG));
  if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height,
                             cm->subsampling_x, cm->subsampling_y,
                             VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment)) {
    vpx_free_frame_buffer(&skinmap);
    return;
  }
  memset(skinmap.buffer_alloc, 128, skinmap.frame_size);
  y = skinmap.y_buffer;
  // The skin map is a separate allocation: address it with its own stride
  // instead of assuming it matches the source frame's stride.
  dst_ystride = skinmap.y_stride;
  // Loop through blocks and set skin map based on center pixel of block.
  // Set y to white for skin block, otherwise set to source with gray scale.
  // Ignore rightmost/bottom boundary blocks.
  for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) {
    num_bl = 0;
    for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) {
      // Select pixel for each block for skin detection.
      // Use center pixel, or 2x2 average at center.
      uint8_t ysource = src_y[ypos * src_ystride + ypos];
      uint8_t usource = src_u[uvpos * src_uvstride + uvpos];
      uint8_t vsource = src_v[uvpos * src_uvstride + uvpos];
      uint8_t ysource2 = src_y[(ypos + 1) * src_ystride + ypos];
      uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos];
      uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos];
      uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)];
      uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)];
      uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)];
      uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
      uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
      uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
      int is_skin = 0;
      if (mode_filter == 1) {
        ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2;
        usource = (usource + usource2 + usource3 + usource4) >> 2;
        vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2;
      }
      is_skin = vp9_skin_pixel(ysource, usource, vsource);
      // Paint the whole block: destination rows step by the skin map stride,
      // source rows by the source stride.
      for (i = 0; i < y_bsize; i++) {
        for (j = 0; j < y_bsize; j++) {
          if (is_skin)
            y[i * dst_ystride + j] = 255;
          else
            y[i * dst_ystride + j] = src_y[i * src_ystride + j];
        }
      }
      num_bl++;
      // Move every plane pointer one block to the right.
      y += y_bsize;
      src_y += y_bsize;
      src_u += uv_bsize;
      src_v += uv_bsize;
    }
    // Rewind past the blocks visited in this row and drop down one block row,
    // each plane using its own stride.
    y += (dst_ystride << shy) - (num_bl << shy);
    src_y += (src_ystride << shy) - (num_bl << shy);
    src_u += (src_uvstride << shuv) - (num_bl << shuv);
    src_v += (src_uvstride << shuv) - (num_bl << shuv);
  }
  vp9_write_yuv_frame_420(&skinmap, yuv_skinmap_file);
  vpx_free_frame_buffer(&skinmap);
}
#endif