10 files changed, 547 insertions, 18 deletions
diff --git a/tools/3D-Reconstruction/MotionEST/Exhaust.py b/tools/3D-Reconstruction/MotionEST/Exhaust.py
new file mode 100644
index 000000000..3c0346814
--- /dev/null
+++ b/tools/3D-Reconstruction/MotionEST/Exhaust.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+import numpy.linalg as LA
+from Util import MSE
+from MotionEST import MotionEST
+"""Exhaust Search:"""
+
+
+class Exhaust(MotionEST):
+  """
+    Constructor:
+        cur_f: current frame
+        ref_f: reference frame
+        blk_sz: block size
+        wnd_size: search window size
+        metric: metric to compare the blocks distrotion
+    """
+
+  def __init__(self, cur_f, ref_f, blk_size, wnd_size, metric=MSE):
+    self.name = 'exhaust'
+    self.wnd_sz = wnd_size
+    self.metric = metric
+    super(Exhaust, self).__init__(cur_f, ref_f, blk_size)
+
+  """
+    search method:
+        cur_r: start row
+        cur_c: start column
+    """
+
+  def search(self, cur_r, cur_c):
+    min_loss = self.dist(cur_r, cur_c, [0, 0], self.metric)
+    cur_x = cur_c * self.blk_sz
+    cur_y = cur_r * self.blk_sz
+    ref_x = cur_x
+    ref_y = cur_y
+    #search all validate positions and select the one with minimum distortion
+    for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz):
+      for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz):
+        if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz:
+          loss = self.dist(cur_r, cur_c, [y - cur_y, x - cur_x], self.metric)
+          if loss < min_loss:
+            min_loss = loss
+            ref_x = x
+            ref_y = y
+    return ref_x, ref_y
+
+  def est(self):
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        ref_x, ref_y = self.search(i, j)
+        self.mf[i, j] = np.array(
+            [ref_y - i * self.blk_sz, ref_x - j * self.blk_sz])
+
+
+"""Exhaust with Neighbor Constraint"""
+
+
+class ExhaustNeighbor(MotionEST):
+  """
+    Constructor:
+        cur_f: current frame
+        ref_f: reference frame
+        blk_sz: block size
+        wnd_size: search window size
+        beta: neigbor loss weight
+        metric: metric to compare the blocks distrotion
+    """
+
+  def __init__(self, cur_f, ref_f, blk_size, wnd_size, beta, metric=MSE):
+    self.name = 'exhaust + neighbor'
+    self.wnd_sz = wnd_size
+    self.beta = beta
+    self.metric = metric
+    super(ExhaustNeighbor, self).__init__(cur_f, ref_f, blk_size)
+    self.assign = np.zeros((self.num_row, self.num_col), dtype=np.bool)
+
+  """
+    estimate neighbor loss:
+        cur_r: current row
+        cur_c: current column
+        mv: current motion vector
+    """
+
+  def neighborLoss(self, cur_r, cur_c, mv):
+    loss = 0
+    #accumulate difference between current block's motion vector with neighbors'
+    for i, j in {(-1, 0), (1, 0), (0, 1), (0, -1)}:
+      nb_r = cur_r + i
+      nb_c = cur_c + j
+      if 0 <= nb_r < self.num_row and 0 <= nb_c < self.num_col and self.assign[
+          nb_r, nb_c]:
+        loss += LA.norm(mv - self.mf[nb_r, nb_c])
+    return loss
+
+  """
+    search method:
+        cur_r: start row
+        cur_c: start column
+    """
+
+  def search(self, cur_r, cur_c):
+    dist_loss = self.dist(cur_r, cur_c, [0, 0], self.metric)
+    nb_loss = self.neighborLoss(cur_r, cur_c, np.array([0, 0]))
+    min_loss = dist_loss + self.beta * nb_loss
+    cur_x = cur_c * self.blk_sz
+    cur_y = cur_r * self.blk_sz
+    ref_x = cur_x
+    ref_y = cur_y
+    #search all validate positions and select the one with minimum distortion
+    # as well as weighted neighbor loss
+    for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz):
+      for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz):
+        if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz:
+          dist_loss = self.dist(cur_r, cur_c, [y - cur_y, x - cur_x],
+                                self.metric)
+          nb_loss = self.neighborLoss(cur_r, cur_c, [y - cur_y, x - cur_x])
+          loss = dist_loss + self.beta * nb_loss
+          if loss < min_loss:
+            min_loss = loss
+            ref_x = x
+            ref_y = y
+    return ref_x, ref_y
+
+  def est(self):
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        ref_x, ref_y = self.search(i, j)
+        self.mf[i, j] = np.array(
+            [ref_y - i * self.blk_sz, ref_x - j * self.blk_sz])
+        self.assign[i, j] = True
diff --git a/tools/3D-Reconstruction/MotionEST/GroundTruth.py b/tools/3D-Reconstruction/MotionEST/GroundTruth.py
new file mode 100644
index 000000000..61b4bef42
--- /dev/null
+++ b/tools/3D-Reconstruction/MotionEST/GroundTruth.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+import numpy.linalg as LA
+from MotionEST import MotionEST
+"""Ground Truth:
+
+    Load in ground truth motion field and mask
+"""
+
+
+class GroundTruth(MotionEST):
+  """
+    constructor:
+        cur_f: current frame
+        ref_f: reference frame
+        blk_sz: block size
+        gt_path: ground truth motion field file path
+    """
+
+  def __init__(self, cur_f, ref_f, blk_sz, gt_path):
+    self.name = 'ground truth'
+    super(GroundTruth, self).__init__(cur_f, ref_f, blk_sz)
+    self.mask = np.zeros((self.num_row, self.num_col), dtype=np.bool)
+    with open(gt_path) as gt_file:
+      lines = gt_file.readlines()
+      for i in xrange(len(lines)):
+        info = lines[i].split(';')
+        for j in xrange(len(info)):
+          x, y = info[j].split(',')
+          #-,- stands for nothing
+          if x == '-' or y == '-':
+            self.mask[i, -j - 1] = True
+            continue
+          #the order of original file is flipped on the x axis
+          self.mf[i, -j - 1] = np.array([float(y), -float(x)], dtype=np.int)
diff --git a/tools/3D-Reconstruction/MotionEST/HornSchunck.py b/tools/3D-Reconstruction/MotionEST/HornSchunck.py
new file mode 100644
index 000000000..0bf431cf6
--- /dev/null
+++ b/tools/3D-Reconstruction/MotionEST/HornSchunck.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+import numpy.linalg as LA
+from scipy.ndimage.filters import gaussian_filter
+from scipy.sparse import csc_matrix
+from scipy.sparse.linalg import inv
+from MotionEST import MotionEST
+"""Horn & Schunck Model"""
+
+
+class HornSchunck(MotionEST):
+  """
+    Horn & Schunck optical flow estimated on the grid of block intensities.
+        cur_f: current frame
+        ref_f: reference frame
+        blk_sz: block size
+        alpha: smooth constrain weight
+        sigma: gaussian blur parameter; max_iter: iterations run by est()
+    """
+
+  def __init__(self, cur_f, ref_f, blk_sz, alpha, sigma, max_iter=100):
+    super(HornSchunck, self).__init__(cur_f, ref_f, blk_sz)
+    self.cur_I, self.ref_I = self.getIntensity()
+    #perform gaussian blur to smooth the intensity
+    self.cur_I = gaussian_filter(self.cur_I, sigma=sigma)
+    self.ref_I = gaussian_filter(self.ref_I, sigma=sigma)
+    self.alpha = alpha
+    self.max_iter = max_iter
+    self.Ix, self.Iy, self.It = self.intensityDiff()
+
+  """
+    Build Frame Intensity
+    """
+
+  def getIntensity(self):
+    cur_I = np.zeros((self.num_row, self.num_col))
+    ref_I = np.zeros((self.num_row, self.num_col))
+    #use the mean of channel 0 over each block as that block's intensity
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        r = i * self.blk_sz
+        c = j * self.blk_sz
+        cur_I[i, j] = np.mean(self.cur_yuv[r:r + self.blk_sz, c:c + self.blk_sz,
+                                           0])
+        ref_I[i, j] = np.mean(self.ref_yuv[r:r + self.blk_sz, c:c + self.blk_sz,
+                                           0])
+    return cur_I, ref_I
+
+  """
+    Get First Order Derivative
+    """
+
+  def intensityDiff(self):
+    Ix = np.zeros((self.num_row, self.num_col))
+    Iy = np.zeros((self.num_row, self.num_col))
+    It = np.zeros((self.num_row, self.num_col))
+    sz = self.blk_sz
+    for i in xrange(self.num_row - 1):
+      for j in xrange(self.num_col - 1):
+        """
+                Ix:
+                (i  ,j) <--- (i  ,j+1)
+                (i+1,j) <--- (i+1,j+1)
+                """
+        count = 0
+        for r, c in {(i, j + 1), (i + 1, j + 1)}:
+          if 0 <= r < self.num_row and 0 < c < self.num_col:
+            Ix[i, j] += (
+                self.cur_I[r, c] - self.cur_I[r, c - 1] + self.ref_I[r, c] -
+                self.ref_I[r, c - 1])
+            count += 2
+        Ix[i, j] /= count
+        """
+                Iy:
+                (i  ,j)      (i  ,j+1)
+                   ^             ^
+                   |             |
+                (i+1,j)      (i+1,j+1)
+                """
+        count = 0
+        for r, c in {(i + 1, j), (i + 1, j + 1)}:
+          if 0 < r < self.num_row and 0 <= c < self.num_col:
+            Iy[i, j] += (
+                self.cur_I[r, c] - self.cur_I[r - 1, c] + self.ref_I[r, c] -
+                self.ref_I[r - 1, c])
+            count += 2
+        Iy[i, j] /= count
+        count = 0
+        #It: mean of (ref - cur) over the 2x2 neighborhood starting at (i, j)
+        for r in xrange(i, i + 2):
+          for c in xrange(j, j + 2):
+            if 0 <= r < self.num_row and 0 <= c < self.num_col:
+              It[i, j] += (self.ref_I[r, c] - self.cur_I[r, c])
+              count += 1
+        It[i, j] /= count
+    return Ix, Iy, It
+
+  """
+    Get weighted average of neighbor motion vectors
+    for evaluation of laplacian
+    """
+
+  def averageMV(self):
+    avg = np.zeros((self.num_row, self.num_col, 2))
+    """
+        1/12 ---  1/6 --- 1/12
+         |         |       |
+        1/6  --- -1/8 --- 1/6
+         |         |       |
+        1/12 ---  1/6 --- 1/12
+        """
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        for r, c in {(-1, 0), (1, 0), (0, -1), (0, 1)}:
+          if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col:
+            avg[i, j] += self.mf[i + r, j + c] / 6.0
+        for r, c in {(-1, -1), (-1, 1), (1, -1), (1, 1)}:
+          if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col:
+            avg[i, j] += self.mf[i + r, j + c] / 12.0
+    return avg
+
+  def est(self):
+    count = 0
+    """
+        u_{n+1} = ~u_n - Ix(Ix.~u_n+Iy.~v+It)/(IxIx+IyIy+alpha^2)
+        v_{n+1} = ~v_n - Iy(Ix.~u_n+Iy.~v+It)/(IxIx+IyIy+alpha^2)
+        """
+    denom = self.alpha**2 + np.power(self.Ix, 2) + np.power(self.Iy, 2)
+    while count < self.max_iter:
+      avg = self.averageMV()
+      self.mf[:, :, 1] = avg[:, :, 1] - self.Ix * (
+          self.Ix * avg[:, :, 1] + self.Iy * avg[:, :, 0] + self.It) / denom
+      self.mf[:, :, 0] = avg[:, :, 0] - self.Iy * (
+          self.Ix * avg[:, :, 1] + self.Iy * avg[:, :, 0] + self.It) / denom
+      count += 1
+    self.mf *= self.blk_sz
+
+  def est_mat(self):
+    row_idx = []
+    col_idx = []
+    data = []
+
+    N = 2 * self.num_row * self.num_col
+    b = np.zeros((N, 1))
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        """(IxIx+alpha^2)u+IxIy.v-alpha^2~u IxIy.u+(IyIy+alpha^2)v-alpha^2~v
+        """
+        u_idx = i * 2 * self.num_col + 2 * j
+        v_idx = u_idx + 1
+        b[u_idx, 0] = -self.Ix[i, j] * self.It[i, j]
+        b[v_idx, 0] = -self.Iy[i, j] * self.It[i, j]
+        #u: (IxIx+alpha^2)u
+        row_idx.append(u_idx)
+        col_idx.append(u_idx)
+        data.append(self.Ix[i, j] * self.Ix[i, j] + self.alpha**2)
+        #IxIy.v
+        row_idx.append(u_idx)
+        col_idx.append(v_idx)
+        data.append(self.Ix[i, j] * self.Iy[i, j])
+
+        #v: IxIy.u
+        row_idx.append(v_idx)
+        col_idx.append(u_idx)
+        data.append(self.Ix[i, j] * self.Iy[i, j])
+        #(IyIy+alpha^2)v
+        row_idx.append(v_idx)
+        col_idx.append(v_idx)
+        data.append(self.Iy[i, j] * self.Iy[i, j] + self.alpha**2)
+
+        #-alpha^2~u
+        #-alpha^2~v
+        for r, c in {(-1, 0), (1, 0), (0, -1), (0, 1)}:
+          if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col:
+            u_nb = (i + r) * 2 * self.num_col + 2 * (j + c)
+            v_nb = u_nb + 1
+
+            row_idx.append(u_idx)
+            col_idx.append(u_nb)
+            data.append(-1 * self.alpha**2 / 6.0)
+
+            row_idx.append(v_idx)
+            col_idx.append(v_nb)
+            data.append(-1 * self.alpha**2 / 6.0)
+        for r, c in {(-1, -1), (-1, 1), (1, -1), (1, 1)}:
+          if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col:
+            u_nb = (i + r) * 2 * self.num_col + 2 * (j + c)
+            v_nb = u_nb + 1
+
+            row_idx.append(u_idx)
+            col_idx.append(u_nb)
+            data.append(-1 * self.alpha**2 / 12.0)
+
+            row_idx.append(v_idx)
+            col_idx.append(v_nb)
+            data.append(-1 * self.alpha**2 / 12.0)
+    M = csc_matrix((data, (row_idx, col_idx)), shape=(N, N))
+    M_inv = inv(M)
+    uv = M_inv.dot(b)
+
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        self.mf[i, j, 0] = uv[i * 2 * self.num_col + 2 * j + 1, 0] * self.blk_sz
+        self.mf[i, j, 1] = uv[i * 2 * self.num_col + 2 * j, 0] * self.blk_sz
diff --git a/tools/3D-Reconstruction/MotionEST/MotionEST.py b/tools/3D-Reconstruction/MotionEST/MotionEST.py
new file mode 100644
index 000000000..68cf7e743
--- /dev/null
+++ b/tools/3D-Reconstruction/MotionEST/MotionEST.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+import numpy.linalg as LA
+import matplotlib.pyplot as plt
+from Util import drawMF, MSE
+"""The Base Class of Estimators"""
+
+
+class MotionEST(object):
+  """
+    constructor:
+        cur_f: current frame
+        ref_f: reference frame
+        blk_sz: block size
+    """
+
+  def __init__(self, cur_f, ref_f, blk_sz):
+    self.cur_f = cur_f
+    self.ref_f = ref_f
+    self.blk_sz = blk_sz
+    #convert RGB to YUV
+    self.cur_yuv = np.array(self.cur_f.convert('YCbCr'))
+    self.ref_yuv = np.array(self.ref_f.convert('YCbCr'))
+    #frame size
+    self.width = self.cur_f.size[0]
+    self.height = self.cur_f.size[1]
+    #motion field size
+    self.num_row = self.height // self.blk_sz
+    self.num_col = self.width // self.blk_sz
+    #initialize motion field
+    self.mf = np.zeros((self.num_row, self.num_col, 2))
+
+  """
+    estimation function
+        Override by child classes
+    """
+
+  def motion_field_estimation(self):
+    pass
+
+  """
+    distortion of a block:
+        cur_r: current row
+        cur_c: current column
+        mv: motion vector
+        metric: distortion metric
+    """
+
+  def block_dist(self, cur_r, cur_c, mv, metric=MSE):
+    cur_x = cur_c * self.blk_sz
+    cur_y = cur_r * self.blk_sz
+    h = min(self.blk_sz, self.height - cur_y)
+    w = min(self.blk_sz, self.width - cur_x)
+    cur_blk = self.cur_yuv[cur_y:cur_y + h, cur_x:cur_x + w, :]
+    ref_x = int(cur_x + mv[1])
+    ref_y = int(cur_y + mv[0])
+    if 0 <= ref_x < self.width - w and 0 <= ref_y < self.height - h:
+      ref_blk = self.ref_yuv[ref_y:ref_y + h, ref_x:ref_x + w, :]
+    else:
+      ref_blk = np.zeros((h, w, 3))
+    return metric(cur_blk, ref_blk)
+
+  """
+    distortion of motion field
+    """
+
+  def distortion(self, mask=None, metric=MSE):
+    loss = 0
+    count = 0
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        if not mask is None and mask[i, j]:
+          continue
+        loss += self.dist(i, j, self.mf[i, j], metric)
+        count += 1
+    return loss / count
+
+  """
+    evaluation
+        compare the difference with ground truth
+    """
+
+  def motion_field_evaluation(self, ground_truth):
+    loss = 0
+    count = 0
+    gt = ground_truth.mf
+    mask = ground_truth.mask
+    for i in xrange(self.num_row):
+      for j in xrange(self.num_col):
+        if not mask is None and mask[i][j]:
+          continue
+        loss += LA.norm(gt[i, j] - self.mf[i, j])
+        count += 1
+    return loss / count
+
+  """
+    render the motion field
+    """
+
+  def show(self, ground_truth=None):
+    cur_mf = drawMF(self.cur_f, self.blk_sz, self.mf)
+    if ground_truth is None:
+      n_row = 1
+    else:
+      gt_mf = drawMF(self.cur_f, self.blk_sz, ground_truth)
+      n_row = 2
+    plt.figure(figsize=(n_row * 10, 10))
+    plt.subplot(1, n_row, 1)
+    plt.imshow(cur_mf)
+    plt.title('Estimated Motion Field')
+    if not ground_truth is None:
+      plt.subplot(1, n_row, 2)
+      plt.imshow(gt_mf)
+      plt.title('Ground Truth')
+    plt.tight_layout()
+    plt.show()
diff --git a/tools/3D-Reconstruction/MotionEST/Util.py b/tools/3D-Reconstruction/MotionEST/Util.py
new file mode 100644
index 000000000..f1a0cd42f
--- /dev/null
+++ b/tools/3D-Reconstruction/MotionEST/Util.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+import numpy.linalg as LA
+import matplotlib.pyplot as plt
+from scipy.ndimage import filters
+from PIL import Image, ImageDraw
+
+
+def MSE(blk1, blk2):
+  return np.mean(
+      LA.norm(
+          np.array(blk1, dtype=np.int) - np.array(blk2, dtype=np.int), axis=2))
+
+
+def drawMF(img, blk_sz, mf):
+  img_rgba = img.convert('RGBA')
+  mf_layer = Image.new(mode='RGBA', size=img_rgba.size, color=(0, 0, 0, 0))
+  draw = ImageDraw.Draw(mf_layer)
+  width = img_rgba.size[0]
+  height = img_rgba.size[1]
+  num_row = height // blk_sz
+  num_col = width // blk_sz
+  for i in xrange(num_row):
+    left = (0, i * blk_sz)
+    right = (width, i * blk_sz)
+    draw.line([left, right], fill=(0, 0, 255, 255))
+  for j in xrange(num_col):
+    up = (j * blk_sz, 0)
+    down = (j * blk_sz, height)
+    draw.line([up, down], fill=(0, 0, 255, 255))
+  for i in xrange(num_row):
+    for j in xrange(num_col):
+      center = (j * blk_sz + 0.5 * blk_sz, i * blk_sz + 0.5 * blk_sz)
+      head = (center[0] + mf[i, j][1], center[1] + mf[i, j][0])
+      draw.line([center, head], fill=(255, 0, 0, 255))
+  return Image.alpha_composite(img_rgba, mf_layer)
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 662b8ef5e..94c9b3f26 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -244,14 +244,6 @@ typedef struct VP9Common {
   int byte_alignment;
   int skip_loop_filter;
 
-  // Private data associated with the frame buffer callbacks.
-  void *cb_priv;
-  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
-  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
-
-  // Handles memory for the codec.
-  InternalFrameBufferList int_frame_buffers;
-
   // External BufferPool passed from outside.
   BufferPool *buffer_pool;
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 513b9f678..cfcf4f7af 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -2554,6 +2554,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
       if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
         continue;
 
+      if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED &&
+          x->content_state_sb != kVeryHighSad)
+        continue;
+
       if ((cpi->sf.adaptive_rd_thresh_row_mt &&
            rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
                                       &rd_thresh_freq_fact[mode_index])) ||
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 529dca040..cccafd85c 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -456,6 +456,7 @@ static void set_rt_speed_feature_framesize_independent(
   sf->variance_part_thresh_mult = 1;
   sf->cb_pred_filter_search = 0;
   sf->force_smooth_interpol = 0;
+  sf->rt_intra_dc_only_low_content = 0;
 
   if (speed >= 1) {
     sf->allow_txfm_domain_distortion = 1;
@@ -740,12 +741,7 @@ static void set_rt_speed_feature_framesize_independent(
       sf->nonrd_use_ml_partition = 0;
 #endif
     if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL;
-    // Only keep INTRA_DC mode for speed 8.
-    if (!is_keyframe) {
-      int i = 0;
-      for (i = 0; i < BLOCK_SIZES; ++i)
-        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
-    }
+    sf->rt_intra_dc_only_low_content = 1;
     if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
         content != VP9E_CONTENT_SCREEN) {
       // More aggressive short circuit for speed 8.
@@ -771,6 +767,12 @@ static void set_rt_speed_feature_framesize_independent(
   }
 
   if (speed >= 9) {
+    // Only keep INTRA_DC mode for speed 9.
+    if (!is_keyframe) {
+      int i = 0;
+      for (i = 0; i < BLOCK_SIZES; ++i)
+        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+    }
     sf->cb_pred_filter_search = 1;
     sf->mv.enable_adaptive_subpel_force_stop = 1;
     sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index eb0628199..ca284ded8 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -608,6 +608,10 @@ typedef struct SPEED_FEATURES {
 
   // Force subpel motion filter to always use SMOOTH_FILTER.
   int force_smooth_interpol;
+
+  // For real-time mode: force DC only under intra search when content
+  // does not have high source SAD.
+  int rt_intra_dc_only_low_content;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/vpx_dsp/arm/transpose_neon.h b/vpx_dsp/arm/transpose_neon.h
index 43340e48d..752308160 100644
--- a/vpx_dsp/arm/transpose_neon.h
+++ b/vpx_dsp/arm/transpose_neon.h
@@ -138,8 +138,8 @@ static INLINE void transpose_s16_4x4q(int16x8_t *a0, int16x8_t *a1) {
       vtrnq_s32(vreinterpretq_s32_s16(*a0), vreinterpretq_s32_s16(*a1));
 
   // Swap 64 bit elements resulting in:
-  // c0.val[0]: 00 01 20 21  02 03 22 23
-  // c0.val[1]: 10 11 30 31  12 13 32 33
+  // c0: 00 01 20 21  02 03 22 23
+  // c1: 10 11 30 31  12 13 32 33
 
   const int32x4_t c0 =
       vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b0.val[1]));
@@ -169,8 +169,8 @@ static INLINE void transpose_u16_4x4q(uint16x8_t *a0, uint16x8_t *a1) {
       vtrnq_u32(vreinterpretq_u32_u16(*a0), vreinterpretq_u32_u16(*a1));
 
   // Swap 64 bit elements resulting in:
-  // c0.val[0]: 00 01 20 21  02 03 22 23
-  // c0.val[1]: 10 11 30 31  12 13 32 33
+  // c0: 00 01 20 21  02 03 22 23
+  // c1: 10 11 30 31  12 13 32 33
 
   const uint32x4_t c0 =
       vcombine_u32(vget_low_u32(b0.val[0]), vget_low_u32(b0.val[1]));