Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_blockd.h        |  22
-rw-r--r--  vp9/common/vp9_idctllm.c       |   7
-rw-r--r--  vp9/common/vp9_invtrans.c      |   6
-rw-r--r--  vp9/decoder/vp9_decodframe.c   | 120
-rw-r--r--  vp9/decoder/vp9_idct_blk.c     |  12
-rw-r--r--  vp9/encoder/vp9_bitstream.c    |   8
-rw-r--r--  vp9/encoder/vp9_block.h        |  12
-rw-r--r--  vp9/encoder/vp9_encodeintra.c  |  10
-rw-r--r--  vp9/encoder/vp9_encodemb.c     |  22
-rw-r--r--  vp9/encoder/vp9_onyx_if.c      |  48
-rw-r--r--  vp9/encoder/vp9_quantize.c     |  12
-rw-r--r--  vp9/encoder/vp9_rdopt.c        |  20
12 files changed, 147 insertions, 152 deletions
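
The patch below renames the MACROBLOCKD transform function pointers from IDCT/Walsh-specific names (inv_xform4x4_x8, inv_walsh4x4_lossless, idct_add, ...) to transform-neutral ones (inv_txm4x4, inv_2ndtxm4x4, itxm_add, ...), since in a lossless frame the same slots hold Walsh-Hadamard kernels rather than DCTs. A minimal sketch of the dispatch pattern, using reduced stand-in types (only the member names come from the patch):

```c
#include <stdint.h>

/* Transform-neutral inverse hooks, mirroring the renamed MACROBLOCKD
 * members; the struct itself is a reduced stand-in. */
typedef struct {
  int lossless;
  void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
  void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
} xd_sketch;

/* Bind the kernels once per frame; per-block code then calls
 * xd->inv_txm4x4() without caring whether it got a DCT or a WHT. */
static void setup_inv_txm(xd_sketch *xd,
                          void (*idct)(int16_t *, int16_t *, int),
                          void (*idct_1)(int16_t *, int16_t *, int),
                          void (*wht)(int16_t *, int16_t *, int),
                          void (*wht_1)(int16_t *, int16_t *, int)) {
  xd->inv_txm4x4   = xd->lossless ? wht   : idct;
  xd->inv_txm4x4_1 = xd->lossless ? wht_1 : idct_1;
}
```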
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 82678d6b6..7b2be31c0 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -375,22 +375,22 @@ typedef struct macroblockd {
int lossless;
#endif
/* Inverse transform function pointers. */
- void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
- void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
- void (*idct_add)(int16_t *input, const int16_t *dq,
+ void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
+ void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out);
+ void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out);
+ void (*itxm_add)(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *output, int pitch, int stride);
- void (*dc_idct_add)(int16_t *input, const int16_t *dq,
+ void (*dc_itxm_add)(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);
- void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr,
+ void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr,
uint8_t *dst_ptr, int pitch, int stride);
- void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq,
+ void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq,
uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,
const int16_t *dc);
- void (*idct_add_y_block)(int16_t *q, const int16_t *dq,
+ void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);
- void (*idct_add_uv_block)(int16_t *q, const int16_t *dq,
+ void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
uint16_t *eobs);
@@ -505,7 +505,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
return tx_type;
#if CONFIG_LOSSLESS
if (xd->lossless)
- return tx_type;
+ return DCT_DCT;
#endif
// TODO(rbultje, debargha): Explore ADST usage for superblocks
if (xd->mode_info_context->mbmi.sb_type)
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index 548805726..2fec98e50 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -476,12 +476,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {
}
}
-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
uint8_t *dst_ptr,
int pitch, int stride) {
int r, c;
- short tmp[16];
- vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);
+ int16_t dc = input_dc;
+ int16_t tmp[16];
+ vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);
for (r = 0; r < 4; r++) {
for (c = 0; c < 4; c++) {
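
The hunk above widens input_dc from short to int, matching the other dc_only_* hooks, while vp9_short_inv_walsh4x4_1_x8_c still takes an int16_t*; the new int16_t local keeps the call well-typed. A minimal sketch of the pattern, with the Walsh kernel stubbed out:

```c
#include <stdint.h>

/* Stand-in for vp9_short_inv_walsh4x4_1_x8_c, which takes int16_t*. */
static void inv_walsh_1(int16_t *in, int16_t *out, int pitch) {
  (void)pitch;
  out[0] = in[0];
}

/* The parameter is now int, so &input_dc would be an int*, not the
 * int16_t* the Walsh kernel expects; a typed local bridges the gap
 * (assuming, as the codec guarantees, that the DC fits in 16 bits). */
static void dc_only_add_sketch(int input_dc) {
  int16_t dc = (int16_t)input_dc;
  int16_t tmp[16];
  inv_walsh_1(&dc, tmp, 4 << 1);
  (void)tmp;
}
```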
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index 233ffd8a7..cb9a3db63 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -32,9 +32,9 @@ static void recon_dcblock_8x8(MACROBLOCKD *xd) {
void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
BLOCKD *b = &xd->block[block];
if (b->eob <= 1)
- xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
else
- xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
}
void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
@@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
if (has_2nd_order) {
/* do 2nd order transform on the dc block */
- xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff);
+ xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff);
recon_dcblock(xd);
}
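
The eob-based fast path in vp9_inverse_transform_b_4x4 survives the rename unchanged: a block whose end-of-block index is at most 1 can only carry a DC coefficient, so the cheap _1 kernel suffices. A reduced, self-contained restatement (the struct layouts are stand-ins, not the real BLOCKD/MACROBLOCKD):

```c
#include <stdint.h>

typedef struct {
  int eob;              /* number of coded coefficients, in scan order */
  int16_t *dqcoeff;
  int16_t *diff;
} blockd_sketch;

typedef struct {
  void (*inv_txm4x4)(int16_t *, int16_t *, int);
  void (*inv_txm4x4_1)(int16_t *, int16_t *, int);
} mbd_sketch;

/* eob <= 1 means at most the DC term is nonzero, so the DC-only kernel
 * produces the same reconstruction as the full inverse transform. */
static void inverse_transform_b_4x4_sketch(mbd_sketch *xd, blockd_sketch *b,
                                           int pitch) {
  if (b->eob <= 1)
    xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
  else
    xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
}
```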
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index a29abe94e..bcdb26207 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -124,43 +124,30 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
xd->block[i].dequant = pc->Y1dequant[QIndex];
}
+ xd->inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ xd->inv_txm4x4 = vp9_short_idct4x4llm;
+ xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+ xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4;
+ xd->itxm_add = vp9_dequant_idct_add;
+ xd->dc_only_itxm_add = vp9_dc_only_idct_add_c;
+ xd->dc_itxm_add = vp9_dequant_dc_idct_add;
+ xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block;
+ xd->itxm_add_y_block = vp9_dequant_idct_add_y_block;
+ xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
#if CONFIG_LOSSLESS
- pbi->mb.lossless = 0;
- if (!QIndex) {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
- pbi->mb.idct_add = vp9_dequant_idct_add_lossless_c;
- pbi->mb.dc_only_idct_add = vp9_dc_only_inv_walsh_add_c;
- pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add_lossless_c;
- pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
- pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
- pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
- pbi->mb.lossless = 1;
- } else {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->mb.idct_add = vp9_dequant_idct_add;
- pbi->mb.dc_only_idct_add = vp9_dc_only_idct_add_c;
- pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block;
+ if (xd->lossless) {
+ assert(QIndex == 0);
+ xd->inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ xd->inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ xd->inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+ xd->inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless;
+ xd->itxm_add = vp9_dequant_idct_add_lossless_c;
+ xd->dc_only_itxm_add = vp9_dc_only_inv_walsh_add_c;
+ xd->dc_itxm_add = vp9_dequant_dc_idct_add_lossless_c;
+ xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+ xd->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
+ xd->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
}
-#else
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->mb.idct_add = vp9_dequant_idct_add;
- pbi->mb.dc_only_idct_add = vp9_dc_only_idct_add_c;
- pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block;
#endif
for (i = 16; i < 24; i++) {
@@ -350,15 +337,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
int i8x8mode = b->bmi.as_mode.first;
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
}
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs + 16);
} else {
@@ -405,17 +392,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16,
b->dst_stride, b->eob);
} else {
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
b = &xd->block[16 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
b = &xd->block[20 + i];
vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 8, b->dst_stride);
}
} else if (mode == B_PRED) {
@@ -439,7 +426,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16, b->dst_stride,
b->eob);
} else {
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
@@ -449,7 +436,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->above_context->y2 = 0;
xd->left_context->y2 = 0;
vp9_build_intra_predictors_mbuv(xd);
- xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -458,13 +445,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->eobs + 16);
} else if (mode == SPLITMV) {
assert(get_2nd_order_usage(xd) == 0);
- xd->idct_add_y_block(xd->qcoeff,
+ xd->itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
xd->dst.y_stride,
xd->eobs);
- xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -501,7 +488,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
*(b->base_dst) + b->dst, 16,
b->dst_stride, b->eob);
} else {
- xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
}
}
@@ -510,7 +497,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
assert(get_2nd_order_usage(xd) == 1);
vp9_dequantize_b(b);
if (xd->eobs[24] > 1) {
- xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
((int *)b->qcoeff)[1] = 0;
((int *)b->qcoeff)[2] = 0;
@@ -520,11 +507,11 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
((int *)b->qcoeff)[6] = 0;
((int *)b->qcoeff)[7] = 0;
} else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
}
vp9_dequantize_b(b);
- xd->dc_idct_add_y_block(xd->qcoeff,
+ xd->dc_itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
@@ -532,7 +519,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
xd->eobs,
xd->block[24].diff);
}
- xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
@@ -650,7 +637,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ x_idx * 16 + (i & 3) * 4,
xd->dst.y_stride, xd->dst.y_stride, b->eob);
} else {
- xd->idct_add(
+ xd->itxm_add(
b->qcoeff, b->dequant,
xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+ x_idx * 16 + (i & 3) * 4,
@@ -662,7 +649,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
} else {
vp9_dequantize_b(b);
if (xd->eobs[24] > 1) {
- xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
((int *)b->qcoeff)[1] = 0;
((int *)b->qcoeff)[2] = 0;
@@ -672,7 +659,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
((int *)b->qcoeff)[6] = 0;
((int *)b->qcoeff)[7] = 0;
} else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+ xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
((int *)b->qcoeff)[0] = 0;
}
vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
@@ -1534,17 +1521,24 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
pc->sb64_coded = vp9_read_literal(&header_bc, 8);
pc->sb32_coded = vp9_read_literal(&header_bc, 8);
-
- /* Read the loop filter level and type */
- pc->txfm_mode = vp9_read_literal(&header_bc, 2);
- if (pc->txfm_mode == 3)
- pc->txfm_mode += vp9_read_bit(&header_bc);
- if (pc->txfm_mode == TX_MODE_SELECT) {
- pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+#if CONFIG_LOSSLESS
+ xd->lossless = vp9_read_bit(&header_bc);
+ if (xd->lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ }
+ else
+#endif
+ {
+ /* Read the transform mode */
+ pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+ if (pc->txfm_mode == 3)
+ pc->txfm_mode += vp9_read_bit(&header_bc);
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
+ pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
+ pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+ }
}
-
pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
pc->filter_level = vp9_read_literal(&header_bc, 6);
pc->sharpness_level = vp9_read_literal(&header_bc, 3);
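
Under CONFIG_LOSSLESS the frame header now carries an explicit lossless bit ahead of the transform-mode syntax; when set, the decoder forces ONLY_4X4 and skips the txfm_mode field and its TX_MODE_SELECT probabilities. A condensed sketch of the read order; the plain bit reader below stands in for the real boolean decoder behind vp9_read_bit()/vp9_read_literal(), and the enum values are assumptions:

```c
#include <stdint.h>

typedef struct { const uint8_t *buf; int pos; } br_sketch;

static int read_bit(br_sketch *r) {
  int bit = (r->buf[r->pos >> 3] >> (7 - (r->pos & 7))) & 1;
  r->pos++;
  return bit;
}

static int read_literal(br_sketch *r, int bits) {
  int v = 0;
  while (bits--) v = (v << 1) | read_bit(r);
  return v;
}

enum { ONLY_4X4 = 0, TX_MODE_SELECT = 4 };  /* assumed enum values */

static void read_txfm_mode_sketch(br_sketch *r, int *lossless,
                                  int *txfm_mode, int prob_tx[3]) {
  *lossless = read_bit(r);            /* new: explicit lossless bit */
  if (*lossless) {
    *txfm_mode = ONLY_4X4;            /* lossless implies 4x4 WHT only */
  } else {
    *txfm_mode = read_literal(r, 2);  /* 2-bit mode ... */
    if (*txfm_mode == 3)
      *txfm_mode += read_bit(r);      /* ... with an escape bit to mode 4 */
    if (*txfm_mode == TX_MODE_SELECT) {
      prob_tx[0] = read_literal(r, 8);
      prob_tx[1] = read_literal(r, 8);
      prob_tx[2] = read_literal(r, 8);
    }
  }
}
```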
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index ad93b49b1..c3f146480 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
if (*eobs++ > 1)
- xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]);
+ xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]);
else
- xd->dc_only_idct_add(dc[0], dst, dst, stride, stride);
+ xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride);
q += 16;
dst += 4;
@@ -143,9 +143,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1) {
- xd->idct_add(q, dq, dstu, dstu, stride, stride);
+ xd->itxm_add(q, dq, dstu, dstu, stride, stride);
} else {
- xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride);
+ xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);
((int *)q)[0] = 0;
}
@@ -159,9 +159,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
if (*eobs++ > 1) {
- xd->idct_add(q, dq, dstv, dstv, stride, stride);
+ xd->itxm_add(q, dq, dstv, dstv, stride, stride);
} else {
- xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride);
+ xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);
((int *)q)[0] = 0;
}
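
In the chroma paths above, eob <= 1 means only the DC coefficient was coded, so the block is dequantized inline (q[0]*dq[0]), reconstructed with the DC-only hook, and the coefficient store cleared; the ((int *)q)[0] = 0 idiom zeroes q[0] and q[1] in one 32-bit store. A reduced sketch with both hooks stubbed:

```c
#include <stdint.h>

/* Stand-ins for the itxm_add / dc_only_itxm_add hooks. */
static void full_add(int16_t *q, const int16_t *dq, uint8_t *dst, int stride) {
  (void)q; (void)dq; (void)dst; (void)stride;
}
static void dc_only_add(int dc, uint8_t *dst, int stride) {
  (void)dc; (void)dst; (void)stride;
}

static void add_block_sketch(int eob, int16_t *q, const int16_t *dq,
                             uint8_t *dst, int stride) {
  if (eob > 1) {
    full_add(q, dq, dst, stride);            /* full dequant + inverse txm */
  } else {
    dc_only_add(q[0] * dq[0], dst, stride);  /* dequantize the DC inline */
    q[0] = 0;  /* clear the stored coefficients; the original writes
                * ((int *)q)[0] = 0 to zero q[0] and q[1] in one store */
    q[1] = 0;
  }
}
```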
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index c5f2a70c6..02dc4edfc 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1668,7 +1668,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_write_literal(&header_bc, pc->sb64_coded, 8);
pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
vp9_write_literal(&header_bc, pc->sb32_coded, 8);
-
+#if CONFIG_LOSSLESS
+ vp9_write_bit(&header_bc, cpi->oxcf.lossless);
+ if (cpi->oxcf.lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ }
+ else
+#endif
{
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 91d4c4530..d5110c810 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -169,14 +169,14 @@ typedef struct macroblock {
PICK_MODE_CONTEXT sb32_context[4];
PICK_MODE_CONTEXT sb64_context;
- void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
- void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
- void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
- void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 1dd30130a..a52763080 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -62,7 +62,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
#endif
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b) ;
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
}
@@ -165,7 +165,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
tx_type, 8, xd->block[idx].eob);
#endif
} else {
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
}
@@ -183,13 +183,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
#endif
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
i++;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
}
@@ -222,7 +222,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
vp9_subtract_b(be, b, 8);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 16);
x->quantize_b_4x4(be, b);
vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 52eabf129..fad55f776 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -188,11 +188,11 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
} else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+ x->fwd_txm8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
i++;
} else {
- x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
+ x->fwd_txm4x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
}
@@ -202,7 +202,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
build_dcblock_4x4(x);
// do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0],
+ x->fwd_2ndtxm4x4(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
} else {
vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -213,7 +213,7 @@ void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i += 2) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+ x->fwd_txm8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
}
@@ -253,7 +253,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
}
@@ -264,7 +264,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
assert(has_2nd_order == 0);
vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i + 2].coeff[0], 32);
}
}
@@ -274,7 +274,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
build_dcblock_8x8(x);
// do 2nd order transform on the dc block
- x->short_fhaar2x2(&x->block[24].src_diff[0],
+ x->fwd_2ndtxm2x2(&x->block[24].src_diff[0],
&x->block[24].coeff[0], 8);
} else {
vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -285,7 +285,7 @@ void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i += 4) {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+ x->fwd_txm8x8(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
}
@@ -303,7 +303,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
if (tx_type != DCT_DCT) {
vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
} else {
- x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
+ x->fwd_txm16x16(&x->block[0].src_diff[0],
&x->block[0].coeff[0], 32);
}
}
@@ -321,9 +321,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) {
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
- x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
+ x->fwd_txm16x16(x_sb->src_diff + 1024,
x_sb->coeff + 1024, 32);
- x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
+ x->fwd_txm16x16(x_sb->src_diff + 1280,
x_sb->coeff + 1280, 32);
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 73b7b1f5f..b8993cf25 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -753,10 +753,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->quarter_pixel_search = 1;
sf->half_pixel_search = 1;
sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
- sf->optimize_coefficients = 0;
-#else
sf->optimize_coefficients = 1;
+#if CONFIG_LOSSLESS
+ if (cpi->oxcf.lossless)
+ sf->optimize_coefficients = 0;
#endif
sf->no_skip_block4x4_search = 1;
sf->first_step = 0;
@@ -841,20 +841,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
}
}
- cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
- cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
- cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+ cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
+ cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4;
+ cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2;
#if CONFIG_LOSSLESS
if (cpi->oxcf.lossless) {
- cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4_x8;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4_x8;
+ cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless;
}
#endif
@@ -1218,18 +1216,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4llm_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4llm;
+ cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+ cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4;
#if CONFIG_LOSSLESS
cpi->oxcf.lossless = oxcf->lossless;
if (cpi->oxcf.lossless) {
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_inv_walsh4x4_1_x8;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_inv_walsh4x4_x8;
+ cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+ cpi->mb.e_mbd.inv_2ndtxm4x4 = vp9_short_inv_walsh4x4_lossless;
}
#endif
@@ -2632,10 +2630,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// For 2 Pass Only used where GF/ARF prediction quality
// is above a threshold
cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
- cpi->zbin_mode_boost_enabled = FALSE;
-#else
cpi->zbin_mode_boost_enabled = TRUE;
+#if CONFIG_LOSSLESS
+ if (cpi->oxcf.lossless)
+ cpi->zbin_mode_boost_enabled = FALSE;
#endif
if (cpi->gfu_boost <= 400) {
cpi->zbin_mode_boost_enabled = FALSE;
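
Both hunks above switch from compile-time #if/#else defaults to a runtime override, so a CONFIG_LOSSLESS build can still encode lossy streams: the lossy default is set unconditionally, then replaced only when cpi->oxcf.lossless is set. A minimal sketch of the pattern (struct names are stand-ins):

```c
#define CONFIG_LOSSLESS 1

typedef struct { int lossless; } oxcf_sketch;
typedef struct {
  int optimize_coefficients;
  int zbin_mode_boost_enabled;
} cfg_sketch;

/* Defaults-then-override, as now used for optimize_coefficients and
 * zbin_mode_boost_enabled: the lossless case is a runtime decision. */
static void configure_sketch(cfg_sketch *cfg, const oxcf_sketch *oxcf) {
  cfg->optimize_coefficients = 1;     /* lossy default, always set */
  cfg->zbin_mode_boost_enabled = 1;
#if CONFIG_LOSSLESS
  if (oxcf->lossless) {               /* override only at runtime */
    cfg->optimize_coefficients = 0;
    cfg->zbin_mode_boost_enabled = 0;
  }
#endif
}
```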
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 066ca3a87..b2a83bc05 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -489,18 +489,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
14, 16, 20, 24, 28, 32, 36, 40 };
-
- int qrounding_factor = 48;
-
for (Q = 0; Q < QINDEX_RANGE; Q++) {
int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
+ int qrounding_factor = 48;
#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- if (Q == 0) {
- qzbin_factor = 64;
- qrounding_factor = 64;
- }
+ if (cpi->oxcf.lossless && Q == 0) {
+ qzbin_factor = 64;
+ qrounding_factor = 64;
}
#endif
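
Moving qrounding_factor into the Q loop makes both quantizer factors per-iteration locals and flattens the nested lossless test into a single condition. A sketch of the selection logic; vp9_dc_quant is stubbed, the 148 threshold comes from the hunk, and the factors are the fixed-point scalers that vp9_init_quantizer applies to the quantizer step:

```c
/* Hypothetical stand-in for vp9_dc_quant(Q, delta). */
static int dc_quant_sketch(int q) { return q; }

static void pick_quant_factors(int q, int lossless,
                               int *qzbin_factor, int *qrounding_factor) {
  *qzbin_factor = (dc_quant_sketch(q) < 148) ? 84 : 80;
  *qrounding_factor = 48;
  if (lossless && q == 0) {
    *qzbin_factor = 64;      /* neutral factors at Q==0 so the */
    *qrounding_factor = 64;  /* lossless round-trip stays exact */
  }
}
```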
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index a560e05ac..b7d566a67 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1137,7 +1137,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
@@ -1173,7 +1173,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
#endif
else
- xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+ xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1437,7 +1437,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
if (tx_type != DCT_DCT)
vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
else
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block
@@ -1471,11 +1471,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
} else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
do_two = 1;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
}
distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
@@ -2245,7 +2245,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
if (xd->mode_info_context->mbmi.second_ref_frame > 0)
vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);
vp9_subtract_b(be, bd, 16);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, bd);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
@@ -2297,7 +2297,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
if (otherrd) {
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
x->quantize_b_8x8(be2, bd2);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
@@ -2309,7 +2309,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
@@ -2327,7 +2327,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
for (j = 0; j < 4; j += 2) {
BLOCKD *bd = &xd->block[ib + iblock[j]];
BLOCK *be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
@@ -2341,7 +2341,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
TX_4X4);
}
}
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
x->quantize_b_8x8(be2, bd2);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;