summaryrefslogtreecommitdiff
path: root/vp8/common/riscv/copymem_rvv.c
diff options
context:
space:
mode:
authorYuuta Liang <yuuta@yuuta.moe>2023-08-10 14:43:10 +0800
committerYuuta Liang <yuuta@yuuta.moe>2023-08-10 15:04:34 +0800
commitee24732cde941a9ff2f36bc19d13730e87430ed1 (patch)
treee31b3aed6f7d7b5e24bba8dcc811d6d954c27ccd /vp8/common/riscv/copymem_rvv.c
parentc2814bb8cc32362822b5a546c8de21b3a0e89032 (diff)
downloadlibvpx-riscv64_android_optimization.tar
libvpx-riscv64_android_optimization.tar.gz
libvpx-riscv64_android_optimization.tar.bz2
libvpx-riscv64_android_optimization.zip
RISC-V: optimize vp8_copy_mem with RVVriscv64_android_optimization
Test environment: 8c 1804Mhz i5-1140G7 RVV Impl: % CROSS=riscv64-unknown-linux-gnu- configure --target=riscv64-linux-gcc \ --enable-debug --enable-gprof && make -j % time qemu-riscv64 -cpu rv64,v=true,zba=true,vlen=128 -L /path/to/sysroot/ \ ./vpxenc --codec=vp8 -w 352 -h 288 -o akiyol.vpx ./akiyo_cif.yuv Pass 1/1 frame 300/300 314977B 8399b/f 251981b/s 92226 ms (3.25 fps) user 1m30.108s % gprof -abp ./vpxenc ./gmon.out | grep vp8_copy_mem 1.36 53.09 1.04 1025863 0.00 0.00 vp8_copy_mem16x16_rvv 0.72 59.01 0.55 1641368 0.00 0.00 vp8_copy_mem8x8_rvv 0.05 65.95 0.04 764377 0.00 0.00 vp8_copy_mem8x4_rvv C Impl: % CROSS=riscv64-unknown-linux-gnu- configure --target=generic-gnu --enable-debug \ --enable-gprof && make -j % time qemu-riscv64 -cpu rv64,v=true,zba=true,vlen=128 -L /path/to/sysroot/ \ ./vpxenc --codec=vp8 -w 352 -h 288 -o akiyol.vpx ./akiyo_cif.yuv Pass 1/1 frame 300/300 314977B 8399b/f 251981b/s 98417 ms (3.05 fps) user 1m36.146s % gprof -abp ./vpxenc ./gmon.out | grep vp8_copy_mem 0.38 63.96 0.31 vp8_copy_mem8x4_c 0.04 70.61 0.03 204336 0.00 0.00 vp8_copy_mem16x16_c Signed-off-by: Yuuta Liang <yuuta@yuuta.moe>
Diffstat (limited to 'vp8/common/riscv/copymem_rvv.c')
-rw-r--r--vp8/common/riscv/copymem_rvv.c54
1 files changed, 54 insertions, 0 deletions
diff --git a/vp8/common/riscv/copymem_rvv.c b/vp8/common/riscv/copymem_rvv.c
new file mode 100644
index 000000000..1999a4fad
--- /dev/null
+++ b/vp8/common/riscv/copymem_rvv.c
@@ -0,0 +1,54 @@
+#include <riscv_vector.h>
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+
+void vp8_copy_mem16x16_rvv(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride) {
+ vuint64m2_t v;
+ size_t vl;
+ size_t n = 16;
+ while (n) {
+ /* Assume e64 is supported. */
+ vl = __riscv_vsetvl_e64m2(n);
+
+ v = __riscv_vlse64_v_u64m2((uint64_t *) src, src_stride, vl);
+ __riscv_vsse64_v_u64m2((uint64_t *) dst, dst_stride, v, vl);
+
+ v = __riscv_vlse64_v_u64m2((uint64_t *) (src + 8), src_stride, vl);
+ __riscv_vsse64_v_u64m2((uint64_t *) (dst + 8), dst_stride, v, vl);
+
+ n -= vl;
+ src += src_stride * vl;
+ dst += dst_stride * vl;
+ }
+}
+
+void vp8_copy_mem8x8_rvv(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride) {
+ size_t n = 8;
+ size_t vl;
+ vuint64m2_t v;
+
+ while (n) {
+ /* Assume e64 is supported. */
+ vl = __riscv_vsetvl_e64m2(n);
+
+ v = __riscv_vlse64_v_u64m2((uint64_t *) src, src_stride, vl);
+ __riscv_vsse64_v_u64m2((uint64_t *) dst, dst_stride, v, vl);
+
+ n -= vl;
+ src += src_stride * vl;
+ dst += dst_stride * vl;
+ }
+}
+
+void vp8_copy_mem8x4_rvv(unsigned char *src, int src_stride,
+ unsigned char *dst, int dst_stride) {
+ size_t vl;
+ vuint64m2_t v;
+
+ /* VL must be 4 because VLEN must be >= 128. Assume e64 is supported. */
+ vl = __riscv_vsetvl_e64m2(4);
+ v = __riscv_vlse64_v_u64m2((uint64_t *) src, src_stride, vl);
+ __riscv_vsse64_v_u64m2((uint64_t *) dst, dst_stride, v, vl);
+}