diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2023-11-21 14:39:39 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2023-11-22 09:10:43 +0000 |
commit | bd70d3bacfcd53b04b5b5dd7a5d10062ac6fa50a (patch) | |
tree | 99e0409a5de9b3e0562be3e609a857035df9ee0e /sysdeps/aarch64/fpu | |
parent | 5d7f1bce7d8eea31f4baeb68bcc3124b35acc751 (diff) | |
download | glibc-bd70d3bacfcd53b04b5b5dd7a5d10062ac6fa50a.tar glibc-bd70d3bacfcd53b04b5b5dd7a5d10062ac6fa50a.tar.gz glibc-bd70d3bacfcd53b04b5b5dd7a5d10062ac6fa50a.tar.bz2 glibc-bd70d3bacfcd53b04b5b5dd7a5d10062ac6fa50a.zip |
aarch64: Fix libmvec benchmarks
These were broken by the new atan2 functions, as they were only
set up for univariate functions. Arity is now detected from the
input file. This revealed a mistake: the double-precision
inputs were being used for both single- and double-precision
routines, which is now remedied.
Diffstat (limited to 'sysdeps/aarch64/fpu')
-rw-r--r-- | sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py | 66 | ||||
-rwxr-xr-x | sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py | 64 |
2 files changed, 81 insertions, 49 deletions
diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py index 3e124c7810..3661a24044 100644 --- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py +++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py @@ -22,40 +22,49 @@ TEMPLATE = """ #include <math.h> #include <arm_neon.h> -#define STRIDE {stride} +#define STRIDE {rowlen} -#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\ - {rtype} mx0 = {fname}(vld1q_f{prec_short} (variants[v].in[i].arg0)); \\ +#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\ + {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE])); \\ mx0; }})) -struct args +#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\ + {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE]), \\ + vld1q_f{prec_short} (&variants[v].in->arg1[i * STRIDE])); \\ + mx0; }})) + +struct args_1 +{{ + {stype} arg0[{nelems}]; +}}; + +struct args_2 {{ - {stype} arg0[STRIDE]; - double timing; + {stype} arg0[{nelems}]; + {stype} arg1[{nelems}]; }}; struct _variants {{ const char *name; - int count; - const struct args *in; + const struct args_{arity} *in; }}; -static const struct args in0[{rowcount}] = {{ +static const struct args_{arity} in0 = {{ {in_data} }}; static const struct _variants variants[1] = {{ - {{"", {rowcount}, in0}}, + {{"", &in0}}, }}; #define NUM_VARIANTS 1 -#define NUM_SAMPLES(i) (variants[i].count) +#define NUM_SAMPLES(i) ({nelems} / STRIDE) #define VARIANT(i) (variants[i].name) static {rtype} volatile ret; -#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC(i, j); }}) +#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC_{arity}(i, j); }}) #define FUNCNAME "{fname}" #include <bench-libmvec-skeleton.c> """ @@ -63,27 +72,34 @@ static {rtype} volatile ret; def main(name): _, prec, _, func = name.split("-") scalar_to_advsimd_type = {"double": "float64x2_t", "float": "float32x4_t"} - - stride = {"double": 2, "float": 4}[prec] + rowlen = 
{"double": 2, "float": 4}[prec] rtype = scalar_to_advsimd_type[prec] atype = scalar_to_advsimd_type[prec] - fname = f"_ZGVnN{stride}v_{func}{'f' if prec == 'float' else ''}" prec_short = {"double": 64, "float": 32}[prec] - - with open(f"../benchtests/libmvec/{func}-inputs") as f: - in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")] - in_vals = [in_vals[i:i+stride] for i in range(0, len(in_vals), stride)] - rowcount= len(in_vals) - in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals) - - print(TEMPLATE.format(stride=stride, + input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec] + + with open(f"../benchtests/libmvec/{input_filename}") as f: + input_file = f.readlines() + in_vals = (l.strip() for l in input_file if l and not l.startswith("#")) + # Split in case of multivariate signature + in_vals = (l.split(", ") for l in in_vals) + # Transpose + in_vals = list(zip(*in_vals)) + in_data = ",\n".join("{" + (", ".join(val for val in col) + "}") + for col in in_vals) + + arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec) + fname = f"_ZGVnN{rowlen}{'v' * arity}_{func}{'f' if prec == 'float' else ''}" + + print(TEMPLATE.format(rowlen=rowlen, rtype=rtype, atype=atype, fname=fname, prec_short=prec_short, in_data=in_data, - rowcount=rowcount, - stype=prec)) + stype=prec, + arity=arity, + nelems=len(in_vals[0]))) if __name__ == "__main__": diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py index 66f2c8e0f4..5d9332be9c 100755 --- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py +++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py @@ -22,46 +22,55 @@ TEMPLATE = """ #include <math.h> #include <arm_sve.h> -#define MAX_STRIDE {max_stride} #define STRIDE {stride} #define PTRUE svptrue_b{prec_short} #define SV_LOAD svld1_f{prec_short} #define SV_STORE svst1_f{prec_short} #define REQUIRE_SVE -#define CALL_BENCH_FUNC(v, i) 
(__extension__ ({{ \\ - {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\ +#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\ + {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), PTRUE()); \\ mx0; }})) -struct args +#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\ + {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), \\ + SV_LOAD (PTRUE(), &variants[v].in->arg1[i * STRIDE]), \\ + PTRUE()); \\ + mx0; }})) + +struct args_1 {{ - {stype} arg0[MAX_STRIDE]; - double timing; + {stype} arg0[{nelems}]; +}}; + +struct args_2 +{{ + {stype} arg0[{nelems}]; + {stype} arg1[{nelems}]; }}; struct _variants {{ const char *name; - int count; - const struct args *in; + const struct args_{arity} *in; }}; -static const struct args in0[{rowcount}] = {{ +static const struct args_{arity} in0 = {{ {in_data} }}; static const struct _variants variants[1] = {{ - {{"", {rowcount}, in0}}, + {{"", &in0}}, }}; #define NUM_VARIANTS 1 -#define NUM_SAMPLES(i) (variants[i].count) +#define NUM_SAMPLES(i) ({nelems} / STRIDE) #define VARIANT(i) (variants[i].name) // Cannot pass volatile pointer to svst1. This still does not appear to get optimised out. 
-static {stype} /*volatile*/ ret[MAX_STRIDE]; +static {stype} /*volatile*/ ret[{rowlen}]; -#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }}) +#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC_{arity}(i, j)); }}) #define FUNCNAME "{fname}" #include <bench-libmvec-skeleton.c> """ @@ -69,23 +78,29 @@ static {stype} /*volatile*/ ret[MAX_STRIDE]; def main(name): _, prec, _, func = name.split("-") scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"} - stride = {"double": "svcntd()", "float": "svcntw()"}[prec] rtype = scalar_to_sve_type[prec] atype = scalar_to_sve_type[prec] - fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}" prec_short = {"double": 64, "float": 32}[prec] # Max SVE vector length is 2048 bits. To ensure benchmarks are # vector-length-agnostic, but still use as wide vectors as # possible on any given target, divide input data into 2048-bit # rows, then load/store as many elements as the target will allow. 
- max_stride = 2048 // prec_short - - with open(f"../benchtests/libmvec/{func}-inputs") as f: - in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")] - in_vals = [in_vals[i:i+max_stride] for i in range(0, len(in_vals), max_stride)] - rowcount= len(in_vals) - in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals) + rowlen = {"double": 32, "float": 64}[prec] + input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec] + + with open(f"../benchtests/libmvec/{input_filename}") as f: + input_file = f.readlines() + in_vals = (l.strip() for l in input_file if l and not l.startswith("#")) + # Split in case of multivariate signature + in_vals = (l.split(", ") for l in in_vals) + # Transpose + in_vals = list(zip(*in_vals)) + in_data = ",\n".join("{" + (", ".join(val for val in col) + "}") + for col in in_vals) + + arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec) + fname = f"_ZGVsMx{'v' * arity}_{func}{'f' if prec == 'float' else ''}" print(TEMPLATE.format(stride=stride, rtype=rtype, @@ -93,9 +108,10 @@ def main(name): fname=fname, prec_short=prec_short, in_data=in_data, - rowcount=rowcount, stype=prec, - max_stride=max_stride)) + rowlen=rowlen, + arity=arity, + nelems=len(in_vals[0]))) if __name__ == "__main__": |