aboutsummaryrefslogtreecommitdiff
path: root/benchtests/scripts/import_bench.py
diff options
context:
space:
mode:
Diffstat (limited to 'benchtests/scripts/import_bench.py')
-rw-r--r--benchtests/scripts/import_bench.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
index 81248c2adf..d37ff62383 100644
--- a/benchtests/scripts/import_bench.py
+++ b/benchtests/scripts/import_bench.py
@@ -25,6 +25,102 @@ except ImportError:
raise
+def mean(lst):
+ """Compute and return mean of numbers in a list
+
+ The numpy average function has horrible performance, so implement our
+ own mean function.
+
+ Args:
+ lst: The list of numbers to average.
+ Return:
+ The mean of members in the list.
+ """
+ return sum(lst) / len(lst)
+
+
+def split_list(bench, func, var):
+ """ Split the list into a smaller set of more distinct points
+
+ Group together points such that the difference between the smallest
+ point and the mean is less than 1/3rd of the mean. This means that
+ the mean is at most 1.5x the smallest member of that group.
+
+ mean - xmin < mean / 3
+ i.e. 2 * mean / 3 < xmin
+ i.e. mean < 3 * xmin / 2
+
+ For an evenly distributed group, the largest member will be less than
+ twice the smallest member of the group.
+ Derivation:
+
+ An evenly distributed series would be xmin, xmin + d, xmin + 2d...
+
+ mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
+ and max element is xmin + (n - 1) * d
+
+ Now, mean < 3 * xmin / 2
+
+ 3 * xmin > 2 * mean
+ 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
+ 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
+ n * xmin > n * (n - 1) * d
+ xmin > (n - 1) * d
+ 2 * xmin > xmin + (n-1) * d
+ 2 * xmin > xmax
+
+ Hence, proved.
+
+ Similarly, it is trivial to prove that for a similar aggregation by using
+ the maximum element, the maximum element in the group must be at most 4/3
+ times the mean.
+
+ Args:
+ bench: The benchmark object
+ func: The function name
+ var: The function variant name
+ """
+ means = []
+ lst = bench['functions'][func][var]['timings']
+ last = len(lst) - 1
+ while lst:
+ for i in range(last + 1):
+ avg = mean(lst[i:])
+ if avg > 0.75 * lst[last]:
+ means.insert(0, avg)
+ lst = lst[:i]
+ last = i - 1
+ break
+ bench['functions'][func][var]['timings'] = means
+
+
+def do_for_all_timings(bench, callback):
+ """Call a function for all timing objects for each function and its
+ variants.
+
+ Args:
+ bench: The benchmark object
+ callback: The callback function
+ """
+ for func in bench['functions'].keys():
+ for k in bench['functions'][func].keys():
+ if 'timings' not in bench['functions'][func][k].keys():
+ continue
+
+ callback(bench, func, k)
+
+
+def compress_timings(points):
+ """Club points with close enough values into a single mean value
+
+ See split_list for details on how the clubbing is done.
+
+ Args:
+ points: The set of points.
+ """
+ do_for_all_timings(points, split_list)
+
+
def parse_bench(filename, schema_filename):
"""Parse the input file