# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Rez Project
'''
Run a benchmarking suite for runtime resolves.
'''
from __future__ import print_function
import json
import os
import os.path
import math
import subprocess
import platform
import sys
import time
# globals
#
# Shared state populated by `command()` before any work is dispatched.
# `_opts` is the name the functions in this module actually read; `opts` is
# retained only so existing references to it keep resolving.
opts = None
_opts = None          # parsed command-line options
out_dir = None        # absolute path of the --out results directory
pkg_repo_dir = None   # "packages" directory inside out_dir
def setup_parser(parser, completions=False):
    """Register the benchmark command's arguments on ``parser``.

    Args:
        parser: Parser object; only its ``add_argument`` method is used.
        completions (bool): Not used by this function.
    """
    def positive_int(text):
        # Reject 0 and negative counts up front: the resolve loop divides the
        # accumulated time by this value.
        from argparse import ArgumentTypeError

        value = int(text)
        if value < 1:
            raise ArgumentTypeError(
                "must be a positive integer, got %r" % text
            )
        return value

    parser.add_argument(
        "--out", metavar="RESULTS_DIR", default="out",
        help="Output dir (default: %(default)s)"
    )
    parser.add_argument(
        "--iterations", type=positive_int, default=1, metavar="N",
        help="Run every resolve N times and take the average (default: %(default)s)"
    )
    parser.add_argument(
        "--histogram", action="store_true",
        help="Show an ASCII histogram of resolve times (from results in --out)"
    )
    parser.add_argument(
        "--compare", metavar="RESULTS_DIR",
        help="Compare RESULTS_DIR to results specified via --out. Ie, if "
        "'mean_delta' is negative, then RESULTS_DIR resolves are faster on "
        "average than those in --out dir"
    )
def load_packages():
    """Load every package up front so loading time doesn't skew solve times.

    Iterates all package families found under ``pkg_repo_dir``, validates each
    package and walks its variants, printing a progress counter per family and
    a dot per package.
    """
    from rez.packages import iter_package_families

    print("Warming package cache...")

    families = list(iter_package_families(paths=[pkg_repo_dir]))
    n_families = len(families)

    for position, family in enumerate(families, 1):
        sys.stdout.write("\n[%d/%d]" % (position, n_families))

        for package in family.iter_packages():
            package.validate_data()

            # exhausting the iterator is the point: it makes sure the variant
            # objects get created and cached
            for _variant in package.iter_variants():
                pass

            sys.stdout.write('.')
            sys.stdout.flush()

    print('')
def get_system_info():
    """Get system info that might affect resolve time.

    Returns:
        dict: Always contains "rez_version", "rez_solver_version",
        "py_version" and "platform". "num_cpu" and "cpu" are added only when
        ``/proc/cpuinfo`` could be read and parsed.
    """
    from rez import __version__
    from rez.utils.execution import Popen
    from rez.solver import SOLVER_VERSION

    info = {
        "rez_version": __version__,
        "rez_solver_version": SOLVER_VERSION,
        "py_version": "%d.%d" % sys.version_info[:2],
        "platform": platform.platform()
    }

    # this may only work on linux, but that's ok - the important thing is that
    # it works in the benchmark workflow, and we run that on linux only
    #
    try:
        proc = Popen(
            ["cat", "/proc/cpuinfo"],
            stdout=subprocess.PIPE,
            text=True
        )
        out, _ = proc.communicate()

        if proc.returncode == 0:
            # parse output, lines are like 'field : value'. Later entries
            # overwrite earlier ones, so each key ends up holding the value
            # from the last line that mentions it.
            fields = {}
            for line in out.strip().split('\n'):
                if ':' not in line:
                    continue
                key, value = line.split(':', 1)
                fields[key.strip()] = value.strip()

            # get the bits we care about; "processor" holds the last index
            # seen, hence the +1 to turn it into a count
            info["num_cpu"] = int(fields["processor"]) + 1
            info["cpu"] = fields["model name"]
    except Exception:
        # Best effort only: cpu details are optional. Catching Exception
        # (rather than everything) lets KeyboardInterrupt/SystemExit through.
        pass

    return info
def do_resolves():
    """Run every benchmark request and write the results into ``out_dir``.

    Requests are read from rez's bundled ``data/benchmarking/requests.json``.
    Each request is resolved ``_opts.iterations`` times against
    ``pkg_repo_dir`` and the mean wall-clock time is recorded.

    Two files are written:

    * ``resolves.json``: one entry per request, with "status" set to
      "success", "failed" (the resolve completed but ``ctxt.success`` was
      false) or "error" (an exception was raised).
    * ``summary.json``: aggregate statistics plus the output of
      `get_system_info`; also printed to stdout.
    """
    from rez import module_root_path
    from rez.resolved_context import ResolvedContext
    from rez.solver import SolverCallbackReturn

    filepath = os.path.join(module_root_path, "data", "benchmarking", "requests.json")
    with open(filepath) as f:
        requests = json.load(f)

    print("Performing %d resolves..." % len(requests))

    def callback(solver_state):
        # progress indicator: one dot per solver callback
        sys.stdout.write('.')
        sys.stdout.flush()
        return (SolverCallbackReturn.keep_going, '')

    summaries = []
    t_start = time.time()

    for i, request_list in enumerate(requests):
        print("\n[%d/%d]" % (i + 1, len(requests)))
        print("Request: %s" % request_list)

        sys.stdout.write("Resolving")
        sys.stdout.flush()

        summary = {
            "request": request_list
        }

        # perform the resolve
        try:
            secs = 0.0

            for _ in range(_opts.iterations):
                t = time.time()
                ctxt = ResolvedContext(
                    package_requests=request_list,
                    package_paths=[pkg_repo_dir],
                    add_implicit_packages=False,
                    callback=callback
                )
                secs += time.time() - t

            resolve_time = secs / _opts.iterations
            print('\n')

            # the status and package list come from the last iteration
            if ctxt.success:
                summary.update({
                    "status": "success",
                    "resolve_time": resolve_time,
                    "resolved_packages": [
                        os.path.relpath(x.uri, pkg_repo_dir)
                        for x in ctxt.resolved_packages
                    ]
                })
            else:
                summary.update({
                    "status": "failed",
                    "resolve_time": resolve_time
                })
        except Exception as e:
            # record the problem and carry on with the remaining requests
            print("\n%s" % str(e))

            summary.update({
                "status": "error",
                "error": str(e)
            })

        summaries.append(summary)

    # store resolve results to file
    with open(os.path.join(out_dir, "resolves.json"), 'w') as f:
        json.dump(summaries, f, indent=2)

    # calculate, print results and store to file
    total_secs = time.time() - t_start

    successes = [x for x in summaries if x["status"] == "success"]
    errors = [x for x in summaries if x["status"] == "error"]
    fails = [x for x in summaries if x["status"] == "failed"]

    # timing stats cover every resolve that ran to completion, whether or not
    # it found a solution
    resolve_times = [
        x["resolve_time"] for x in summaries
        if x["status"] in ("success", "failed")
    ]
    n_resolve_times = len(resolve_times)

    stats = {
        "total_run_time": total_secs,
        # successes only; failed resolves are reported separately below
        "num_success_resolves": len(successes),
        "num_error_resolves": len(errors),
        "num_failed_resolves": len(fails),
    }

    stats.update(get_system_info())

    if resolve_times:
        resolve_times = sorted(resolve_times)
        # for an even number of samples this picks the upper middle value
        median_resolve_time = resolve_times[n_resolve_times // 2]
        avg_resolve_time = sum(resolve_times) / float(n_resolve_times)
        min_resolve_time = resolve_times[0]
        max_resolve_time = resolve_times[-1]
        # population standard deviation
        stddev = math.sqrt(
            sum((x - avg_resolve_time) ** 2 for x in resolve_times) / float(n_resolve_times)
        )

        stats.update({
            "median": median_resolve_time,
            "mean": avg_resolve_time,
            "min": min_resolve_time,
            "max": max_resolve_time,
            "stddev": stddev
        })

    print("\n\nRESULT:")
    stats_str = json.dumps(stats, indent=2)
    print(stats_str)

    with open(os.path.join(out_dir, "summary.json"), 'w') as f:
        f.write(stats_str)
def run_benchmark():
    """Create ``out_dir``, unpack the benchmark package repo and run the suite.

    Exits the process with status 1 if ``out_dir`` already exists or if the
    bundled package archive cannot be extracted.
    """
    from rez import module_root_path
    from rez.utils.execution import Popen

    if os.path.exists(out_dir):
        print(
            "Dir specified by --out (%s) must not exist" % out_dir,
            file=sys.stderr
        )
        sys.exit(1)

    os.mkdir(out_dir)
    print("Writing results to %s..." % out_dir)

    # extract package repo
    filepath = os.path.join(module_root_path, "data", "benchmarking", "packages.tar.gz")
    proc = Popen(
        ["tar", "-xf", filepath],
        cwd=out_dir
    )
    proc.wait()

    # without the package repo there is nothing meaningful to benchmark
    if proc.returncode != 0:
        print(
            "Failed to extract %s (tar exited with code %s)"
            % (filepath, proc.returncode),
            file=sys.stderr
        )
        sys.exit(1)

    load_packages()
    do_resolves()
def print_histogram():
    """Print an ASCII histogram of the resolve times stored in ``out_dir``.

    Reads ``resolves.json`` from ``out_dir``, ignoring entries that have no
    "resolve_time". The times are spread over 40 equal-width buckets, one
    per output row, and bars are scaled so the fullest bucket is 40
    characters wide. If no entry has a resolve time, a note is written to
    stderr and nothing is drawn.
    """
    n_rows = 40
    n_columns = 40

    with open(os.path.join(out_dir, "resolves.json")) as f:
        summaries = json.load(f)

    resolve_times = [
        summary["resolve_time"] for summary in summaries
        if "resolve_time" in summary
    ]

    if not resolve_times:
        print("No resolve times found in %s" % out_dir, file=sys.stderr)
        return

    # place resolve times into buckets
    max_resolve_time = max(resolve_times)
    min_resolve_time = min(resolve_times)
    bucket_size = (max_resolve_time - min_resolve_time) / float(n_rows)

    buckets = [0] * n_rows
    for resolve_time in resolve_times:
        if bucket_size > 0:
            i_bucket = int((resolve_time - min_resolve_time) / bucket_size)
            # the maximum lands exactly on the upper edge; keep it in range
            i_bucket = min(i_bucket, n_rows - 1)
        else:
            # all times are identical, so there is no range to divide up
            i_bucket = 0
        buckets[i_bucket] += 1

    # normalise buckets wrt max columns
    max_bucket = max(buckets)
    mult = n_columns / float(max_bucket)

    # print histogram; the widest possible label sets the left column width
    start_t = min_resolve_time
    test_str = "[%.2f-%.2f]" % (max_resolve_time, max_resolve_time)
    max_left_column_w = len(test_str)

    for bucket in buckets:
        end_t = start_t + bucket_size
        columns = int(bucket * mult)

        left_column = "[%.2f-%.2f]" % (start_t, end_t)
        left_column = left_column.rjust(max_left_column_w)

        print("%s |%s" % (left_column, '#' * columns))
        start_t = end_t
def compare():
    """Compare the results in ``_opts.compare`` against those in ``out_dir``.

    First checks, request by request, that both runs resolved to the same
    packages; on the first mismatch the details are written to stderr and
    the process exits with status 1. Requests beyond the end of the shorter
    results list are not compared.

    Then prints a JSON object mapping "<field>_delta" to
    ``(delta, percentage string)`` for max, min, mean, median and stddev,
    where ``delta`` is the ``_opts.compare`` value minus the ``out_dir``
    value. The percentage is "n/a" when the ``out_dir`` value is zero.
    """
    out_dir2 = _opts.compare

    with open(os.path.join(out_dir, "resolves.json")) as f:
        summaries1 = json.load(f)

    with open(os.path.join(out_dir2, "resolves.json")) as f:
        summaries2 = json.load(f)

    # list resolves that don't match
    for i, (summary1, summary2) in enumerate(zip(summaries1, summaries2)):
        request = summary1.get("request")
        resolve1 = summary1.get("resolved_packages")
        resolve2 = summary2.get("resolved_packages")

        if resolve1 != resolve2:
            print(
                "MISMATCHING RESULT (#%d):\n"
                "REQUEST: %r\n"
                "RESOLVE FROM %s: %r\n"
                "RESOLVE FROM %s: %r"
                % (i, request, out_dir, resolve1, out_dir2, resolve2),
                file=sys.stderr
            )
            sys.exit(1)

    # show delta of summaries (avg solve time etc)
    with open(os.path.join(out_dir, "summary.json")) as f:
        summary1 = json.load(f)

    with open(os.path.join(out_dir2, "summary.json")) as f:
        summary2 = json.load(f)

    delta_summary = {}

    for field in ("max", "min", "mean", "median", "stddev"):
        baseline = summary1[field]
        delta = summary2[field] - baseline

        if baseline:
            pct_str = "%.2f%%" % (100.0 * (delta / baseline))
            if not pct_str.startswith('-'):
                pct_str = '+' + pct_str
        else:
            # a relative change from zero is undefined (eg stddev of a
            # single resolve)
            pct_str = "n/a"

        delta_summary["%s_delta" % field] = (delta, pct_str)

    print(json.dumps(delta_summary, indent=2))
def command(opts, parser, extra_arg_groups=None):
    """Entry point: record the options and run the selected mode.

    Stores ``opts`` and the derived output/package-repo paths in the module
    globals, then runs exactly one of: the histogram printer (``--histogram``),
    the comparison (``--compare``), or otherwise the benchmark itself.

    Args:
        opts: Parsed options; ``out``, ``histogram`` and ``compare`` are read.
        parser: Not used by this function.
        extra_arg_groups: Not used by this function.
    """
    global _opts, out_dir, pkg_repo_dir

    _opts = opts
    out_dir = os.path.abspath(opts.out)
    pkg_repo_dir = os.path.join(out_dir, "packages")

    # --histogram takes precedence over --compare; a plain run is the fallback
    if opts.histogram:
        action = print_histogram
    elif opts.compare:
        action = compare
    else:
        action = run_benchmark

    action()