Import data & parse¶
In [1]:
from os import listdir
from os.path import join, isdir, isfile
import re
import traceback
from typing import TypedDict
import json
# Prefixes of the configuration lines of an output file; the text following
# the prefix is the value that gets parsed.
DATA_PREFIX = "Using data type "
DELAY_PREFIX = "Using delay of "
SHARED_SECURE_PREFIX = "Shared secure "
INSTANCE_PREFIX = "Using instance type "
CRITERIA_PREFIX = "Using criteria "
FP_QUERY_MAX_LEN_PREFIX = "Foot paths query max len "
FP_RUN_MAX_LEN_PREFIX = "Foot paths run max len "
# Prefix of a benchmark summary line (function name, call count, duration).
BENCHMARK_PREFIX = "Benchmark of "
# Prefixes of the metric messages that follow a "[<timestamp>] " marker; the
# text following the prefix is parsed as an integer.
SCANNED_FP_PREFIX = "scanned fp cnt "
MARKED_TR_PREFIX = "marked size traverse route "
MARKED_FP_PREFIX = "marked size fp lookup "
F_ROUTE_BAG_SZ_PREFIX = "final route bag sz="
FP_BAG_SZ_PREFIX = "fp b sz "
# Placeholder used for string fields that were not found in the output.
UNKNOWN_LITERAL = "Unknown"
def parse_bench_time(bench_time: str) -> int:
    """Convert a colon-separated benchmark duration into milliseconds.

    Example input: ``00:00:093.9852490000003``.

    Every field is rounded to the nearest integer first. The first field is
    then weighted by 60 000, the second by 1 000 and any later field by 1.
    """
    weights = (60 * 1_000, 1_000)
    fields = bench_time.split(":")
    return sum(
        round(float(field)) * (weights[idx] if idx < len(weights) else 1)
        for idx, field in enumerate(fields)
    )
class Result(TypedDict):
    """Everything parsed from one benchmark output file.

    The ``tuple[int, int]`` benchmark fields hold ``(times, duration)``:
    the call count read from the benchmark line and the duration returned
    by ``parse_bench_time``.
    """

    # Run configuration
    data: str
    # (neg, pos) read from the delay line; None when the line is absent.
    # Presumably seconds, given the "s" suffix stripped while parsing.
    delay: tuple[int, int] | None
    instance: str
    # None when the output has no "Shared secure" line
    shared_secure: bool | None
    criteria: list[str]
    # -1 when the corresponding line is absent
    fp_query_max_len: int
    fp_run_max_len: int
    # Benchmarks
    compute_data: tuple[int, int]
    create_instance: tuple[int, int]
    run: tuple[int, int]
    # {round, {step, [begin, end]}}
    # begin/end are the timestamps summed over all logged occurrences of
    # the step, divided by the run count once the file is fully read.
    run_steps: dict[int, dict[str, tuple[float, float]]]
    result: tuple[int, int]
    post_treatment: tuple[int, int]
    # Run stats: {round: [one value per logged message]}
    scanned_fp: dict[int, list[int]]
    marked_tr: dict[int, list[int]]
    marked_fp: dict[int, list[int]]
    # MC-specific; None when the output contains no such message
    f_route_bag_sz: dict[int, list[int]] | None
    fp_bag_sz: dict[int, list[int]] | None
from collections.abc import Iterable

results: dict[str, Result] = {}
RESULTS_FOLDER = "./results"

# "[<timestamp>]" marker opening every step / metric line. The decimal point
# is matched literally and the fractional part is optional, so integer
# timestamps are accepted as well.
_TIMESTAMP = r"\[(?P<step_time>\d+(?:\.\d+)?)\]"
_STEP_LINE_RE = re.compile(
    rf"^{_TIMESTAMP} (?P<step_kind>begin|end) (?P<step_name>.+)$"
)
_MSG_LINE_RE = re.compile(rf"^{_TIMESTAMP} (?P<msg>.+)$")


def _empty_result() -> Result:
    """Return a Result filled with the placeholders used before parsing."""
    return Result(
        data=UNKNOWN_LITERAL,
        delay=None,
        instance=UNKNOWN_LITERAL,
        shared_secure=None,
        criteria=[UNKNOWN_LITERAL],
        fp_query_max_len=-1,
        fp_run_max_len=-1,
        compute_data=(0, -1),
        create_instance=(0, -1),
        run=(0, -1),
        run_steps={},
        result=(0, -1),
        post_treatment=(0, -1),
        scanned_fp={},
        marked_tr={},
        marked_fp={},
        f_route_bag_sz=None,
        fp_bag_sz=None,
    )


def _record_benchmark(result: Result, line: str) -> None:
    """Store a ``Benchmark of <name> (<n> times): <duration>`` line."""
    rest = line[len(BENCHMARK_PREFIX) :]
    fun_name = rest.split(" ")[0]
    rest = rest[len(fun_name + " (") :]
    times_token = rest.split(" ")[0]
    times = int(times_token)
    duration = parse_bench_time(rest[len(times_token + " times): ") :])
    if fun_name.startswith("compute"):
        result["compute_data"] = (times, duration)
    elif fun_name.startswith("create"):
        result["create_instance"] = (times, duration)
    elif fun_name.startswith("run"):
        result["run"] = (times, duration)
    elif fun_name.startswith("result"):
        result["result"] = (times, duration)
    elif fun_name.startswith("postTreatment"):
        result["post_treatment"] = (times, duration)


def _record_step(result: Result, k: int, matches: re.Match[str]) -> int:
    """Accumulate one begin/end timestamp and return the round now current."""
    step_name, step_kind, step_time = matches.group(
        "step_name", "step_kind", "step_time"
    )
    if step_name.startswith("round "):
        k = int(step_name[len("round ") :])
    run_steps_k = result["run_steps"].setdefault(k, {})
    begin, end = run_steps_k.setdefault(step_name, (0, 0))
    # Timestamps are summed here and averaged once the whole file is read.
    if step_kind == "begin":
        run_steps_k[step_name] = (begin + float(step_time), end)
    else:
        run_steps_k[step_name] = (begin, end + float(step_time))
    return 0 if step_name == "end" else k


def _record_metric(result: Result, k: int, msg: str) -> None:
    """Append the integer carried by a metric message to round ``k``."""
    if msg.startswith(SCANNED_FP_PREFIX):
        result["scanned_fp"].setdefault(k, []).append(
            int(msg[len(SCANNED_FP_PREFIX) :])
        )
    elif msg.startswith(MARKED_TR_PREFIX):
        result["marked_tr"].setdefault(k, []).append(
            int(msg[len(MARKED_TR_PREFIX) :])
        )
    elif msg.startswith(MARKED_FP_PREFIX):
        result["marked_fp"].setdefault(k, []).append(
            int(msg[len(MARKED_FP_PREFIX) :])
        )
    elif msg.startswith(F_ROUTE_BAG_SZ_PREFIX):
        if result["f_route_bag_sz"] is None:
            result["f_route_bag_sz"] = {}
        result["f_route_bag_sz"].setdefault(k, []).append(
            int(msg[len(F_ROUTE_BAG_SZ_PREFIX) :])
        )
    elif msg.startswith(FP_BAG_SZ_PREFIX):
        if result["fp_bag_sz"] is None:
            result["fp_bag_sz"] = {}
        result["fp_bag_sz"].setdefault(k, []).append(
            int(msg[len(FP_BAG_SZ_PREFIX) :])
        )


def _parse_output(raw_output: Iterable[str]) -> Result:
    """Parse the lines of one output file into a Result.

    Raises:
        ValueError: on a malformed value, or when step timings were logged
            but the run count needed to average them is 0.
    """
    result = _empty_result()
    # Round that step and metric lines are currently attributed to.
    k = 0
    for raw_line in raw_output:
        # Only drop the line terminator: a final line without a trailing
        # newline must keep its last character.
        line = raw_line.removesuffix("\n")
        if line.startswith(DATA_PREFIX):
            result["data"] = line[len(DATA_PREFIX) :]
        elif line.startswith(DELAY_PREFIX):
            # The slice end of -1 drops the last character of the line.
            neg, pos = line[len(DELAY_PREFIX) : -1].split("s, ")
            result["delay"] = (int(neg), int(pos))
        elif line.startswith(INSTANCE_PREFIX):
            result["instance"] = line[len(INSTANCE_PREFIX) :]
        elif line.startswith(SHARED_SECURE_PREFIX):
            result["shared_secure"] = json.loads(line[len(SHARED_SECURE_PREFIX) :])
        elif line.startswith(CRITERIA_PREFIX):
            # The list is printed with single quotes; make it valid JSON.
            result["criteria"] = json.loads(
                line[len(CRITERIA_PREFIX) :].replace("'", '"')
            )
        elif line.startswith(FP_QUERY_MAX_LEN_PREFIX):
            result["fp_query_max_len"] = json.loads(
                line[len(FP_QUERY_MAX_LEN_PREFIX) :]
            )
        elif line.startswith(FP_RUN_MAX_LEN_PREFIX):
            result["fp_run_max_len"] = json.loads(line[len(FP_RUN_MAX_LEN_PREFIX) :])
        elif line.startswith(BENCHMARK_PREFIX):
            _record_benchmark(result, line)
        elif (matches := _STEP_LINE_RE.search(line)) is not None:
            k = _record_step(result, k, matches)
        elif (matches := _MSG_LINE_RE.search(line)) is not None:
            _record_metric(result, k, matches.group("msg"))

    # Turn the summed timestamps into a mean over the runs.
    run_times = result["run"][0]
    if result["run_steps"] and run_times == 0:
        raise ValueError(
            "step timings were logged but the run count needed to average them is 0"
        )
    for run_steps_k in result["run_steps"].values():
        for step_name, (begin, end) in run_steps_k.items():
            run_steps_k[step_name] = (begin / run_times, end / run_times)
    return result


# Every sub-folder of RESULTS_FOLDER holding an "out.txt" is one benchmark,
# stored in `results` under the folder name.
for node in listdir(RESULTS_FOLDER):
    file_path = join(RESULTS_FOLDER, node, "out.txt")
    if not isfile(file_path):
        continue
    print(f"Loading {file_path} ...", end=" ")
    try:
        with open(file_path) as raw_output:
            results[node] = _parse_output(raw_output)
        print("Done.")
    except Exception as exc:
        # Best effort: report the failure and carry on with the other files.
        print("Error: \n", "\n".join(traceback.format_exception(exc)))
# results
Loading ./results/int-sr/out.txt ... Done. Loading ./results/scal-r/out.txt ... Done. Loading ./results/int-mcsr/out.txt ... Done. Loading ./results/scal-sr/out.txt ... Done. Loading ./results/scal-mcr--spi-pos2-neg1/out.txt ... Done. Loading ./results/int-mcsr--fd-pos2-neg1/out.txt ... Done. Loading ./results/int-mcsr--spi-pos2-neg1/out.txt ... Done. Loading ./results/scal-mcr/out.txt ... Done. Loading ./results/scal-mcsr--fd-pos2-neg1/out.txt ... Done. Loading ./results/int-r/out.txt ... Done. Loading ./results/scal-mcsr--bt-pos2-neg1/out.txt ... Done. Loading ./results/int-mcr--spi-pos2-neg1/out.txt ... Done. Loading ./results/scal-mcr--bt-pos2-neg1/out.txt ... Done. Loading ./results/scal-mcr--fd-pos2-neg1/out.txt ... Done. Loading ./results/int-mcr/out.txt ... Done. Loading ./results/scal-mcsr/out.txt ... Done. Loading ./results/int-mcsr--bt-pos2-neg1/out.txt ... Done. Loading ./results/scal-mcsr--spi-pos2-neg1/out.txt ... Done. Loading ./results/int-mcr--fd-pos2-neg1/out.txt ... Done. Loading ./results/int-mcr--bt-pos2-neg1/out.txt ... Done.
Format data¶
In [2]:
from functools import reduce
# Results ordered by benchmark name; the position in this list is the
# "result_idx" used in the structures below.
results_list = sorted(results.items(), key=lambda named: named[0])
# {k: [result_idx, begin, end][]}
run_steps_round: dict[int, list[tuple[int, float, float]]] = {}
# {step_name: [k, result_idx, begin, end][]}
run_steps: dict[str, list[tuple[int, int, float, float]]] = {}
for i, (_, result) in enumerate(results_list):
    for k, run_steps_k in result["run_steps"].items():
        for step_name, (begin, end) in run_steps_k.items():
            if re.match(r"round \d+", step_name) is None:
                # Regular step: grouped by its name
                run_steps.setdefault(step_name, []).append((k, i, begin, end))
                continue
            # "round <n>" step: grouped by round number
            run_steps_round.setdefault(k, []).append((i, begin, end))
def dict_append_by_key[K, T](d: dict[K, list[T]], k: K, el: T) -> dict[K, list[T]]:
d.setdefault(k, []).append(el)
return d
# The (name, result) pairs of results_list grouped by data type, by instance
# type, and by "<data>-<instance>".
results_by_data: dict[str, list[tuple[str, Result]]] = {}
results_by_instance: dict[str, list[tuple[str, Result]]] = {}
results_by_data_instance: dict[str, list[tuple[str, Result]]] = {}
for named_result in results_list:
    parsed = named_result[1]
    dict_append_by_key(results_by_data, parsed["data"], named_result)
    dict_append_by_key(results_by_instance, parsed["instance"], named_result)
    dict_append_by_key(
        results_by_data_instance,
        f"{parsed['data']}-{parsed['instance']}",
        named_result,
    )
Plot¶
In [3]:
from math import floor
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import AutoMinorLocator
import numpy as np
def format_ms(ms: float) -> str:
    """Format a duration in milliseconds as ``<seconds>:<milliseconds>``.

    The value is rounded to the nearest millisecond and the millisecond part
    is zero-padded to three digits, so 1005 gives ``1:005`` rather than the
    ambiguous ``1:5``. Negative durations are formatted from their absolute
    value and prefixed with ``-``.
    """
    total_ms = int(round(abs(ms)))
    # No sign for values that round to zero, to avoid printing "-0:000".
    sign = "-" if ms < 0 and total_ms else ""
    seconds, millis = divmod(total_ms, 1000)
    return f"{sign}{seconds}:{millis:03d}"
Plot pre-running¶
In [4]:
results_by_data_instance_list = list(results_by_data_instance.items())
# Mean duration of each pre-running phase per "<data>-<instance>" group
compute_data_means = [
    sum(entry[1]["compute_data"][1] for entry in group) / len(group)
    for _, group in results_by_data_instance_list
]
create_instance_means = [
    sum(entry[1]["create_instance"][1] for entry in group) / len(group)
    for _, group in results_by_data_instance_list
]
group_names = [group_name for group_name, _ in results_by_data_instance_list]

fig, ax = plt.subplots(figsize=(len(results_by_data_instance) + 2, 7))

# Left axis: data computation
ax_plt = ax.plot(compute_data_means, "o--", color="black", label="Compute data")
_ = ax.tick_params(axis="y", labelcolor=ax_plt[0].get_color())
_ = ax.set_xticks(
    range(len(results_by_data_instance_list)),
    group_names,
    rotation=45,
    ha="right",
)
ax.yaxis.set_major_formatter(lambda x, _: format_ms(x))
ax.yaxis.set_minor_locator(AutoMinorLocator())
_ = ax.set_ylabel("Duration (s:ms)")
_ = ax.set_xlabel("Data-instance")

# Right axis: instance creation, on its own scale
ax2 = ax.twinx()
ax2_plt = ax2.plot(
    create_instance_means,
    "x:",
    ms=5,
    color="green",
    label="Create instance",
)
_ = ax2.tick_params(axis="y", labelcolor=ax2_plt[0].get_color())
ax2.yaxis.set_major_formatter(lambda x, _: format_ms(x))
_ = ax2.set_ylabel("Duration (s:ms)")

leg = fig.legend()

# The foot-path limits shown in the title are read from the first result.
first_result = results_list[0][1]
_ = ax.set_title(
    f"Benchmarks of pre-running RAPTOR (fp_query_len={first_result['fp_query_max_len']},fp_run_len={first_result['fp_run_max_len']})"
)
Plot running¶
In [5]:
FIG_WIDTH = len(results) + 2
fig, ax = plt.subplots(figsize=(FIG_WIDTH, 7))

# One colour per round index, from 0 up to the highest round of any result
highest_round = max(k for _, result in results_list for k in result["run_steps"])
color_rounds = plt.get_cmap("magma")(np.linspace(0, 1, highest_round + 1))
# One colour per step name
step_names_to_idx = dict(zip(run_steps, range(len(run_steps))))
color_steps = plt.get_cmap("viridis")(np.linspace(0, 1, len(step_names_to_idx)))

# Rounds: vertical begin-to-end segments drawn slightly left of each benchmark
line_rounds = []
for k, steps in run_steps_round.items():
    round_xs = [result_idx - 0.03 for result_idx, _, _ in steps]
    round_begins = [begin for _, begin, _ in steps]
    round_ends = [end for _, _, end in steps]
    round_lines = ax.vlines(
        round_xs,
        round_begins,
        round_ends,
        colors=(color_rounds[k]),
        linewidths=4,
        label=str(k),
    )
    line_rounds.append(round_lines)

# Steps: same kind of segments, drawn slightly right of each benchmark
line_steps = []
for step_name, steps in run_steps.items():
    step_xs = [result_idx + 0.03 for _, result_idx, _, _ in steps]
    step_begins = [begin for _, _, begin, _ in steps]
    step_ends = [end for _, _, _, end in steps]
    step_lines = ax.vlines(
        step_xs,
        step_begins,
        step_ends,
        colors=(color_steps[step_names_to_idx[step_name]]),
        linewidths=4,
        label=step_name,
    )
    line_steps.append(step_lines)

run_durations = [result["run"][1] for (_, result) in results_list]
plt_run_dura = ax.plot(
    run_durations,
    "o--",
    color="black",
    label="run duration mean",
)
benchmark_names = [result_name for (result_name, _) in results_list]
_ = ax.set_xticks(
    range(len(results_list)),
    benchmark_names,
    rotation=45,
    ha="right",
)
_ = ax.tick_params(axis="y", labelcolor=plt_run_dura[0].get_color())
ax.yaxis.set_major_formatter(lambda x, _: format_ms(x))
ax.yaxis.set_minor_locator(AutoMinorLocator())
_ = ax.set_ylabel("Duration (s:ms)")
_ = ax.set_xlabel("Benchmark name")

# Right axis: number of runs of each benchmark
ax2 = ax.twinx()
run_counts = [result["run"][0] for (_, result) in results_list]
plt_run_times = ax2.plot(
    run_counts,
    "x:",
    ms=5,
    color="green",
    label="#run",
)
_ = ax2.tick_params(axis="y", labelcolor=plt_run_times[0].get_color())
_ = ax2.set_ylabel("#run")

# Invisible patches act as section titles inside the legend.
legend_handles = [
    *plt_run_dura,
    *plt_run_times,
    Patch(visible=False, label="\n$\\bf{Rounds}$"),
    *line_rounds,
    Patch(visible=False, label="\n$\\bf{Steps}$"),
    *line_steps,
]
leg = fig.legend(handles=legend_handles)
# Move titles to the left from https://stackoverflow.com/a/68261686
for handle, label in zip(leg.legend_handles, leg.texts):
    if handle is None or handle.get_visible():
        continue
    width = handle.get_window_extent().width
    label.set_horizontalalignment("left")
    label.set_position((-2 * width, 0))

fp_run_max_len = results_list[0][1]["fp_run_max_len"]
_ = ax.set_title(f"Benchmarks of running RAPTOR (fp_len={fp_run_max_len})")
Plot metrics¶
In [6]:
from typing import Literal
from matplotlib.axes import Axes
# Number of subplot rows per figure: the largest number of entries found in
# any result's run_steps, minus one (round 0 is not plotted).
k_cnt = max(len(result["run_steps"]) for result in results.values()) - 1
metrics: list[
    Literal["scanned_fp"]
    | Literal["marked_tr"]
    | Literal["marked_fp"]
    | Literal["f_route_bag_sz"]
    | Literal["fp_bag_sz"]
] = [
    "scanned_fp",
    "marked_tr",
    "marked_fp",
    "f_route_bag_sz",
    "fp_bag_sz",
]
result_names = [result_name for (result_name, _) in results_list]
# One figure per metric, one subplot per round, one violin per benchmark.
for metric in metrics:
    fig = plt.figure(
        layout="constrained",
        figsize=(
            FIG_WIDTH,
            6 * k_cnt,
        ),
    )
    _ = fig.suptitle(f"{metric}")
    # squeeze=False always yields a 2D array of Axes; without it a single
    # round returns a bare Axes object, which cannot be iterated over.
    axs: list[Axes] = list(
        fig.subplots(k_cnt, 1, sharex=True, squeeze=False)[:, 0]
    )
    for round_no, ax in enumerate(axs, start=1):
        # (x position, values) of every result that has data for this round;
        # the metric itself may be None for a result.
        pos_data: list[tuple[int, list[int]]] = [
            (i + 1, result[metric][round_no])
            for i, (_, result) in enumerate(results_list)
            if round_no in (result[metric] or {})
        ]
        if pos_data:
            # Skipped when no result has data: nothing to draw.
            _ = ax.violinplot(
                [data for _, data in pos_data],
                [pos for pos, _ in pos_data],
                showmeans=True,
            )
        _ = ax.set_xticks(
            range(1, len(results_list) + 1),
            result_names,
            rotation=45,
            ha="right",
        )
        _ = ax.set_title(f"Round {round_no}")
        # Thousand separator, from https://stackoverflow.com/questions/16670125/python-format-string-thousand-separator-with-spaces
        ax.yaxis.set_major_formatter(lambda x, _: "{:,}".format(x))
        _ = ax.yaxis.set_minor_locator(AutoMinorLocator())