Import data & parse¶

In [1]:
from os import listdir
from os.path import join, isdir, isfile
import re
import traceback
from typing import TypedDict
import json

# Prefixes of the configuration lines recognised in a benchmark `out.txt` log;
# the text following the prefix carries the value.
DATA_PREFIX = "Using data type "
DELAY_PREFIX = "Using delay of "
SHARED_SECURE_PREFIX = "Shared secure "
INSTANCE_PREFIX = "Using instance type "
CRITERIA_PREFIX = "Using criteria "
FP_QUERY_MAX_LEN_PREFIX = "Foot paths query max len "
FP_RUN_MAX_LEN_PREFIX = "Foot paths run max len "
# Prefix of a benchmark summary line (function name, number of times, duration).
BENCHMARK_PREFIX = "Benchmark of "
# Prefixes of the statistics messages found after a `[<time>] ` stamp; each is
# followed by an integer.
SCANNED_FP_PREFIX = "scanned fp cnt "
MARKED_TR_PREFIX = "marked size traverse route "
MARKED_FP_PREFIX = "marked size fp lookup "
F_ROUTE_BAG_SZ_PREFIX = "final route bag sz="
FP_BAG_SZ_PREFIX = "fp b sz "

# Placeholder used for textual settings whose configuration line is absent from the log.
UNKNOWN_LITERAL = "Unknown"


def parse_bench_time(bench_time: str) -> int:
    """Convert a colon-separated benchmark duration into whole milliseconds.

    Example input: ``00:00:093.9852490000003``.

    Each field is rounded to the nearest integer on its own. The first field
    is weighted by 60 000, the second by 1 000 and every following field by 1.

    Raises:
        ValueError: if a field cannot be parsed as a float.
    """
    # Weight per field position; positions missing from the table weigh 1.
    weights = {0: 60 * 1_000, 1: 1_000}

    return sum(
        round(float(field)) * weights.get(position, 1)
        for position, field in enumerate(bench_time.split(":"))
    )


class Result(TypedDict):
    """Everything parsed from one benchmark `out.txt` log."""

    # Configuration
    # Text following the "Using data type " prefix
    data: str
    # (neg, pos) integers parsed from the "Using delay of " line; None when absent
    delay: tuple[int, int] | None
    # Text following the "Using instance type " prefix
    instance: str
    # JSON value following the "Shared secure " prefix; None when absent
    shared_secure: bool | None
    # JSON list following the "Using criteria " prefix (single quotes accepted)
    criteria: list[str]
    # JSON values following the "Foot paths query/run max len " prefixes; -1 when absent
    fp_query_max_len: int
    fp_run_max_len: int

    # Benchmarks: (times, duration in ms) taken from the matching
    # "Benchmark of <function> (<times> times): <duration>" line
    compute_data: tuple[int, int]
    create_instance: tuple[int, int]
    run: tuple[int, int]
    # {round, {step, [begin, end]}}: begin/end time stamps are summed over every
    # occurrence of the step, then divided by the `run` times once parsing is done
    run_steps: dict[int, dict[str, tuple[float, float]]]
    result: tuple[int, int]
    post_treatment: tuple[int, int]

    # Run stats: {round, values reported by each matching log message}
    scanned_fp: dict[int, list[int]]
    marked_tr: dict[int, list[int]]
    marked_fp: dict[int, list[int]]
    # MC-specific: stay None when the log holds no such message
    f_route_bag_sz: dict[int, list[int]] | None
    fp_bag_sz: dict[int, list[int]] | None


# Parsed benchmark results, keyed by the name of the folder holding each `out.txt`.
results: dict[str, Result] = {}

RESULTS_FOLDER = "./results"

# Every sub-folder of RESULTS_FOLDER that contains an `out.txt` file is parsed
# into one Result. A failure while parsing a file is printed and that file is
# skipped, so the remaining folders are still loaded.
for node in listdir(RESULTS_FOLDER):
    node_path = join(RESULTS_FOLDER, node)
    if isdir(node_path):
        for file in listdir(node_path):
            file_path = join(node_path, file)
            if file == "out.txt" and isfile(file_path):
                print(f"Loading {file_path} ...", end=" ")
                try:
                    with open(file_path) as raw_output:
                        # Defaults kept for everything the log does not mention.
                        result = Result(
                            data=UNKNOWN_LITERAL,
                            delay=None,
                            instance=UNKNOWN_LITERAL,
                            shared_secure=None,
                            criteria=[UNKNOWN_LITERAL],
                            fp_query_max_len=-1,
                            fp_run_max_len=-1,
                            compute_data=(0, -1),
                            create_instance=(0, -1),
                            run=(0, -1),
                            run_steps={},
                            result=(0, -1),
                            post_treatment=(0, -1),
                            scanned_fp={},
                            marked_tr={},
                            marked_fp={},
                            f_route_bag_sz=None,
                            fp_bag_sz=None,
                        )

                        # Round the following lines belong to; 0 until a
                        # "round <n>" step is met and again after an "end" step.
                        k = 0

                        for line in raw_output:
                            # Drop the line terminator only: slicing off the last
                            # character unconditionally would truncate a final
                            # line that has no trailing newline.
                            line = line.removesuffix("\n")

                            if line.startswith(DATA_PREFIX):
                                result["data"] = line[len(DATA_PREFIX) :]
                            elif line.startswith(DELAY_PREFIX):
                                # The slice drops the last character of the line
                                # before splitting the two values on "s, ".
                                neg, pos = line[len(DELAY_PREFIX) : -1].split("s, ")
                                result["delay"] = (int(neg), int(pos))
                            elif line.startswith(INSTANCE_PREFIX):
                                result["instance"] = line[len(INSTANCE_PREFIX) :]
                            elif line.startswith(SHARED_SECURE_PREFIX):
                                result["shared_secure"] = json.loads(
                                    line[len(SHARED_SECURE_PREFIX) :]
                                )
                            elif line.startswith(CRITERIA_PREFIX):
                                # Single quotes are swapped for double quotes so
                                # the list can be read as JSON.
                                result["criteria"] = json.loads(
                                    line[len(CRITERIA_PREFIX) :].replace("'", '"')
                                )
                            elif line.startswith(FP_QUERY_MAX_LEN_PREFIX):
                                result["fp_query_max_len"] = json.loads(
                                    line[len(FP_QUERY_MAX_LEN_PREFIX) :]
                                )
                            elif line.startswith(FP_RUN_MAX_LEN_PREFIX):
                                result["fp_run_max_len"] = json.loads(
                                    line[len(FP_RUN_MAX_LEN_PREFIX) :]
                                )

                            elif line.startswith(BENCHMARK_PREFIX):
                                # Benchmarks: "<function> (<times> times): <duration>"
                                line = line[len(BENCHMARK_PREFIX) :]

                                fun_name = line.split(" ")[0]
                                line = line[len(fun_name + " (") :]

                                times = int(line.split(" ")[0])
                                line = line[len(str(times) + " times): ") :]

                                duration = parse_bench_time(line)

                                if fun_name.startswith("compute"):
                                    result["compute_data"] = (times, duration)
                                elif fun_name.startswith("create"):
                                    result["create_instance"] = (times, duration)
                                elif fun_name.startswith("run"):
                                    result["run"] = (times, duration)
                                elif fun_name.startswith("result"):
                                    result["result"] = (times, duration)
                                elif fun_name.startswith("postTreatment"):
                                    result["post_treatment"] = (times, duration)

                            elif (
                                matches := re.search(
                                    r"^\[(?P<step_time>\d+.\d+)\] (?P<step_kind>begin|end) (?P<step_name>.+)$",
                                    line,
                                )
                            ) is not None:
                                # Step boundary: "[<time>] begin|end <step name>"
                                step_name, step_kind, step_time = matches.group(
                                    "step_name", "step_kind", "step_time"
                                )

                                if step_name.startswith("round "):
                                    k = int(step_name[len("round ") :])

                                run_steps_k = result["run_steps"].setdefault(k, {})
                                (begin, end) = run_steps_k.setdefault(step_name, (0, 0))

                                # Time stamps are accumulated per (round, step);
                                # they are divided by the run count further down.
                                run_steps_k[step_name] = (
                                    (begin + float(step_time), end)
                                    if step_kind == "begin"
                                    else (
                                        begin,
                                        end + float(step_time),
                                    )
                                )

                                if step_name == "end":
                                    k = 0

                            elif (
                                matches := re.search(
                                    r"^\[(?P<step_time>\d+.\d+)\] (?P<msg>.+)$",
                                    line,
                                )
                            ) is not None:
                                # Any other stamped message: statistics of round k
                                line = matches.group("msg")

                                if line.startswith(SCANNED_FP_PREFIX):
                                    result["scanned_fp"].setdefault(k, []).append(
                                        int(line[len(SCANNED_FP_PREFIX) :])
                                    )
                                elif line.startswith(MARKED_TR_PREFIX):
                                    result["marked_tr"].setdefault(k, []).append(
                                        int(line[len(MARKED_TR_PREFIX) :])
                                    )
                                elif line.startswith(MARKED_FP_PREFIX):
                                    result["marked_fp"].setdefault(k, []).append(
                                        int(line[len(MARKED_FP_PREFIX) :])
                                    )
                                elif line.startswith(F_ROUTE_BAG_SZ_PREFIX):
                                    # Created lazily so the field stays None when
                                    # the log has no such message.
                                    if result["f_route_bag_sz"] is None:
                                        result["f_route_bag_sz"] = {}
                                    result["f_route_bag_sz"].setdefault(k, []).append(
                                        int(line[len(F_ROUTE_BAG_SZ_PREFIX) :])
                                    )
                                elif line.startswith(FP_BAG_SZ_PREFIX):
                                    if result["fp_bag_sz"] is None:
                                        result["fp_bag_sz"] = {}
                                    result["fp_bag_sz"].setdefault(k, []).append(
                                        int(line[len(FP_BAG_SZ_PREFIX) :])
                                    )

                        # Turn the accumulated time stamps into per-run means.
                        # When steps were recorded but no "run" benchmark line was
                        # found, the count is still 0: the ZeroDivisionError is
                        # reported by the handler below and the file is skipped.
                        run_times = result["run"][0]
                        for k, run_steps_k in result["run_steps"].items():
                            for step_name, (begin, end) in run_steps_k.items():
                                run_steps_k[step_name] = (
                                    begin / run_times,
                                    end / run_times,
                                )

                        results[node] = result
                        print("Done.")
                except Exception as exc:
                    print("Error: \n", "\n".join(traceback.format_exception(exc)))

# results
Loading ./results/int-sr/out.txt ... Done.
Loading ./results/scal-r/out.txt ... Done.
Loading ./results/int-mcsr/out.txt ... Done.
Loading ./results/scal-sr/out.txt ... Done.
Loading ./results/scal-mcr--spi-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcsr--fd-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcsr--spi-pos2-neg1/out.txt ... Done.
Loading ./results/scal-mcr/out.txt ... Done.
Loading ./results/scal-mcsr--fd-pos2-neg1/out.txt ... Done.
Loading ./results/int-r/out.txt ... Done.
Loading ./results/scal-mcsr--bt-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcr--spi-pos2-neg1/out.txt ... Done.
Loading ./results/scal-mcr--bt-pos2-neg1/out.txt ... Done.
Loading ./results/scal-mcr--fd-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcr/out.txt ... Done.
Loading ./results/scal-mcsr/out.txt ... Done.
Loading ./results/int-mcsr--bt-pos2-neg1/out.txt ... Done.
Loading ./results/scal-mcsr--spi-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcr--fd-pos2-neg1/out.txt ... Done.
Loading ./results/int-mcr--bt-pos2-neg1/out.txt ... Done.

Format data¶

In [2]:
from functools import reduce

# Benchmarks ordered by name; the index in this list is the `result_idx` below.
results_list = sorted(results.items(), key=lambda named_result: named_result[0])

# {k: [result_idx, begin, end][]}
run_steps_round: dict[int, list[tuple[int, float, float]]] = {}
# {step_name: [k, result_idx, begin, end][]}
run_steps: dict[str, list[tuple[int, int, float, float]]] = {}

for i, (_, result) in enumerate(results_list):
    for k, run_steps_k in result["run_steps"].items():
        for step_name, (begin, end) in run_steps_k.items():
            # Anything that is not a "round <n>" step is grouped by its name ...
            if re.match(r"round \d+", step_name) is None:
                run_steps.setdefault(step_name, []).append((k, i, begin, end))
                continue

            # ... while the rounds themselves are grouped by round number.
            run_steps_round.setdefault(k, []).append((i, begin, end))


def dict_append_by_key[K, T](d: dict[K, list[T]], k: K, el: T) -> dict[K, list[T]]:
    d.setdefault(k, []).append(el)
    return d


results_by_data: dict[str, list[tuple[str, Result]]] = reduce(
    lambda acc, v: dict_append_by_key(acc, v[1]["data"], v), results_list, {}
)
results_by_instance: dict[str, list[tuple[str, Result]]] = reduce(
    lambda acc, v: dict_append_by_key(acc, v[1]["instance"], v), results_list, {}
)
results_by_data_instance: dict[str, list[tuple[str, Result]]] = reduce(
    lambda acc, v: dict_append_by_key(acc, f"{v[1]["data"]}-{v[1]["instance"]}", v),
    results_list,
    {},
)

Plot¶

In [3]:
from math import floor
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import AutoMinorLocator
import numpy as np


def format_ms(ms: float) -> str:
    """Format a duration given in milliseconds as ``<seconds>:<milliseconds>``.

    The millisecond part is zero-padded to three digits. A fractional part is
    kept up to three decimals, with trailing zeros removed, so float tick
    values do not leak representation noise into the label:

    >>> format_ms(1005)
    '1:005'
    >>> format_ms(1200.0000000000002)
    '1:200'
    >>> format_ms(93.9852)
    '0:093.985'

    Negative durations are formatted from their magnitude with a leading "-".
    """
    # Round first so that a value such as 999.9996 carries into the seconds.
    total = round(abs(ms), 3)
    sign = "-" if ms < 0 and total else ""

    seconds, millis = divmod(total, 1000)
    millis_text = f"{millis:07.3f}".rstrip("0").rstrip(".")

    return f"{sign}{int(seconds)}:{millis_text}"

Plot pre-running¶

In [4]:
fig, ax = plt.subplots(figsize=(len(results_by_data_instance) + 2, 7))

results_by_data_instance_list = list(results_by_data_instance.items())

ax_plt = ax.plot(
    [
        sum(map(lambda result: result[1]["compute_data"][1], results)) / len(results)
        for (_, results) in results_by_data_instance_list
    ],
    "o--",
    color="black",
    label="Compute data",
)
_ = ax.tick_params(axis="y", labelcolor=ax_plt[0].get_color())
_ = ax.set_xticks(
    range(len(results_by_data_instance_list)),
    [data_instance for (data_instance, _) in results_by_data_instance_list],
    rotation=45,
    ha="right",
)
ax.yaxis.set_major_formatter(lambda x, _: format_ms(x))
ax.yaxis.set_minor_locator(AutoMinorLocator())
_ = ax.set_ylabel("Duration (s:ms)")
_ = ax.set_xlabel("Data-instance")

ax2 = ax.twinx()
ax2_plt = ax2.plot(
    [
        sum(map(lambda result: result[1]["create_instance"][1], results)) / len(results)
        for (_, results) in results_by_data_instance_list
    ],
    "x:",
    ms=5,
    color="green",
    label="Create instance",
)
_ = ax2.tick_params(axis="y", labelcolor=ax2_plt[0].get_color())
ax2.yaxis.set_major_formatter(lambda x, _: format_ms(x))
_ = ax2.set_ylabel("Duration (s:ms)")

leg = fig.legend(
    # loc="upper right",
    # # https://stackoverflow.com/a/47370214
    # bbox_to_anchor=(1, 1),
    # bbox_transform=ax.transAxes,
)
# Move titles to the left from https://stackoverflow.com/a/68261686

_ = ax.set_title(
    f"Benchmarks of pre-running RAPTOR (fp_query_len={results_list[0][1]["fp_query_max_len"]},fp_run_len={results_list[0][1]["fp_run_max_len"]})"
)
No description has been provided for this image

Plot running¶

In [5]:
# One x position per benchmark; the figure widens with their number.
FIG_WIDTH = len(results) + 2

fig, ax = plt.subplots(figsize=(FIG_WIDTH, 7))

# One colour per round number (0 up to the highest round seen), from "magma".
highest_round = max(k for _, result in results_list for k in result["run_steps"])
color_rounds = plt.get_cmap("magma")(np.linspace(0, 1, highest_round + 1))

# One colour per step name, from "viridis".
step_names_to_idx = {step_name: i for i, step_name in enumerate(run_steps)}
color_steps = plt.get_cmap("viridis")(np.linspace(0, 1, len(step_names_to_idx)))

# Rounds are drawn slightly left of the benchmark position, steps slightly
# right, as vertical segments going from the begin to the end value.
line_rounds = [
    ax.vlines(
        [i - 0.03 for i, _, _ in steps],
        [begin for _, begin, _ in steps],
        [end for _, _, end in steps],
        colors=color_rounds[k],
        linewidths=4,
        label=str(k),
    )
    for k, steps in run_steps_round.items()
]
line_steps = [
    ax.vlines(
        [i + 0.03 for _, i, _, _ in steps],
        [begin for _, _, begin, _ in steps],
        [end for _, _, _, end in steps],
        colors=color_steps[step_names_to_idx[step_name]],
        linewidths=4,
        label=step_name,
    )
    for step_name, steps in run_steps.items()
]

run_durations = [result["run"][1] for _, result in results_list]
plt_run_dura = ax.plot(
    run_durations,
    "o--",
    color="black",
    label="run duration mean",
)
benchmark_names = [result_name for result_name, _ in results_list]
_ = ax.set_xticks(
    range(len(results_list)),
    benchmark_names,
    rotation=45,
    ha="right",
)
_ = ax.tick_params(axis="y", labelcolor=plt_run_dura[0].get_color())
ax.yaxis.set_major_formatter(lambda value, _pos: format_ms(value))
ax.yaxis.set_minor_locator(AutoMinorLocator())
_ = ax.set_ylabel("Duration (s:ms)")
_ = ax.set_xlabel("Benchmark name")

# Right axis: the "times" value of the run benchmark, on its own scale.
run_counts = [result["run"][0] for _, result in results_list]
ax2 = ax.twinx()
plt_run_times = ax2.plot(
    run_counts,
    "x:",
    ms=5,
    color="green",
    label="#run",
)
_ = ax2.tick_params(axis="y", labelcolor=plt_run_times[0].get_color())
_ = ax2.set_ylabel("#run")

# Invisible patches act as section titles inside the legend.
legend_handles = [
    *plt_run_dura,
    *plt_run_times,
    Patch(visible=False, label="\n$\\bf{Rounds}$"),
    *line_rounds,
    Patch(visible=False, label="\n$\\bf{Steps}$"),
    *line_steps,
]
leg = fig.legend(handles=legend_handles)

# Move titles to the left from https://stackoverflow.com/a/68261686
for handle, label in zip(leg.legend_handles, leg.texts):
    if handle is None or handle.get_visible():
        continue
    width = handle.get_window_extent().width
    label.set_horizontalalignment("left")
    label.set_position((-2 * width, 0))

# The foot path length shown in the title is read from the first result.
fp_run_len = results_list[0][1]["fp_run_max_len"]
_ = ax.set_title(f"Benchmarks of running RAPTOR (fp_len={fp_run_len})")
No description has been provided for this image

Plot metrics¶

In [6]:
from typing import Literal
from matplotlib.axes import Axes

# Number of rounds to plot: the largest number of round keys found in any
# result, minus one because key 0 is not plotted (rounds 1..k_cnt are).
# NOTE(review): assumes every result has a key 0 and contiguous round numbers.
k_cnt = max(len(result["run_steps"]) for result in results.values()) - 1

# Per-round statistics of `Result` shown as violin plots, one figure each.
metrics: list[
    Literal["scanned_fp", "marked_tr", "marked_fp", "f_route_bag_sz", "fp_bag_sz"]
] = [
    "scanned_fp",
    "marked_tr",
    "marked_fp",
    "f_route_bag_sz",
    "fp_bag_sz",
]

for metric in metrics:
    fig = plt.figure(
        layout="constrained",
        figsize=(
            FIG_WIDTH,
            6 * k_cnt,
        ),
    )
    _ = fig.suptitle(f"{metric}")

    # squeeze=False always returns a 2-D array of axes; without it a single
    # round (k_cnt == 1) yields a bare Axes that cannot be iterated over.
    axs: list[Axes] = list(fig.subplots(k_cnt, 1, sharex=True, squeeze=False).flat)

    for k, ax in enumerate(axs):
        round_k = k + 1
        # (x position, values) of the results that have this metric for the
        # round; a metric left to None or lacking the round is skipped.
        pos_data: list[tuple[int, list[int]]] = [
            (i + 1, result[metric][round_k])
            for i, (_, result) in enumerate(results_list)
            if round_k in (result[metric] or {})
        ]
        _ = ax.violinplot(
            [data for _, data in pos_data],
            [pos for pos, _ in pos_data],
            showmeans=True,
        )
        _ = ax.set_xticks(
            range(1, len(results_list) + 1),
            [result_name for (result_name, _) in results_list],
            rotation=45,
            ha="right",
        )
        _ = ax.set_title(f"Round {k+1}")
        # Thousand separator, from https://stackoverflow.com/questions/16670125/python-format-string-thousand-separator-with-spaces
        ax.yaxis.set_major_formatter(lambda x, _: "{:,}".format(x))
        _ = ax.yaxis.set_minor_locator(AutoMinorLocator())
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image