# Ad-hoc arithmetic (evaluates to roughly 1.98). The result is not bound to a
# name, so it only matters as displayed cell output.
# NOTE(review): presumably a leftover scratch calculation -- confirm whether it can be removed.
81920 / 41320
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display, Markdown, SVG, HTML
import pandas as pd
import altair as alt
import re
import pickle
from utils import seconds_to_ms, ms_to_seconds
from benchmark_result import get_benchmark_results, BenchmarkType, SchedulerType, get_broadcast_source, get_async_source, get_push_source
from benchmarks.utils import echo
from benchmarks.throughput import make_benchmark, make_multiple_message_benchmark, make_push_benchmark
#benchmark_results = get_benchmark_results()
from benchmark_result import BenchmarkResult, Result
# Load the benchmark results from 'saved_results.pkl' (presumably written by
# an earlier benchmark run) instead of recomputing them.
# NOTE(review): unpickling can execute arbitrary code -- only load a pickle
# file that comes from a trusted source.
with open('saved_results.pkl', mode='rb') as saved_results:
    benchmark_results = pickle.Unpickler(saved_results).load()
This benchmark measures the runtime of sending arrays of various sizes to different numbers of engines and returning None.
# IPython `??` introspection: prints detailed help for make_push_benchmark
# (including its source code when IPython can locate it).
??make_push_benchmark
# Push-benchmark measurements, keyed by scheduler name.
source = get_push_source(benchmark_results)

# DirectView is the baseline that every scheduler is compared against.
dview = pd.DataFrame(source['DirectView'])
dview['Scheduler name'] = 'DirectView'
dview['Speedup'] = 1

# One frame per scheduler, tagged with the scheduler's name and its speedup
# relative to DirectView (DirectView duration / scheduler duration).
# NOTE(review): the division aligns rows by index label, so it assumes every
# scheduler's results are listed in the same order as DirectView's -- confirm.
datas = [
    pd.DataFrame(results).assign(**{
        'Scheduler name': name,
        'Speedup': lambda frame: dview['Duration in ms'] / frame['Duration in ms'],
    })
    for name, results in source.items()
]

# Stack all schedulers into one long table and show it as the cell output.
data = pd.concat(datas)
data
# Duration versus engine count for the 2,000,000-byte messages, one line per
# scheduler, on log-log axes (the engine axis uses base 2).
# NOTE(review): the title says 'apply using DirectView', yet the data comes
# from get_push_source and is coloured per scheduler -- confirm the title.
ldata = data.loc[data['Number of bytes'] == 2_000_000]
(
    alt.Chart(ldata)
    .mark_line(point=True)
    .encode(
        alt.X('Number of engines', scale=alt.Scale(type='log', base=2)),
        alt.Y('Duration in ms', scale=alt.Scale(type='log')),
        color='Scheduler name:N',
        tooltip='Duration in ms',
    )
    .configure_axis(labelFontSize=20, titleFontSize=20)
    .properties(title='Runtime of apply using DirectView', width=1080)
    .interactive()
    .display(renderer='svg')
)
# Keep only the measurements taken with 1024 engines and show them as the
# cell output.
ldata = data.loc[data['Number of engines'] == 1024]
ldata
# One chart per scheduler: duration versus message size on log-log axes,
# with one line per engine count.
for scheduler_name in data['Scheduler name'].unique():
    scheduler_data = data[data['Scheduler name'] == scheduler_name]
    (
        alt.Chart(scheduler_data)
        .mark_line(point=True)
        .encode(
            alt.X('Number of bytes', scale=alt.Scale(type='log')),
            alt.Y('Duration in ms', scale=alt.Scale(type='log')),
            color='Number of engines:N',
            tooltip='Duration in ms',
        )
        .properties(title=scheduler_name, width=800)
        .interactive()
        .display(renderer='svg')
    )
# Duration versus engine count for the 1000-byte messages, one line per
# scheduler, on log-log axes (the engine axis uses base 2).
# NOTE(review): the title says 'apply using DirectView', yet the chart is
# coloured per scheduler -- confirm the title.
ldata = data[data['Number of bytes'] == 1000]

engines_axis = alt.X('Number of engines', scale=alt.Scale(type='log', base=2))
duration_axis = alt.Y('Duration in ms', scale=alt.Scale(type='log'))

small_message_lines = alt.Chart(ldata).mark_line(point=True).encode(
    engines_axis,
    duration_axis,
    color='Scheduler name:N',
    tooltip='Duration in ms',
)
small_message_lines.configure_axis(
    labelFontSize=20,
    titleFontSize=20,
).properties(
    title='Runtime of apply using DirectView',
    width=1080,
).interactive().display(renderer='svg')
# Section heading for the speedup charts. A plain string is used because
# there is nothing to interpolate.
display(Markdown('## Results for duration[DirectView]/duration[scheduler]'))

# One chart per scheduler: speedup versus message size (log x-axis), with one
# line per engine count. DirectView is the baseline the speedup is measured
# against, so it gets no chart of its own.
for scheduler_name in data['Scheduler name'].unique():
    if scheduler_name == 'DirectView':
        continue
    speedup_rows = data[data['Scheduler name'] == scheduler_name]
    (
        alt.Chart(speedup_rows)
        .mark_line(point=True)
        .encode(
            alt.X('Number of bytes', scale=alt.Scale(type='log')),
            alt.Y('Speedup'),
            color='Number of engines:N',
            tooltip='Number of engines',
        )
        .properties(title=scheduler_name, width=800)
        .interactive()
        .display(renderer='svg')
    )
# Tooltip label of the form '<scheduler name> <number of bytes>'; stored as a
# new column on `data` itself.
bytes_as_text = data['Number of bytes'].astype(str)
data['combined'] = data['Scheduler name'] + ' ' + bytes_as_text

# Speedup versus engine count for every scheduler except the DirectView
# baseline: colour encodes the message size, dash pattern encodes the scheduler.
non_baseline = data[data['Scheduler name'] != 'DirectView']
(
    alt.Chart(non_baseline)
    .mark_line(point=True)
    .encode(
        alt.X('Number of engines', scale=alt.Scale(type='log')),
        alt.Y('Speedup'),
        color='Number of bytes:N',
        strokeDash=alt.StrokeDash(shorthand='Scheduler name', legend=None),
        tooltip='combined',
    )
    .properties(title='schedulers vs directView scaling engines', width=800)
    .interactive()
    .display(renderer='svg')
)
# One bar chart per engine count: duration per scheduler, with one column of
# bars per message size.
for engine in data['Number of engines'].unique():
    engine_rows = data.loc[data['Number of engines'] == engine]
    bars = alt.Chart(engine_rows).mark_bar().encode(
        x='Scheduler name',
        y='Duration in ms',
        color='Scheduler name:N',
        column='Number of bytes:N',
        tooltip='Duration in ms',
    )
    bars.properties(
        title=f'Runtime on {engine} engines:',
    ).interactive().display(renderer='svg')