In [9]:
# NOTE(review): unexplained scratch calculation — confirm what these two numbers
# represent (and name them), or remove this cell.
81920 / 41320
Out[9]:
1.9825750242013553
In [11]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display, Markdown, SVG, HTML
import pandas as pd
import altair as alt
import re
import pickle
from utils import seconds_to_ms, ms_to_seconds
from benchmark_result import get_benchmark_results, BenchmarkType, SchedulerType, get_broadcast_source, get_async_source, get_push_source
from benchmarks.utils import echo
from benchmarks.throughput import make_benchmark, make_multiple_message_benchmark, make_push_benchmark
In [12]:
# The live call below is commented out; results are read from a saved pickle instead:
#   benchmark_results = get_benchmark_results()
# NOTE(review): presumably these classes are imported here so the pickled objects
# can be resolved while loading — TODO confirm, and move to the import cell.
from benchmark_result import BenchmarkResult, Result 
# SECURITY: pickle.load can execute arbitrary code from the file it reads;
# only load a 'saved_results.pkl' that comes from a trusted source.
with open('saved_results.pkl', 'rb') as saved_results:
    benchmark_results = pickle.load(saved_results)

time_push

This benchmark measures the runtime of sending arrays of various sizes to different numbers of engines, where the remote function simply returns None.

In [13]:
??make_push_benchmark
Signature: make_push_benchmark(get_view)
Docstring: <no docstring>
Source:   
def make_push_benchmark(get_view):
    class PushMessageSuite:
        param_names = ['Number of engines', 'Number of bytes']
        timer = timeit.default_timer
        timeout = 120
        params = [engines, byte_param]

        view = None
        client = None

        def setup(self, number_of_engines, number_of_bytes):
            self.client = ipp.Client(profile='asv')
            self.view = get_view(self)
            self.view.targets = list(range(number_of_engines))
            wait_for(lambda: len(self.client) >= number_of_engines)

        def time_broadcast(self, engines, number_of_bytes):
            reply = self.view.apply_sync(
                lambda x: None, np.array([0] * number_of_bytes, dtype=np.int8)
            )

        def teardown(self, *args):
            if self.client:
                self.client.close()

    return PushMessageSuite
File:      ~/ipyparallel_master_project/benchmarks/throughput.py
Type:      function
In [14]:
# Push-benchmark measurements; the cells below index this by scheduler name
# (e.g. source['DirectView']) and iterate over it with .items().
source = get_push_source(benchmark_results)
In [15]:
# Baseline frame: the DirectView measurements, tagged with their scheduler name
# and a speedup of 1 (the baseline compared with itself).
dview = pd.DataFrame(source['DirectView']).assign(
    **{
        'Scheduler name': 'DirectView',
        'Speedup': 1,
    }
)
In [16]:
# Build one frame per scheduler, then stack them into a single long table.
datas = []
for scheduler_name, scheduler_results in source.items():
    frame = pd.DataFrame(scheduler_results)
    datas.append(
        frame.assign(
            **{
                'Scheduler name': scheduler_name,
                # Series division aligns on the row index, so this relies on every
                # scheduler's rows being in the same order as the DirectView baseline.
                'Speedup': dview['Duration in ms'] / frame['Duration in ms'],
            }
        )
    )
# The per-scheduler row indices are kept as-is, so index labels repeat across schedulers.
data = pd.concat(datas)
data
Out[16]:
Duration in ms Number of bytes Number of engines Scheduler name Speedup
0 6.30 1000 1 DirectView 1.000000
1 8.15 10000 1 DirectView 1.000000
2 12.97 100000 1 DirectView 1.000000
3 81.41 1000000 1 DirectView 1.000000
4 158.57 2000000 1 DirectView 1.000000
... ... ... ... ... ...
35 577.71 1000 1024 NonCoalescing 1.142511
36 598.62 10000 1024 NonCoalescing 1.082156
37 597.03 100000 1024 NonCoalescing 1.319331
38 666.20 1000000 1024 NonCoalescing 2.073837
39 806.00 2000000 1024 NonCoalescing 3.242618

120 rows × 5 columns

In [17]:
# Runtime against engine count for the 2,000,000-byte payload, one line per scheduler.
# The title used to read "using DirectView", but the chart draws every scheduler
# (color='Scheduler name:N'), so it now names the payload size being compared.
ldata = data[data['Number of bytes'] == 2_000_000]
alt.Chart(ldata).mark_line(point=True).encode(
    # Engine counts are plotted on a base-2 log axis.
    alt.X('Number of engines', scale=alt.Scale(type='log', base=2)),
    alt.Y('Duration in ms', scale=alt.Scale(type='log')),
    color='Scheduler name:N',
    tooltip='Duration in ms',
).configure_axis(
    labelFontSize=20,
    titleFontSize=20,
).properties(
    title='Runtime of apply with a 2,000,000-byte payload, by scheduler',
    width=1080,
).interactive().display(renderer='svg')
In [18]:
# Slice out the 1024-engine measurements for every scheduler and show them.
on_1024_engines = data['Number of engines'] == 1024
ldata = data.loc[on_1024_engines]
ldata
Out[18]:
Duration in ms Number of bytes Number of engines Scheduler name Speedup
35 660.04 1000 1024 DirectView 1.000000
36 647.80 10000 1024 DirectView 1.000000
37 787.68 100000 1024 DirectView 1.000000
38 1381.59 1000000 1024 DirectView 1.000000
39 2613.55 2000000 1024 DirectView 1.000000
35 164.12 1000 1024 Coalescing 4.021691
36 164.27 10000 1024 Coalescing 3.943508
37 173.36 100000 1024 Coalescing 4.543609
38 284.16 1000000 1024 Coalescing 4.862014
39 718.34 2000000 1024 Coalescing 3.638319
35 577.71 1000 1024 NonCoalescing 1.142511
36 598.62 10000 1024 NonCoalescing 1.082156
37 597.03 100000 1024 NonCoalescing 1.319331
38 666.20 1000000 1024 NonCoalescing 2.073837
39 806.00 2000000 1024 NonCoalescing 3.242618
In [19]:
# One log-log chart per scheduler: runtime against payload size,
# with one coloured line per engine count.
for scheduler_name in data['Scheduler name'].unique():
    scheduler_data = data[data['Scheduler name'] == scheduler_name]
    bytes_axis = alt.X('Number of bytes', scale=alt.Scale(type='log'))
    duration_axis = alt.Y('Duration in ms', scale=alt.Scale(type='log'))
    runtime_chart = alt.Chart(scheduler_data).mark_line(point=True).encode(
        bytes_axis,
        duration_axis,
        color='Number of engines:N',
        tooltip='Duration in ms',
    )
    runtime_chart.properties(
        title=scheduler_name,
        width=800,
    ).interactive().display(renderer='svg')
In [21]:
# Runtime against engine count for the 1,000-byte payload, one line per scheduler.
# The title used to read "using DirectView" (identical to the 2,000,000-byte chart),
# but the chart draws every scheduler, so it now names the payload size being compared.
ldata = data[data['Number of bytes'] == 1000]
alt.Chart(ldata).mark_line(point=True).encode(
    # Engine counts are plotted on a base-2 log axis.
    alt.X('Number of engines', scale=alt.Scale(type='log', base=2)),
    alt.Y('Duration in ms', scale=alt.Scale(type='log')),
    color='Scheduler name:N',
    tooltip='Duration in ms',
).configure_axis(
    labelFontSize=20,
    titleFontSize=20,
).properties(
    title='Runtime of apply with a 1,000-byte payload, by scheduler',
    width=1080,
).interactive().display(renderer='svg')
In [11]:
# Section heading, then one speedup chart per non-baseline scheduler.
display(Markdown('## Results for duration[DirectView]/duration[scheduler]'))
for scheduler_name in data['Scheduler name'].unique():
    # DirectView is the baseline the speedup is measured against, so it gets no chart.
    if scheduler_name != 'DirectView':
        speedup_rows = data[data['Scheduler name'] == scheduler_name]
        speedup_chart = alt.Chart(speedup_rows).mark_line(point=True).encode(
            alt.X('Number of bytes', scale=alt.Scale(type='log')),
            alt.Y('Speedup'),
            color='Number of engines:N',
            tooltip='Number of engines',
        )
        speedup_chart.properties(
            title=scheduler_name,
            width=800,
        ).interactive().display(renderer='svg')

Results for duration[DirectView]/duration[scheduler]

In [12]:
# Tooltip label of the form "<scheduler name> <number of bytes>", stored on the shared frame.
bytes_label = data['Number of bytes'].astype(str)
data['combined'] = data['Scheduler name'] + ' ' + bytes_label

# Speedup over DirectView as the engine count grows: colour encodes payload size,
# dash pattern encodes the scheduler (its legend is suppressed).
non_baseline_rows = data[data['Scheduler name'] != 'DirectView']
scaling_chart = alt.Chart(non_baseline_rows).mark_line(point=True).encode(
    alt.X('Number of engines', scale=alt.Scale(type='log')),
    alt.Y('Speedup'),
    color='Number of bytes:N',
    strokeDash=alt.StrokeDash(shorthand='Scheduler name', legend=None),
    tooltip='combined',
)
scaling_chart.properties(
    title='schedulers vs directView scaling engines',
    width=800,
).interactive().display(renderer='svg')
In [13]:
# One faceted bar chart per engine count: a column per payload size,
# a bar per scheduler showing its runtime.
for engine in data['Number of engines'].unique():
    engine_rows = data[data['Number of engines'] == engine]
    bars = alt.Chart(engine_rows).mark_bar().encode(
        x='Scheduler name',
        y='Duration in ms',
        color='Scheduler name:N',
        column='Number of bytes:N',
        tooltip='Duration in ms',
    )
    bars.properties(
        title=f'Runtime on {engine} engines:',
    ).interactive().display(renderer='svg')