import os
import shutil
import tempfile
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.models.tools import HoverTool
from bokeh.io import output_notebook

# Configure Bokeh to display its plots inline in the notebook.
output_notebook()


# IPython automagic (not plain Python): change to the parent directory.
cd ..

/home/vsts/work/1/s/tardis-refdata


def highlight_missing(val):
    """Return the CSS background for a boolean flag cell.

    Values that compare equal to ``True`` are shaded green; everything
    else (``False``, ``None``, ...) is shaded red.
    """
    # Equality rather than identity so that numpy booleans are accepted.
    is_set = (val == True)  # noqa: E712
    return 'background-color: #BCF5A9' if is_set else 'background-color: #F5A9A9'
    
def highlight_relative_difference(val):
    """Return the CSS background for a relative-difference cell.

    Colour scale:

    * ``None``, NaN or ``val <= 1e-2``  -> green  (``#BCF5A9``)
    * ``1e-2 < val <= 1e-1``            -> yellow (``#F2F5A9``)
    * ``1e-1 < val <= 1``               -> orange (``#F5D0A9``)
    * ``val > 1``                       -> red    (``#F5A9A9``)
    """
    if val is None:
        return 'background-color: #BCF5A9'
    # Test the largest threshold first: checking ``> 1e-2`` first would also
    # match every larger value and make the orange/red branches unreachable.
    if val > 1:
        return 'background-color: #F5A9A9'
    if val > 1e-1:
        return 'background-color: #F5D0A9'
    if val > 1e-2:
        return 'background-color: #F2F5A9'
    # Small differences and NaN (all comparisons False) stay green.
    return 'background-color: #BCF5A9'


class ReferenceComparer(object):
    """Compare two versions of an HDF5 reference-data file, key by key.

    Each of the two references is either the file as stored at a git
    revision (``refN_hash`` given) or the copy found at ``compare_path`` on
    disk (``refN_hash`` is ``None``).  Both versions are placed in a fresh
    temporary directory and their paths are exposed as ``ref1_fname`` and
    ``ref2_fname``.

    Parameters
    ----------
    ref1_hash, ref2_hash : str or None
        Git revision to take the respective reference from, or ``None`` to
        use the file at ``compare_path``.  At least one must be given.
    compare_path : str
        Path of the HDF5 file to compare, as understood by ``git checkout``
        when run from the current directory.
    """

    def __init__(self, ref1_hash=None, ref2_hash=None, compare_path='unit_test_data.h5'):
        # At least one side has to come from a git revision.
        assert not ((ref1_hash is None) and (ref2_hash is None)), "One hash can not be None"
        self.ref1_hash = ref1_hash
        self.ref2_hash = ref2_hash
        self.compare_path = compare_path
        self.tmp_dir = None
        self.setup()

    def _ref_fname(self, ref_id):
        """Return the temporary-directory path of reference number ``ref_id``."""
        # Only the file name is used so that a ``compare_path`` containing
        # directories still maps to a file directly inside ``tmp_dir``.
        return os.path.join(
            self.tmp_dir,
            'ref{0}_{1}'.format(ref_id, os.path.basename(self.compare_path)))

    def setup(self):
        """Create the temporary directory and place both references in it.

        Sets ``tmp_dir``, ``ref1_fname`` and ``ref2_fname``.  If anything
        fails, the temporary directory is removed again before the
        exception propagates, so a failed constructor does not leak it.
        """
        self.tmp_dir = tempfile.mkdtemp()
        print('Created temporary directory at {0}. Delete after use with .teardown'.format(self.tmp_dir))
        try:
            for ref_id, ref_hash in enumerate([self.ref1_hash, self.ref2_hash], start=1):
                ref_fname = self._ref_fname(ref_id)
                if ref_hash is not None:
                    self._copy_data_from_hash(ref_hash, 'ref{0}_'.format(ref_id))
                else:
                    # Synchronous copy; a fire-and-forget ``cp`` subprocess
                    # could still be running when the file is first read.
                    shutil.copy(self.compare_path, ref_fname)
                setattr(self, 'ref{0}_fname'.format(ref_id), ref_fname)
        except BaseException:
            self.teardown()
            raise

    def teardown(self):
        """Delete the temporary directory; safe to call more than once."""
        if self.tmp_dir is not None:
            shutil.rmtree(self.tmp_dir)
            self.tmp_dir = None

    def _copy_data_from_hash(self, ref_hash, prefix):
        """Check ``compare_path`` out of ``ref_hash`` into the temp directory.

        The checked-out file is renamed to ``prefix`` + its file name.

        Raises
        ------
        subprocess.CalledProcessError
            If ``git checkout`` fails (e.g. unknown revision or path).
        """
        # NOTE(review): ``git checkout <rev> -- <path>`` also updates the index
        # of the repository in the current directory - confirm that is wanted.
        git_cmd = [
            'git',
            '--work-tree={0}'.format(self.tmp_dir),
            'checkout', ref_hash,
            '--',  # keep the revision and the path unambiguous
            self.compare_path,
        ]
        # Fail here with git's own exit status instead of later with a
        # confusing FileNotFoundError from shutil.move.
        subprocess.check_call(git_cmd)
        shutil.move(os.path.join(self.tmp_dir, self.compare_path),
                    os.path.join(self.tmp_dir, prefix + os.path.basename(self.compare_path)))

    def generate_test_table(self):
        """Return a DataFrame listing which HDF keys exist in each reference.

        The index is the union of the keys of both files; the boolean-like
        columns ``exists_1`` and ``exists_2`` flag presence in each file.
        """
        # Context managers close the stores even if reading the keys fails.
        with pd.HDFStore(self.ref1_fname, mode='r') as rd1_hdfs:
            rd1_keys = rd1_hdfs.keys()
        with pd.HDFStore(self.ref2_fname, mode='r') as rd2_hdfs:
            rd2_keys = rd2_hdfs.keys()
        rd1_df = pd.DataFrame(index=rd1_keys, columns=['exists'])
        rd2_df = pd.DataFrame(index=rd2_keys, columns=['exists'])
        rd1_df['exists'] = True
        rd2_df['exists'] = True
        joined_df = rd1_df.join(rd2_df, how='outer', lsuffix='_1', rsuffix='_2')
        # Keys missing on one side come out of the outer join as NaN.
        joined_df = joined_df.fillna(False)
        return joined_df

    @staticmethod
    def _diff_statistics(ref1, ref2):
        """Return mean/max absolute and relative differences as a dict."""
        abs_diff = np.fabs(ref1 - ref2)
        # Relative difference is only kept where the reference is non-zero.
        rel_diff = (abs_diff / np.fabs(ref1))[ref1 != 0]
        is_frame = isinstance(ref1, pd.DataFrame)
        stats = {}
        for name, diff in (('abs_diff', abs_diff), ('rel_diff', rel_diff)):
            mean = diff.mean(skipna=True)
            maximum = diff.max(skipna=True)
            if is_frame:
                # Collapse the per-column results to a single number.
                mean = mean.mean()
                maximum = maximum.max()
            stats[name + '_mean'] = mean
            stats[name + '_max'] = maximum
        return stats

    def compare_refdata(self, test_table):
        """Compare every key present in both references.

        Adds the columns ``match``, ``abs_diff_mean``, ``abs_diff_max``,
        ``rel_diff_mean`` and ``rel_diff_max`` to ``test_table`` (modified in
        place and returned).  Rows for keys missing from either file keep
        ``None`` in all of them; the difference columns are only filled for
        keys that do not match.

        Raises
        ------
        ValueError
            If a stored object is neither a Series nor a DataFrame.
        """
        for column in ('match', 'abs_diff_mean', 'abs_diff_max',
                       'rel_diff_mean', 'rel_diff_max'):
            test_table[column] = None

        for row_id, row in test_table.iterrows():
            if not row[['exists_1', 'exists_2']].all():
                continue

            ref1_df = pd.read_hdf(self.ref1_fname, row_id)
            ref2_df = pd.read_hdf(self.ref2_fname, row_id)

            # ``pd.testing`` is the public home of these helpers; the old
            # ``pd.util.testing`` alias is deprecated.
            if isinstance(ref1_df, pd.Series):
                assert_equal = pd.testing.assert_series_equal
            elif isinstance(ref1_df, pd.DataFrame):
                assert_equal = pd.testing.assert_frame_equal
            else:
                raise ValueError('Needs to be a Series or DataFrame but is' + str(type(ref1_df)))

            try:
                assert_equal(ref1_df, ref2_df)
            except AssertionError:
                test_table.loc[row_id, 'match'] = False
                for column, value in self._diff_statistics(ref1_df, ref2_df).items():
                    test_table.loc[row_id, column] = value
            else:
                test_table.loc[row_id, 'match'] = True
        return test_table


# ref1_hash is left as None, so reference 1 is the on-disk unit_test_data.h5;
# reference 2 is the same file as checked out from upstream/master.
comparer = ReferenceComparer(ref2_hash='upstream/master')

Created temporary directory at /tmp/tmp9mtdeep_. Delete after use with .teardown

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/usr/share/miniconda/envs/tardis/lib/python3.8/shutil.py in move(src, dst, copy_function)
    787     try:
--> 788         os.rename(src, real_dst)
    789     except OSError:

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp9mtdeep_/unit_test_data.h5' -> '/tmp/tmp9mtdeep_/ref2_unit_test_data.h5'

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-1-a49c68e1e4b6> in <module>
----> 1 comparer = ReferenceComparer(ref2_hash='upstream/master')

<ipython-input-1-3cdb99616e04> in __init__(self, ref1_hash, ref2_hash, compare_path)
      7         self.compare_path = compare_path
      8         self.tmp_dir = None
----> 9         self.setup()
     10 
     11     def setup(self):

<ipython-input-1-3cdb99616e04> in setup(self)
     15             ref_id += 1
     16             if ref_hash is not None:
---> 17                 self._copy_data_from_hash(ref_hash, 'ref{0}_'.format(ref_id))
     18             else:
     19                 subprocess.Popen('cp {0} {1}'.format(self.compare_path, 

<ipython-input-1-3cdb99616e04> in _copy_data_from_hash(self, ref_hash, prefix)
     33         p = subprocess.Popen(git_cmd)
     34         p.wait()
---> 35         shutil.move(os.path.join(self.tmp_dir, self.compare_path), 
     36                     os.path.join(self.tmp_dir, prefix + self.compare_path))
     37 

/usr/share/miniconda/envs/tardis/lib/python3.8/shutil.py in move(src, dst, copy_function)
    800             rmtree(src)
    801         else:
--> 802             copy_function(src, real_dst)
    803             os.unlink(src)
    804     return real_dst

/usr/share/miniconda/envs/tardis/lib/python3.8/shutil.py in copy2(src, dst, follow_symlinks)
    430     if os.path.isdir(dst):
    431         dst = os.path.join(dst, os.path.basename(src))
--> 432     copyfile(src, dst, follow_symlinks=follow_symlinks)
    433     copystat(src, dst, follow_symlinks=follow_symlinks)
    434     return dst

/usr/share/miniconda/envs/tardis/lib/python3.8/shutil.py in copyfile(src, dst, follow_symlinks)
    259         os.symlink(os.readlink(src), dst)
    260     else:
--> 261         with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
    262             # macOS
    263             if _HAS_FCOPYFILE:

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp9mtdeep_/unit_test_data.h5'


# One row per HDF key, flagging in which of the two reference files it exists.
tt = comparer.generate_test_table()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-8117ee489402> in <module>
----> 1 tt = comparer.generate_test_table()

NameError: name 'comparer' is not defined


# Add the match flag and the absolute/relative difference columns.
tt = comparer.compare_refdata(tt)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-197587a6f814> in <module>
----> 1 tt = comparer.compare_refdata(tt)

NameError: name 'comparer' is not defined


# Show the table with colour-coded cells: existence/match flags via
# highlight_missing, relative differences via highlight_relative_difference.
tt[["exists_1", "exists_2", 'rel_diff_mean', 'rel_diff_max', 'match']].style.applymap(
    highlight_missing, subset=['exists_1', 'exists_2', 'match']).applymap(
    highlight_relative_difference, subset=['rel_diff_mean', 'rel_diff_max'])

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-49edc6aa32a1> in <module>
----> 1 tt[["exists_1", "exists_2", 'rel_diff_mean', 'rel_diff_max', 'match']].style.applymap(
      2     highlight_missing, subset=['exists_1', 'exists_2', 'match']).applymap(
      3     highlight_relative_difference, subset=['rel_diff_mean', 'rel_diff_max'])

NameError: name 'tt' is not defined


def compare_output_nu(df1, df2, mpl_backend=False):
    """Plot two ``output_nu`` series against each other and as histograms.

    The left panel is a scatter plot of ``df1`` versus ``df2``; the right
    panel overlays the histograms of both over their common value range.

    Parameters
    ----------
    df1, df2 : pandas.Series
        Values from reference 1 and reference 2.
    mpl_backend : bool, optional
        Draw with matplotlib when True, otherwise with Bokeh (default).

    Returns
    -------
    None
    """
    # Common range so both histograms share the same binning.
    nu_min = np.min([df1.min(), df2.min()])
    nu_max = np.max([df1.max(), df2.max()])

    if mpl_backend:
        bins = np.linspace(nu_min, nu_max, 100)
        plt.figure(figsize=(14, 6))
        plt.subplot(121)
        plt.plot(df1, df2, ',')
        plt.xlabel("output_nu, ref 1")
        plt.ylabel("output_nu, ref 2")
        plt.subplot(122)
        plt.hist(df1, bins=bins, histtype="step", label="ref 1")
        plt.hist(df2, bins=bins, histtype="step", label="ref 2")
        plt.xlabel("output_nu")
        plt.legend(frameon=False)

        return

    TOOLTIPS = [("(x,y)", "(@x, @y)")]
    hover = HoverTool(tooltips=TOOLTIPS)

    p = figure()
    output_nu = ColumnDataSource(pd.DataFrame.from_records({'x': df1.values,
                                                            'y': df2.values}))
    p.circle('x', 'y', size=1, source=output_nu)
    p.xaxis.axis_label = "output_nu, ref 1"
    p.yaxis.axis_label = "output_nu, ref 2"
    p.xaxis.formatter.precision = 1
    p.yaxis.formatter.precision = 1
    p.add_tools(hover)

    # Step lines are hacky way to make histograms with Bokeh
    hist_range = [nu_min, nu_max]
    arr_hist1, _ = np.histogram(df1.values, bins=100, range=hist_range)
    # The second histogram must be built from df2; using df1 twice would draw
    # two identical curves and hide any difference between the references.
    arr_hist2, _ = np.histogram(df2.values, bins=100, range=hist_range)

    hist_x = np.linspace(nu_min, nu_max, 100)
    hist1 = ColumnDataSource(pd.DataFrame.from_records({'x': hist_x,
                                                        'y': arr_hist1}))
    hist2 = ColumnDataSource(pd.DataFrame.from_records({'x': hist_x,
                                                        'y': arr_hist2}))
    q = figure()
    q.step('x', 'y', source=hist1, legend_label='ref 1')
    q.step('x', 'y', source=hist2, legend_label='ref 2', color='#ff7f0e')
    q.xaxis.axis_label = "output_nu"
    q.xaxis.formatter.precision = 1
    q.legend.click_policy = "hide"

    # Hover is not working for step line glyph in Bokeh 1.4.0
    q.add_tools(hover)

    plot = gridplot([p, q], ncols=2, plot_width=420, plot_height=360)

    show(plot)


def compare_spectrum(ref1_nu, ref1_L, ref2_nu, ref2_L, mpl_backend=False):
    """Plot two spectra on top of each other and their luminosity ratio.

    The left panel shows ``L`` against ``nu`` for both references; the right
    panel shows ``ref1_L / ref2_L`` against ``ref1_nu``.

    Parameters
    ----------
    ref1_nu, ref1_L : pandas.Series
        Frequencies and luminosities of reference 1.
    ref2_nu, ref2_L : pandas.Series
        Frequencies and luminosities of reference 2.
    mpl_backend : bool, optional
        Draw with matplotlib when True, otherwise with Bokeh (default).

    Returns
    -------
    None
    """
    if mpl_backend:
        plt.figure(figsize=(14, 6))
        plt.subplot(121)
        plt.plot(ref1_nu, ref1_L, label="ref 1")
        plt.plot(ref2_nu, ref2_L, label="ref 2")
        plt.xlabel("nu")
        plt.ylabel("L")
        plt.legend(frameon=False)
        plt.subplot(122)
        plt.plot(ref1_nu, ref1_L / ref2_L)
        plt.xlabel("nu")
        plt.ylabel("L ref 1 / L ref 2")

        return

    TOOLTIPS = [("(x,y)", "(@x, @y)")]
    hover = HoverTool(tooltips=TOOLTIPS)

    # Work on plain arrays throughout so every column is positional and the
    # data sources do not depend on the Series indices lining up.
    nu1, lum1 = np.asarray(ref1_nu), np.asarray(ref1_L)
    nu2, lum2 = np.asarray(ref2_nu), np.asarray(ref2_L)

    p = figure()
    spectrum1 = ColumnDataSource(pd.DataFrame.from_records({'x': nu1,
                                                            'y': lum1}))
    spectrum2 = ColumnDataSource(pd.DataFrame.from_records({'x': nu2,
                                                            'y': lum2}))
    p.line('x', 'y', source=spectrum1, legend_label='ref 1')
    p.line('x', 'y', source=spectrum2, legend_label='ref 2', color='#ff7f0e')
    # x carries the frequency and y the luminosity, as in the matplotlib branch.
    p.xaxis.axis_label = "nu"
    p.yaxis.axis_label = "L"
    p.xaxis.formatter.precision = 1
    p.yaxis.formatter.precision = 1
    p.legend.click_policy = "hide"
    p.add_tools(hover)

    q = figure()
    lum_ratio = ColumnDataSource(pd.DataFrame.from_records({'x': nu1,
                                                            'y': lum1 / lum2}))
    q.circle('x', 'y', size=1, source=lum_ratio)
    q.xaxis.axis_label = "nu"
    q.yaxis.axis_label = "L ref 1 / L ref 2"
    q.xaxis.formatter.precision = 1
    q.yaxis.formatter.precision = 1
    q.add_tools(hover)

    plot = gridplot([p, q], ncols=2, plot_width=420, plot_height=360)

    show(plot)


# Open both reference files read-only for the detailed inspection below.
# NOTE(review): no matching close() is visible in this notebook - confirm the
# stores are closed before comparer.teardown() removes the files.
tmp1 = pd.HDFStore(comparer.ref1_fname, "r")
tmp2 = pd.HDFStore(comparer.ref2_fname, "r")

# Keys that exist in both files but whose contents do not match.
diff_entries = tt.loc[(tt["match"] == False) & (tt["exists_1"] == True) & (tt["exists_2"] == True)].index

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-2b40df7d3038> in <module>
----> 1 tmp1 = pd.HDFStore(comparer.ref1_fname, "r")
      2 tmp2 = pd.HDFStore(comparer.ref2_fname, "r")
      3 
      4 diff_entries = tt.loc[(tt["match"] == False) & (tt["exists_1"] == True) & (tt["exists_2"] == True)].index

NameError: name 'comparer' is not defined


# Compare the '/test_simulation/output_nu' entry of both reference files.
compare_output_nu(tmp1['/test_simulation/output_nu'], tmp2['/test_simulation/output_nu'])

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-54d5fb0ce665> in <module>
----> 1 compare_output_nu(tmp1['/test_simulation/output_nu'], tmp2['/test_simulation/output_nu'])

NameError: name 'tmp1' is not defined


# Compare the stored spectra; the last '_frequency' entry is dropped
# (presumably bin edges, one more than the luminosity values - TODO confirm).
compare_spectrum(tmp1['/test_runner_simple/spectrum/_frequency'][:-1], 
                 tmp1['/test_runner_simple/spectrum/luminosity'],
                 tmp2['/test_runner_simple/spectrum/_frequency'][:-1], 
                 tmp2['/test_runner_simple/spectrum/luminosity'])

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-0257c581a562> in <module>
----> 1 compare_spectrum(tmp1['/test_runner_simple/spectrum/_frequency'][:-1], 
      2                  tmp1['/test_runner_simple/spectrum/luminosity'],
      3                  tmp2['/test_runner_simple/spectrum/_frequency'][:-1],
      4                  tmp2['/test_runner_simple/spectrum/luminosity'])

NameError: name 'tmp1' is not defined


# Remove the temporary directory created by ReferenceComparer.setup().
comparer.teardown()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-2244ed23302f> in <module>
----> 1 comparer.teardown()

NameError: name 'comparer' is not defined

Compare Reference Data

Installation

Workflow

Import libraries

Define classes and functions

Load data

Test table

Detailed inspection of the reference data

Results