In [None]:
import plotly.graph_objs as go
import plotly.offline as py
import glob
import numpy as np
py.init_notebook_mode()

#### Utilities

First, a few utilities:
* `Data` is a class that contains the data to be plotted, as read from files
* `MyData` is a class which implements the reading from files (usually in the constructor)
* `open_files` is a function that opens the files containing the data to be plotted

You can find the general schema here: https://vecta.io/app/edit/-Laa83yl4nPcFXhrVpS3

In [None]:
class Data():
    """
    Generic container for named series of data to be plotted.

    Subclasses are expected to fill `_fields` (one label per series) and
    `_values` (one entry per label) from the text lines they receive.
    """

    def __init__(self, lines, name):
        """
        Store the name of the data set.

        PARAMS:
            `lines`: a list of text lines; they are not parsed here,
                subclasses implement the actual parsing
            `name`: the label identifying this data set
        """
        self._name = name
        # Start empty so that every property below is usable even before a
        # subclass has parsed anything.
        self._fields = []
        self._values = []

    @property
    def name(self):
        """Label of this data set."""
        return self._name

    @property
    def fields(self):
        """Labels of the stored series."""
        return self._fields

    @property
    def values(self):
        """Stored series, one entry per field."""
        return self._values

    @values.setter
    def values(self, val):
        self._values = val

    @property
    def all_data(self):
        """
        List of `(field, values)` pairs.

        RAISES:
            AssertionError if fields and values have different sizes
        """
        # This must be `_fields` with a single underscore: inside the class
        # body `__fields` is name-mangled to `_Data__fields`, which is never
        # assigned.
        assert len(self._fields) == len(
            self._values), "Wrong data initialization, fields and values must have the same size"
        return list(zip(self._fields, self._values))


In [None]:
####################################
# REIMPLEMENT THIS CLASS IF NEEDED #
####################################

class MyData(Data):
    """
    Precision, Recall and F-measure of every piece listed in a results file.

    `values` is a `3 x n_pieces` array whose rows follow the order of
    `fields`; pieces that never appear in the file keep the value 0.
    """

    def __init__(self, lines, name):
        super().__init__(lines, name)

        piece_tag = 'Piece number:'
        # prefix of a metric line -> row of `_values` that receives the number
        metric_rows = (('Precision:', 0), ('Recall:', 1), ('F1-measure:', 2))

        stripped = [raw.rstrip('\n') for raw in lines]

        # first pass: the largest piece number fixes the number of columns
        piece_ids = [int(text[len(piece_tag):])
                     for text in stripped if text.startswith(piece_tag)]
        last_piece = max([0] + piece_ids)

        self._values = np.zeros((3, last_piece + 1))
        self._fields = ['Precision', 'Recall', 'F-measure']

        # second pass: every metric line belongs to the piece announced by
        # the closest preceding 'Piece number:' line
        for text in stripped:
            if text.startswith(piece_tag):
                piece = int(text[len(piece_tag):])
                continue
            for tag, row in metric_rows:
                if text.startswith(tag):
                    self._values[row, piece] = float(text[len(tag):])
                    break


In [None]:
def split_fname(fname, leave_dataset_name=False):
    """
    Turn a results file name into a human-readable label.

    Underscores become spaces, the words 'results' and 'validation' and any
    '-' are removed, and everything from the first '.' onwards (the
    extension) is dropped.

    PARAMS:
        `fname`: the file name to convert
        `leave_dataset_name`: if True the dataset names 'mozart' and 'pop'
            are kept (followed by a space), otherwise they are removed

    RETURNS:
        the label as a string; surrounding and repeated spaces left by the
        removed words are not stripped
    """
    fname = fname.replace('_', ' ')
    for token in ('results', 'validation', '-'):
        fname = fname.replace(token, '')

    if leave_dataset_name:
        fname = fname.replace('mozart', 'mozart ')
        fname = fname.replace('pop', 'pop ')
    else:
        fname = fname.replace('mozart', '')
        fname = fname.replace('pop', '')

    # `fname[:fname.find('.')]` would chop the last character of names
    # without any '.', because `find` returns -1 in that case.
    return fname.split('.', 1)[0]


def open_files(file_names, leave_dataset_name=False):
    """
    Read every file and wrap its content in a `MyData` object.

    PARAMS:
        `file_names`: a list of strings consisting of file names
        `leave_dataset_name`: forwarded to `split_fname` when building the
            name of each data set

    RETURNS:
        a list of `MyData` objects, one per file and in the same order
    """

    def _load(path):
        # the data set is named after the file it was read from
        with open(path, 'r') as handle:
            content = handle.readlines()
            label = split_fname(path, leave_dataset_name=leave_dataset_name)
            return MyData(content, label)

    return [_load(path) for path in file_names]


### Plots of CNN crossvalidation on Mozart and Pop dataset
* `cl1`: threshold chosen as the upper sample of the 2nd cluster out of 5
* `cl2`: threshold chosen as the upper sample of the 2nd cluster out of 6
* non-specified: threshold chosen as the upper sample of the 1st cluster out of 2
* mono: melody detected by using the graph search

The following is the function used to plot.

Violin plots show a box plot with the mean in white, the percentiles, the 95% confidence interval, and the samples that fall outside it. Their width also shows the distribution of the samples.

In [None]:
from plotly.io import write_image
from scipy.stats import wilcoxon, friedmanchisquare
def plot(data, static=False, title=''):
    """
    Draw grouped violin plots: one group per field, one violin per data set.

    For every field a statistical test is run across the data sets and its
    p-value is annotated under the corresponding group: Wilcoxon
    signed-rank test with exactly two data sets, Friedman test with three
    or more. With a single data set no test is possible and the
    annotations are omitted.

    Params:
        `data`: a non-empty list of Data objects sharing the same fields;
            `values` must be a 2D array with one row per field
        `static`: if True save the figure to 'output.svg', otherwise show
            it in the Jupyter Notebook
        `title`: the title of the figure
    """
    fields = data[0].fields

    if len(data) < 2:
        # neither test can compare a data set with itself
        pvalues = None
    else:
        stat_test = wilcoxon if len(data) == 2 else friedmanchisquare
        # one test per field, each computed exactly once
        pvalues = []
        for i in range(len(fields)):
            samples = [d.values[i] for d in data]
            pvalues.append(f'{stat_test(*samples)[1]:.3E}')

    traces = []
    for d in data:
        # `np.ravel(d.values)` lists all samples of the first field, then all
        # those of the second one, and so on: the labels must be repeated in
        # blocks, not interleaved, to stay aligned with the samples.
        x = np.repeat(d.fields, d.values.shape[1])
        y = np.ravel(d.values)

        traces.append(
            dict(
                type='violin',
                x=x,
                y=y,
                marker=dict(
                    line=dict(
                        color='black',
                        width=1.5),
                ),
                opacity=0.6,
                name=d.name,
                box=dict(
                    visible=True,
                    line=dict(
                        color='black',
                        width=1.5
                    )
                ),
                meanline=dict(
                    visible=True,
                    color='white',
                    width=1.5
                ),
                spanmode='hard',
                scalemode='count'
            )
        )

    if pvalues is None:
        annotations = []
    else:
        # the p-values are attached to the lowest sample of the whole plot
        lowest = np.min([d.values for d in data])
        annotations = [
            dict(
                x=label,
                y=lowest,
                text='p = ' + pvalues[i],
                showarrow=True,
                ax=0,
                ay=30
            ) for i, label in enumerate(fields)
        ]

    layout = go.Layout(
        xaxis=dict(
            tickfont=dict(
                size=18,
            )
        ),
        violinmode='group',
        yaxis=dict(
            zeroline=False
        ),
        annotations=annotations,
        title=title
    )
    fig = go.Figure(data=traces, layout=layout)

    if static:
        write_image(fig, 'output.svg')
    else:
        py.iplot(fig)

In [None]:
# Change this to save SVG files...
# SAVE is a notebook-wide switch: it is also read by the plotting functions
# defined in the later cells.
SAVE = False

# Mozart cross-validation results: the files are loaded, but the plot call
# is currently commented out.
file_names = sorted(glob.glob('*results*mozart*'))
# file_names = file_names[1:]
data = open_files(file_names)
#plot(data, static=SAVE, title='Mozart crossvalidation')

# Pop cross-validation results: `file_names` and `data` are overwritten, so
# only the Pop data remains available after this cell.
file_names = sorted(glob.glob('*results*pop*'))
# file_names = file_names[1:]
data = open_files(file_names)
plot(data, static=SAVE, title='Pop crossvalidation')

## Validation
We validated the models trained on the Pop and Mozart datasets on various composers. The number in parentheses is the number of works available for each composer.

In [None]:
SAVE = True
file_names = sorted(glob.glob('*validation*'))
data = open_files(file_names, leave_dataset_name=True)
# With SAVE enabled this call writes 'output.svg'; the line plot at the end
# of this cell is written to the same file afterwards.
plot(data, static=SAVE, title='Validation')

# Computing averages: one group per validation sub-directory that contains
# at least one '*.pyc.bz' file; the group size is the number of such files.
groups = []
xticks = []
for i in sorted(glob.glob('../data/solo-accompaniment-dataset/validation/*')):
    j = len(glob.glob(i + '/*.pyc.bz'))
    if j > 0:
        groups.append(j)
        # NOTE(review): the glob prefix is 46 characters long, so the fixed
        # offset 49 presumably also strips a 3-character prefix from every
        # directory name - confirm against the dataset layout.
        xticks.append(i[49:] + ' (' + str(j) + ')')

# Replace the per-piece values of every data set with per-group means.
# Assumes the pieces in the results files are ordered like the sorted
# directories above - TODO confirm.
for d in data:
    new_dvalues = np.zeros((d.values.shape[0], len(groups)))
    k = 0
    for i, j in enumerate(groups):
        new_dvalues[:, i] = np.mean(d.values[:, k:k+j], axis=1)
        k += j
    d.values = new_dvalues

traces = []
colors = ['blue', 'orange', 'green', 'red', 'black', 'violet']
axis = [('x1', 'y1'), ('x2', 'y2'), ('x3', 'y3')]
# one subplot per metric (row of `values`), one line per data set
for m in range(data[0].values.shape[0]):
    for i, d in enumerate(data):
        y = d.values[m, :]

        traces.append(
            dict(
                type='scatter',
                mode='lines+markers',
                x=xticks,
                y=y,
                xaxis=axis[m][0],
                yaxis=axis[m][1],
                opacity=0.4,
                name=d.name,
                marker=dict(
                    # cycle through the palette so that more than
                    # len(colors) result files do not raise an IndexError
                    color=colors[i % len(colors)],
                ),
                # show every data set only once in the legend
                showlegend=not m
            )
        )

layout = dict(
    xaxis=dict(
        domain=[0, 0.49],
        anchor=axis[0][1],
        tickangle=45,
        tickfont=dict(
            size=7
        ),
        side='top'
    ),
    xaxis2=dict(
        domain=[0, 0.49],
        anchor=axis[1][1],
        tickangle=-45,
        tickfont=dict(
            size=7
        ),
    ),
    xaxis3=dict(
        domain=[0.51, 1],
        anchor=axis[2][1],
        title='Average F-measure',
        tickangle=-45,
        tickfont=dict(
            size=7
        ),
    ),
    yaxis=dict(
        domain=[0.51, 1],
        anchor=axis[0][0],
        title='Average Precision',
        zeroline=False,
        dtick=0.1,
    ),
    yaxis2=dict(
        domain=[0, 0.49],
        anchor=axis[1][0],
        title='Average Recall',
        zeroline=False,
        dtick=0.1,
    ),
    yaxis3=dict(
        domain=[0, 1],
        anchor=axis[2][0],
        zeroline=False,
        dtick=0.1,
    ),
)
fig = go.Figure(data=traces, layout=layout)

if SAVE:
    write_image(fig, 'output.svg')
py.iplot(fig)


## Output distributions

These are the input and output pianorolls created with the mozart model. Input uses yellow for the ground-truth melody and grey for the accompaniment. Output uses a heatmap.

In [None]:
from plotly import tools
BINSIZE = 0.05

def iqr(ys):
    """
    Return the indices of the samples of `ys` that are not outliers.

    A sample is kept when it lies inside the interval
    `[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`, where `Q1` and `Q3` are the 25th
    and 75th percentiles of `ys` and `IQR = Q3 - Q1`. The bounds are
    inclusive, so constant input keeps all of its samples.

    PARAMS:
        `ys`: a 1D numpy array of samples

    RETURNS:
        a 1D array with the indices of the kept samples
    """
    quartile_1, quartile_3 = np.percentile(ys, [25, 75])
    spread = quartile_3 - quartile_1
    lower_bound = quartile_1 - (spread * 1.5)
    upper_bound = quartile_3 + (spread * 1.5)
    # Both conditions must hold: with `|` every sample satisfies at least
    # one of them and nothing is ever filtered out.
    return np.where((ys >= lower_bound) & (ys <= upper_bound))[0]


def crop(dat, x=True, y=True, ref=None):
    """
    Cut `dat` to the bounding box of the non-zero entries of `ref`.

    PARAMS:
        `dat`: the 2D array to cut
        `x`: if True cut along the second axis (columns)
        `y`: if True cut along the first axis (rows)
        `ref`: the array whose non-zero entries define the bounding box;
            `dat` itself when omitted

    RETURNS:
        the cut array, or `dat` itself when both `x` and `y` are False
    """
    mask = dat if ref is None else ref

    # coordinates of all non-zero entries, one (row, column) pair per line
    coords = np.argwhere(mask)
    lowest = coords.min(axis=0)
    highest = coords.max(axis=0)

    cropped = dat
    if x:
        cropped = cropped[:, slice(lowest[1], highest[1] + 1)]
    if y:
        cropped = cropped[slice(lowest[0], highest[0] + 1), :]
    return cropped


# Values not greater than MIN_TH are discarded before clustering
MIN_TH = 1e-15
# Fallback threshold returned when the clustering method is not recognised
THRESHOLD = 0.5
def set_threshold(arr, CLUSTERING='centroid'):
    """
    Compute a threshold by splitting the values of `arr` into two clusters.

    The array is flattened, the values not greater than `MIN_TH` are
    dropped and only the samples selected by `iqr` are kept. The remaining
    values are split into two clusters and the maximum of the cluster
    ranked lowest is returned.

    PARAMS:
        `arr`: a numpy array of values
        `CLUSTERING`: 'kmeans' (scikit-learn) or one of the fastcluster
            linkages 'single', 'average', 'centroid'

    RETURNS:
        the threshold, or `THRESHOLD` when `CLUSTERING` is not one of the
        supported methods
    """
    print("starting clustering")
    arr = arr.reshape(-1)
    arr = arr[arr > MIN_TH]
    N_CLUSTER = 2
    # 1-based rank of the cluster whose maximum becomes the threshold
    target_cluster = 1

    arr = arr[iqr(arr)]

    if CLUSTERING == 'kmeans':
        from sklearn.cluster import KMeans
        # the two initial centroids are the smallest admissible value and
        # the largest sample
        kmeans = KMeans(n_clusters=N_CLUSTER,
                        init=np.array([MIN_TH, arr.max()]).reshape(-1, 1))

        labels = kmeans.fit_predict(arr.reshape(-1, 1))
    else:
        import fastcluster
        from scipy.cluster.hierarchy import fcluster
        from scipy.spatial.distance import pdist

        # pairwise distances between all samples: the cost grows
        # quadratically with the number of samples
        Z = pdist(arr.reshape(-1, 1))
        if CLUSTERING == 'single':
            X = fastcluster.single(Z)
        elif CLUSTERING == 'average':
            X = fastcluster.average(Z)
        elif CLUSTERING == 'centroid':
            X = fastcluster.centroid(Z)
        else:
            return THRESHOLD

        # fcluster numbers the clusters from 1; the subtraction maps the
        # labels to the range 0..N_CLUSTER-1 (in reversed order)
        labels = N_CLUSTER - fcluster(X, N_CLUSTER, 'maxclust')

    # Collect one representative sample per label: the scan stops as soon as
    # N_CLUSTER distinct labels have been seen. A label met several times
    # before the stop keeps its most recent sample.
    index = {}
    for i, l in enumerate(labels):
        index[l] = arr[i]
        if len(index.keys()) == N_CLUSTER:
            break

    index = sorted(index.items(), key=lambda kv: kv[1]) # list of tuples sorted by values
    target_label = index[target_cluster - 1][0] # the label of the desired cluster
    th = np.max(arr[np.flatnonzero(labels == target_label)]) # max of the down cluster
    print("found threshold: " + str(th))
    # print(str(np.ma.masked_array(arr, 1 - labels).min()))

    return th


def plot_pianoroll(fname, x0=0, x1=-1, color='YlGnBu'):
    """
    Show three stacked heatmaps built from the arrays of an `.npz` file.

    From top to bottom: the mean of 'in_pianoroll' and 'melody', the
    'out_pianoroll', and the 'saliency' multiplied by 'out_pianoroll'.
    When the global `SAVE` is true the figure is also written to
    'output.svg'.

    PARAMS:
        `fname`: path of the `.npz` file
        `x0`, `x1`: bounds of the column slice `[x0:x1]` taken from every
            cropped array; the default `x1=-1` leaves out the last column
        `color`: colorscale of the saliency heatmap
    """
    with np.load(fname) as archive:
        in_pianoroll = archive['in_pianoroll']
        out_pianoroll = archive['out_pianoroll']
        melody = archive['melody']
        saliency = archive['saliency']

    # scale the saliency by the output values (in place)
    saliency *= out_pianoroll

    def window(image):
        # column range requested by the caller
        return image[:, x0:x1]

    def first_active_row(roll):
        # index of the first row containing a non-zero entry
        return [np.argwhere(roll).min(axis=0)[0]]

    input_trace = dict(
        type='heatmap',
        z=window(crop((in_pianoroll + melody)/2)),
        colorscale='Hot',
        showscale=False,
        y=first_active_row(in_pianoroll),
    )
    output_trace = dict(
        type='heatmap',
        z=window(crop(out_pianoroll)),
        colorscale='Reds',
        showscale=True,
        colorbar=dict(len=0.66, y=0.66),
        y=first_active_row(out_pianoroll),
    )
    saliency_trace = dict(
        type='heatmap',
        z=window(crop(saliency, ref=out_pianoroll)),
        colorscale=color,
        showscale=True,
        colorbar=dict(len=0.33, y=0.165),
        y=first_active_row(out_pianoroll),
    )

    fig = tools.make_subplots(
        rows=3,
        cols=1,
        shared_yaxes=True,
        shared_xaxes=True,
        vertical_spacing=0.01,
        horizontal_spacing=0.01,
    )
    stacked = (input_trace, output_trace, saliency_trace)
    for row, trace in enumerate(stacked, start=1):
        fig.append_trace(trace, row, 1)

    if SAVE:
        write_image(fig, 'output.svg')
    print("Pianorolls of " + fname)
    py.iplot(fig)
        
        
def plot_histograms(fname, x0=0, x1=-1):
    """
    Show the overlaid histograms of the predicted values of an `.npz` file.

    The first histogram covers every value of 'out_pianoroll' greater than
    1e-15, the second only those multiplied by 'melody'. A red vertical
    line marks the threshold returned by `set_threshold`. When the global
    `SAVE` is true the figure is also written to 'output.svg'.

    PARAMS:
        `fname`: path of the `.npz` file
        `x0`, `x1`: bounds of the column slice `[x0:x1]` taken after
            cropping the arrays to the non-zero area of 'in_pianoroll'
    """
    with np.load(fname) as archive:
        reference = archive['in_pianoroll']
        predicted = crop(archive['out_pianoroll'], ref=reference)
        melody_predicted = crop(archive['melody'], ref=reference) * predicted

    predicted = predicted[:, x0:x1]
    melody_predicted = melody_predicted[:, x0:x1]
    threshold = set_threshold(predicted)

    def histogram_trace(samples, label):
        # only the strictly positive predictions are counted
        return dict(
            type='histogram',
            x=samples[samples > 1e-15],
            xbins=dict(
                size=BINSIZE,
            ),
            name=label
        )

    traces = [
        histogram_trace(predicted, 'Any pixel > 0'),
        histogram_trace(melody_predicted, 'Melody pixels'),
    ]

    # the tallest bin of the first histogram gives the height of the line
    bin_edges = np.arange(-BINSIZE/2, 1+BINSIZE, BINSIZE)
    counts, _ = np.histogram(traces[0]['x'], bins=bin_edges)
    max_hist_value = np.max(counts)

    threshold_line = dict(
        type='line',
        x0=threshold,
        y0=0,
        x1=threshold,
        y1=max_hist_value,
        line=dict(
            color='red',
            width=3
        ),
    )
    layout = dict(
        barmode='overlay',
        title='Distribution of predicted values for certain pixels',
        shapes=[threshold_line],
    )

    fig = go.Figure(data=traces, layout=layout)
    if SAVE:
        write_image(fig, 'output.svg')
    print("Histograms of " + fname)
    py.iplot(fig)

In [None]:
SAVE = True

# Inspected pieces, each with the optional column window passed to both
# plotting functions.
_INSPECTED_PIECES = [
    ('../inspection/gluck_die_sommernacht.npz', {}),
    ('../inspection/albeniz_tango.npz', {}),
    ('../inspection/liszt_die_glocken_von_marling.npz', {}),
    ('../inspection/schubert_ave_maria.npz', dict(x0=380, x1=620)),
    ('../inspection/mozart_kv475_1.npz', dict(x0=200, x1=700)),
]

for _npz_path, _window in _INSPECTED_PIECES:
    plot_pianoroll(_npz_path, **_window)
    plot_histograms(_npz_path, **_window)

## Kernels
Let us plot the kernels after the first and second layers...

In [None]:
import pickle

def plot_kernels(fname):
    """
    Show the first group of kernels of a pickle file as a grid of heatmaps.

    The heatmaps fill a grid of 3 rows and 7 columns row by row; only the
    first heatmap shows its color scale.

    PARAMS:
        `fname`: path of the pickle file; element 0 of the unpickled object
            is iterated and the first item of each kernel is plotted
    """
    n_cols = 7

    # NOTE: unpickling can execute arbitrary code, only load trusted files.
    # The context manager closes the file, which a bare `open` never did.
    with open(fname, 'rb') as kernel_file:
        kernels = pickle.load(kernel_file, encoding='latin1')

    fig = tools.make_subplots(
        rows=3,
        cols=n_cols,
        print_grid=False,
        shared_yaxes=True,
        shared_xaxes=True,
        vertical_spacing=0.005,
        horizontal_spacing=0.005
    )

    for i, kernel in enumerate(kernels[0]):
        trace = dict(
            type='heatmap',
            z=kernel[0],
            colorscale='YlGnBu',
            showscale=(i == 0)
        )

        # subplot coordinates are 1-based
        row, col = divmod(i, n_cols)
        fig.append_trace(trace, row + 1, col + 1)

    py.iplot(fig)
    
# Plot the kernels stored in the Mozart and in the Pop kernel files
plot_kernels('../inspection/nn_kernels_mozart.pkl')
plot_kernels('../inspection/nn_kernels_pop.pkl')

In [None]:
def plot_subkernels(fname):
    """
    Show the second group of kernels of a pickle file as a grid of heatmaps.

    Element 1 of the unpickled object is iterated: kernel `i` fills column
    `i + 1` of a 21 x 21 grid and its sub-kernel `j` goes to row `j + 1`.
    Only the very first heatmap shows its color scale.

    PARAMS:
        `fname`: path of the pickle file
    """
    # NOTE: unpickling can execute arbitrary code, only load trusted files.
    # The context manager closes the file, which a bare `open` never did.
    with open(fname, 'rb') as kernel_file:
        kernels = pickle.load(kernel_file, encoding='latin1')

    fig = tools.make_subplots(
        rows=21,
        cols=21,
        print_grid=False,
        shared_yaxes=True,
        shared_xaxes=True,
        vertical_spacing=0.001,
        horizontal_spacing=0.001
    )

    for i, kernel in enumerate(kernels[1]):
        for j, sub_kernel in enumerate(kernel):
            trace = dict(
                type='heatmap',
                z=sub_kernel,
                colorscale='YlGnBu',
                # true only for the first sub-kernel of the first kernel
                showscale=(i + j == 0)
            )

            # subplot coordinates are 1-based
            fig.append_trace(trace, j + 1, i + 1)

    py.iplot(fig)
    
# Plot the sub-kernels stored in the Mozart and in the Pop kernel files
plot_subkernels('../inspection/nn_kernels_mozart.pkl')
plot_subkernels('../inspection/nn_kernels_pop.pkl')

In [None]:
# Every other cell reads the inspection files from '../inspection/',
# including this very file in the masked-saliency cell.
plot_pianoroll('../inspection/gluck_masked_saliency.npz', color='Reds')


In [None]:
def plot_masked_saliency(fname, note, x0=0, x1=-1):
    """
    Show input, output and masked saliency of one note as stacked heatmaps.

    Every entry of the 'masked' history is a `(prediction, mask)` pair.
    Only the entries whose mask is non-zero over the whole note are used:
    for each of them, the mean difference between 'out_pianoroll' and the
    prediction over the note is accumulated on the positions where the mask
    is 0. The accumulated values are averaged per position, normalised to a
    maximum absolute value of 1 and multiplied by 'in_pianoroll'.
    A green rectangle marks the note in every subplot. When the global
    `SAVE` is true the figure is also written to 'output.svg'.

    PARAMS:
        `fname`: path of the `.npz` file
        `note`: a `(row, first column, end column)` triple, the end column
            being excluded
        `x0`, `x1`: bounds of the column slice `[x0:x1]` taken from every
            cropped array; the default `x1=-1` leaves out the last column
    """
    with np.load(fname) as archive:
        in_pianoroll = archive['in_pianoroll']
        out_pianoroll = archive['out_pianoroll']
        melody = archive['melody']
        masked_history = archive['masked']

    row, start, end = note[0], note[1], note[2]

    # Float accumulators whatever the dtype of the pianoroll: the in-place
    # divisions below fail on integer or boolean arrays.
    correction = np.zeros_like(in_pianoroll, dtype=float)
    output = np.zeros_like(in_pianoroll, dtype=float)
    for iteration in masked_history:
        prediction = iteration[0]
        mask = iteration[1]
        if np.all(mask[row, start:end]):
            hidden = mask == 0
            correction[hidden] += 1
            output[hidden] += np.mean(
                out_pianoroll[row, start:end] - prediction[row, start:end]
            )

    # average per position; the clipping avoids divisions by zero
    output /= np.clip(correction, 1e-15, None, out=correction)
    peak = np.abs(output).max()
    if peak > 0:
        # nothing to normalise when no history entry matched the note
        output /= peak
    output *= in_pianoroll

    def window(image):
        # column range requested by the caller
        return image[:, x0:x1]

    def first_active_row(roll):
        # index of the first row containing a non-zero entry
        return [np.argwhere(roll).min(axis=0)[0]]

    traces = [
        dict(
            type='heatmap',
            z=window(crop((in_pianoroll + melody)/2)),
            colorscale='Hot',
            showscale=False,
            y=first_active_row(in_pianoroll)
        ),
        dict(
            type='heatmap',
            z=window(crop(out_pianoroll)),
            colorscale='Reds',
            showscale=True,
            colorbar=dict(
                len=0.33,
                y=0.85,
            ),
            y=first_active_row(out_pianoroll),
        ),
        dict(
            type='heatmap',
            z=window(crop(output, ref=out_pianoroll)),
            colorscale='YlGnBu',
            showscale=True,
            colorbar=dict(
                len=0.66,
                y=0.33,
            ),
            y=first_active_row(out_pianoroll)
        ),
    ]

    fig = tools.make_subplots(
        rows=3,
        cols=1,
        shared_yaxes=True,
        shared_xaxes=True,
        vertical_spacing=0.01,
        horizontal_spacing=0.01,
    )

    for subplot_row, trace in enumerate(traces, start=1):
        fig.append_trace(trace, subplot_row, 1)

    # the same unfilled rectangle around the note, once per subplot
    fig.layout.shapes = [
        dict(
            type='rect',
            xref='x1',
            yref=yref,
            x0=start-0.49,
            y0=row+1,
            x1=end-0.49,
            y1=row-1,
            line=dict(
                color='green',
            ),
        ) for yref in ('y1', 'y2', 'y3')
    ]

    if SAVE:
        write_image(fig, 'output.svg')
    print("Pianorolls of " + fname)
    py.iplot(fig)

# The note is given as (row, first column, end column)
plot_masked_saliency('../inspection/gluck_masked_saliency.npz', (59, 60, 63))

In [None]:
import sys
import os
# Add the `convnet_drawer` directory to the import path so that the next
# cell can import it; the path is relative to the current working directory.
sys.path.append(os.path.abspath('../../convnet_drawer/'))

In [None]:
from convnet_drawer import Model, Conv2D, Deconv2D

# Describe the network for convnet_drawer: two convolutions followed by two
# deconvolutions.
# NOTE(review): the positional arguments are presumably (number of filters,
# kernel size, strides) - confirm against the convnet_drawer API.
model = Model(input_shape=(128, 64, 1))
model.add(Conv2D(21, (32, 16), (1, 1), padding="valid"))
# model.add(MaxPooling2D((3, 3), strides=(2, 2)))
model.add(Conv2D(441, (32, 16), (1,1), padding="valid"))
# model.add(MaxPooling2D((3, 3), strides=(2, 2)))
model.add(Deconv2D(21, (32, 16), (1, 1), padding="valid"))
# unlike the layers above, the last one relies on the default strides
model.add(Deconv2D(1, (32, 16), padding="valid"))


# save as svg file
model.save_fig("network.svg")

## Network structure

To see the network structure after editing by hand go to https://vecta.io/app/edit/-La_LUq_4Fld0mlpt2lq

## Graph building
Image: https://vecta.io/app/edit/-La_auXXws0bpTiDvVUf
The graph is built in this way:
1. Create the starting and ending virtual notes (green) and put them at the top and at the bottom of the list of notes
2. Select note 0
3. Set `last offset` = 0
4. Create a branch from the selected note to all the notes with the smallest onset such that:
    1. the onset is also $\ge$ `last offset`
    2. the probability of the note is $\gt$ threshold (red notes)
5. The weight of the branch is $-p$, where $p$ is the probability of the arriving note
6. If there are no notes which satisfy the conditions of point `4.`, connect this note to the virtual last note
7. Select the next note with probability $\gt$ threshold and restart from point `4.`

In the image, green notes are virtual nodes, red notes are notes over threshold, yellow notes are notes under threshold and azure notes are notes over threshold that can't be reached by any path.

## General schema
You can find the general schema here: https://vecta.io/app/edit/-Laa83yl4nPcFXhrVpS3