# Source code for henchman.plotting

# -*- coding: utf-8 -*-

'''The plotting module.

Contents:
        show_template
        show
        gridplot
        piechart
        histogram
        scatter
        timeseries
        dendrogram
        feature_importances
        roc_auc
        f1
'''
import pandas as pd
import numpy as np

from bokeh.models import (ColumnDataSource, HoverTool,
                          Slider, RangeSlider, CheckboxGroup, DateRangeSlider,
                          Range1d, CDSView, Plot, MultiLine,
                          Circle, TapTool, BoxZoomTool, ResetTool, SaveTool)

from bokeh.models.widgets import DataTable, TableColumn, Dropdown
from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges

from bokeh.layouts import column, row
import bokeh.layouts as layouts

from bokeh.plotting import figure

from bokeh.io import output_notebook
from bokeh.io.export import get_screenshot_as_png
import bokeh.io as io

from math import pi

from bokeh.palettes import Category20, Spectral4

from henchman.learning import _raw_feature_importances
from henchman.learning import create_model

from sklearn.metrics import (roc_auc_score, precision_score,
                             recall_score, f1_score, roc_curve)

import networkx as nx


def show_template():
    '''Print a copy-and-paste template for `show`.

    The template lists every keyword accepted by
    :func:`show <henchman.plotting.show>` together with a placeholder value.

    Example:
        >>> import henchman.plotting as hplot
        >>> hplot.show_template()
    '''
    template_lines = (
        "show(plot,",
        " static=False,",
        " png=False,",
        " hover=False,",
        " colors=None,",
        " width=None,",
        " height=None,",
        " title='Temporary title',",
        " x_axis='my xaxis name',",
        " y_axis='my yaxis name',",
        " x_range=(0, 10) or None,",
        " y_range=(0, 10) or None)",
    )
    # Each template line is newline-terminated; print() adds one more newline.
    print('\n'.join(template_lines) + '\n')
    return None
def _modify_plot(plot, figargs): '''Add text and modify figure attributes. This is an internal function which allows for figure attributes to be passed into interactive functions. Args: plot (bokeh.figure): The figure to modify figargs (dict[assorted]): A dictionary of width, height, title, x_axis, y_axis, x_range and y_range. ''' if figargs['width'] is not None: plot.width = figargs['width'] if figargs['height'] is not None: plot.height = figargs['height'] if figargs['title'] is not None: plot.title.text = figargs['title'] if figargs['x_axis'] is not None: plot.xaxis.axis_label = figargs['x_axis'] if figargs['y_axis'] is not None: plot.yaxis.axis_label = figargs['y_axis'] if figargs['x_range'] is not None: plot.x_range = Range1d(figargs['x_range'][0], figargs['x_range'][1]) if figargs['y_range'] is not None: plot.y_range = Range1d(figargs['y_range'][0], figargs['y_range'][1]) return plot
def show(plot, png=False, static=False, hover=True, width=None, height=None,
         title=None, x_axis=None, y_axis=None, x_range=None, y_range=None,
         colors=None, fig=False):
    '''Format and show a bokeh plot.

    This is a wrapper around bokeh show which can add common plot attributes
    like height, axis labels and whether or not you would like the output as
    a png. This function also runs the bokeh function ``output_notebook()``
    to start.

    You can get a full list of options by function with ``show_template()``.

    Args:
        plot (function): The plot to show. A callable accepting a ``figargs``
            keyword, as returned by the plotting functions in this module
            when they are called without ``figargs``.
        static (bool): If True, show a static bokeh plot.
        hover (bool): If True, show the hovertool. Default is True.
        width (int, optional): Plot width.
        height (int, optional): Plot height.
        title (str, optional): The title for the plot.
        x_axis (str, optional): The x_axis label.
        y_axis (str, optional): The y_axis label.
        x_range (tuple[int, int], optional): A min and max x value to plot.
        y_range (tuple[int, int], optional): A min and max y value to plot.
        colors (list[str], optional): A list of colors to use for the plot.
        png (bool): If True, return a png of the plot. Default is False.
            Forces ``static`` to True for the rendered plot.
        fig (bool, advanced): If True, return a bokeh figure instead of
            showing the plot. Only use if you want to manipulate the bokeh
            figure directly. Takes precedence over ``png``.

    Returns:
        The object built by ``plot`` when ``fig`` is True, the result of
        ``get_screenshot_as_png`` when ``png`` is True, otherwise the result
        of ``bokeh.io.show``.

    Example:
        >>> import henchman.plotting as hplot
        >>> hplot.show_template()
        show(plot,
         static=False,
         png=False,
         hover=False,
         colors=None,
         width=None,
         height=None,
         title='Temporary title',
         x_axis='my xaxis name',
         y_axis='my yaxis name',
         x_range=(0, 10) or None,
         y_range=(0, 10) or None)

        >>> hplot.show(plot, width=500, title='My Plot Title')
        >>> hplot.show(plot, png=True, static=True)
    '''
    output_notebook(hide_banner=True)
    figargs = {'static': static,
               'png': png,
               'hover': hover,
               'width': width,
               'height': height,
               'title': title,
               'x_axis': x_axis,
               'y_axis': y_axis,
               'x_range': x_range,
               'y_range': y_range,
               'colors': colors}

    if fig:
        return plot(figargs=figargs)

    if png:
        # A screenshot needs a plain figure, not an interactive document
        # function, so force the static path before building the plot.
        figargs['static'] = True

    # Build the plot exactly once: some plot factories (roc_auc, f1) fit a
    # model on every call, so building a throw-away copy first is expensive.
    rendered = plot(figargs=figargs)
    if png:
        return get_screenshot_as_png(rendered, driver=None)
    return io.show(rendered)
def gridplot(plots, n_cols=1):
    '''Show several plots arranged in a grid.

    A thin wrapper around bokeh's ``gridplot`` intended for henchman plots.
    The figures must be ``static`` for this to work. This call is a work in
    progress and will likely be deprecated in favor of something stable.

    Args:
        plots (list[bokeh.figure]): The plots to show. Either a flat list or
            a list of lists (one inner list per row).
        n_cols (int): The number of columns. Ignored when a list of lists is
            passed in.

    Example:
        >>> import henchman.plotting as hplot
        >>> p1 = hplot.show(plot, static=True, fig=True)
        >>> p2 = hplot.show(plot, static=True, fig=True)
        >>> hplot.gridplot([p1, p2], n_cols=2)
    '''
    output_notebook(hide_banner=True)
    # The layout is decided by the shape of the first element only.
    if isinstance(plots[0], list):
        grid = layouts.gridplot(plots)
    else:
        grid = layouts.gridplot(plots, ncols=n_cols)
    return io.show(grid)
def piechart(col, sort=True, mergepast=None, drop_n=None, figargs=None):
    '''Creates a piechart.

    Finds all of the unique values in a column and makes a piechart out of
    them. By default, this will make a dynamic piechart with sliders for the
    different parameters.

    Args:
        col (pd.Series): The column from which to make the piechart.
        sort (bool): Whether or not to sort by frequency for static plot.
            Default is True.
        mergepast (int): Merge infrequent column values for static plot.
            Default is None, which merges nothing.
        drop_n (int): How many high frequency values to drop for static
            plot. Default is None.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a column named ``car_color``:

        >>> import henchman.plotting as hplot
        >>> plot = hplot.piechart(X['car_color'])
        >>> hplot.show(plot)

        For a static plot:

        >>> import henchman.plotting as hplot
        >>> plot = hplot.piechart(X['car_color'], sort=False)
        >>> hplot.show(plot, static=True)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return piechart(col, sort, mergepast, drop_n, figargs)
        return _deferred

    frame = _make_piechart_source(col, mergepast, sort, drop_n, figargs)
    source = ColumnDataSource(frame)
    plot = _modify_plot(_make_piechart_plot(source, figargs), figargs)

    if figargs['static']:
        return plot

    def _application(doc):
        def _refresh(attr, old, new):
            # Rebuild the data from the current widget state; errors are
            # printed rather than raised.
            try:
                updated = _make_piechart_source(col,
                                                sort=sorted_button.active,
                                                mergepast=merge_slider.value,
                                                drop_n=drop_slider.value,
                                                figargs=figargs)
                source.data = ColumnDataSource(updated).data
            except Exception as e:
                print(e)

        sorted_button, merge_slider, drop_slider = _piechart_widgets(
            col, sort, mergepast, drop_n, _refresh)
        sliders = column(merge_slider, drop_slider)
        doc.add_root(column(row(sliders, sorted_button), plot))

    return _application
def histogram(col, y=None, n_bins=10, col_max=None, col_min=None,
              normalized=False, figargs=None):
    '''Creates a histogram.

    This function takes a single input and creates a histogram from it.
    There is an optional second column input for labels, if you would like
    to see how a label is distributed relative to your numeric variable.

    Args:
        col (pd.Series): The column from which to make a histogram.
        y (pd.Series, optional): A binary label that you would like to track.
        n_bins (int): The number of bins of the histogram. Default is 10.
        col_max (float): Maximum value to include in histogram.
        col_min (float): Minimum value to include in histogram.
        normalized (bool): Whether or not to normalize the columns.
            Default is False.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a column named ``amount`` and a label
        ``y``, you can compare them with

        >>> import henchman.plotting as hplot
        >>> plot1 = hplot.histogram(X['amount'], y, normalized=False)
        >>> hplot.show(plot1)

        If you wanted a single variable histogram instead, omit y:

        >>> plot2 = hplot.histogram(X['amount'], col_max=200, n_bins=20)
        >>> hplot.show(plot2)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return histogram(col, y, n_bins, col_max, col_min, normalized,
                             figargs=figargs)
        return _deferred

    frame = _make_histogram_source(col, y, n_bins, col_max, col_min,
                                   normalized)
    source = ColumnDataSource(frame)
    plot = _modify_plot(_make_histogram_plot(y, source, figargs), figargs)

    if figargs['static']:
        return plot

    def _application(doc):
        def _refresh(attr, old, new):
            # Rebuild the bins from the current widget state; errors are
            # printed rather than raised.
            try:
                updated = _make_histogram_source(
                    col, y,
                    n_bins=slider.value,
                    col_max=range_select.value[1],
                    col_min=range_select.value[0],
                    normalized=normalized)
                source.data = ColumnDataSource(updated).data
            except Exception as e:
                print(e)

        slider, range_select = _histogram_widgets(col, y, n_bins, col_max,
                                                  col_min, _refresh)
        doc.add_root(column(slider, range_select, plot))

    return _application
def timeseries(col_1, col_2, col_max=None, col_min=None, n_bins=10,
               aggregate='mean', figargs=None):
    '''Creates a time based aggregations of a numeric variable.

    This function allows for the user to mean, count, sum or find the min
    or max of a second variable with regards to a timeseries.

    Args:
        col_1 (pd.Series): The column from which to create bins. Must be a
            datetime.
        col_2 (pd.Series): The column to aggregate.
        col_max (pd.datetime): The maximum value for the x-axis.
            Default is None.
        col_min (pd.datetime): The minimum value for the x-axis.
            Default is None.
        n_bins (int): The number of time bins to make.
        aggregate (str): What aggregation to do on the numeric column.
            Options are 'mean', 'sum', 'count', 'max' and 'min'.
            Default is 'mean'.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a columns named ``amount`` and ``date``.

        >>> import henchman.plotting as hplot
        >>> plot = hplot.timeseries(X['date'], X['amount'])
        >>> hplot.show(plot)

        For a bokeh plot without sliders:

        >>> plot2 = hplot.timeseries(X['date'], X['amount'], n_bins=50)
        >>> hplot.show(plot2, static=True)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return timeseries(col_1, col_2, col_max, col_min, n_bins,
                              aggregate, figargs=figargs)
        return _deferred

    frame = _make_timeseries_source(col_1, col_2, col_max, col_min, n_bins,
                                    aggregate)
    source = ColumnDataSource(frame)
    plot = _modify_plot(_make_timeseries_plot(source, figargs), figargs)

    if figargs['static']:
        return plot

    def _application(doc):
        def _refresh(attr, old, new):
            # Rebuild the bins from the current widget state and keep the
            # dropdown label in sync; errors are printed rather than raised.
            try:
                updated = _make_timeseries_source(
                    col_1, col_2,
                    col_max=range_select.value_as_datetime[1],
                    col_min=range_select.value_as_datetime[0],
                    n_bins=slider.value,
                    aggregate=dropdown.value)
                source.data = ColumnDataSource(updated).data
                dropdown.label = dropdown.value
            except Exception as e:
                print(e)

        slider, range_select, dropdown = _timeseries_widgets(
            col_1, col_2, col_max, col_min, n_bins, aggregate, _refresh)
        doc.add_root(column(slider, range_select, dropdown, plot))

    return _application
def scatter(col_1, col_2, cat=None, label=None, aggregate='last',
            figargs=None):
    '''Creates a scatter plot of two variables.

    This function allows for the display of two variables with an optional
    argument to groupby. By default, this allows for the user to see what
    two variable looks like as grouped by another. A standard example would
    be to look at the "last" row for a column that's changing over time.

    Args:
        col_1 (pd.Series): The x-values of the plotted points.
        col_2 (pd.Series): The y-values of the plotted points.
        cat (pd.Series, optional): A categorical variable to aggregate by.
        label (pd.Series, optional): A numeric label to be used in the
            hovertool.
        aggregate (str): The aggregation to use. Options are 'mean', 'last',
            'sum', 'max' and 'min'.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a columns named ``amount`` and
        ``quantity``.

        >>> import henchman.plotting as hplot
        >>> plot = hplot.scatter(X['amount'], X['quantity'])
        >>> hplot.show(plot)

        If you would like to see the amount, quantity pair as aggregated by
        the ``month`` column:

        >>> plot2 = hplot.scatter(X['date'], X['amount'], cat=X['month'],
        ...                       aggregate='mean')
        >>> hplot.show(plot2)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return scatter(col_1, col_2, cat, label, aggregate,
                           figargs=figargs)
        return _deferred

    frame = _make_scatter_source(col_1, col_2, cat, label, aggregate)
    source = ColumnDataSource(frame)
    plot = _make_scatter_plot(col_1, col_2, label, cat, source, figargs)
    plot = _modify_plot(plot, figargs)

    if figargs['static']:
        return plot

    def _application(doc):
        def _refresh(attr, old, new):
            # Re-aggregate with the selected method and keep the dropdown
            # label in sync; errors are printed rather than raised.
            try:
                updated = _make_scatter_source(col_1, col_2, cat, label,
                                               aggregate=dropdown.value)
                source.data = ColumnDataSource(updated).data
                dropdown.label = dropdown.value
            except Exception as e:
                print(e)

        dropdown = _scatter_widgets(col_1, col_2, aggregate, _refresh)
        # The aggregation dropdown is only shown when there is a category
        # to aggregate by.
        if cat is None:
            doc.add_root(plot)
        else:
            doc.add_root(column(dropdown, plot))

    return _application
def feature_importances(X, model, n_feats=5, figargs=None):
    '''Plot feature importances.

    Draws a horizontal bar chart of the first ``n_feats`` entries returned
    by ``_raw_feature_importances(X, model)``, in reversed order on the
    y-axis.

    Args:
        X (pd.DataFrame): A dataframe with which you have trained.
        model: Any fit model with a ``feature_importances_`` attribute.
        n_feats (int): The number of features to plot.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        >>> import henchman.plotting as hplot
        >>> plot = hplot.feature_importances(X, model, n_feats=10)
        >>> hplot.show(plot)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return feature_importances(X, model, n_feats, figargs=figargs)
        return _deferred

    # Each entry is indexed as (importance, feature name).
    top_entries = _raw_feature_importances(X, model)[0:n_feats]
    features = [entry[1] for entry in top_entries]
    importances = [entry[0] for entry in top_entries]
    # Reverse both lists so the first entry ends up last on the y-axis.
    features.reverse()
    importances.reverse()

    output_notebook()
    source = ColumnDataSource(data={'feature': features,
                                    'importance': importances})
    plot = figure(y_range=features,
                  height=500,
                  title="Random Forest Feature Importances")
    plot.hbar(y='feature',
              right='importance',
              height=.8,
              left=0,
              source=source,
              color="#008891")
    plot.toolbar_location = None
    plot.yaxis.major_label_text_font_size = '10pt'

    return _modify_plot(plot, figargs)
def roc_auc(X, y, model, pos_label=1, prob_col=1, n_splits=1, figargs=None):
    '''Plots the receiver operating characteristic curve.

    This function creates a fit model and shows the results of the roc
    curve.

    Args:
        X (pd.DataFrame): The dataframe on which to create a model.
        y (pd.Series): The labels for which to create a model.
        model: The model handed to ``create_model`` for fitting.
        pos_label (int): Which label to check for fpr and tpr. Default is 1.
        prob_col (int): The columns of the probs dataframe to use.
        n_splits (int): The number of splits to use in validation.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a binary classification label y:

        >>> import henchman.plotting as hplot
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> plot = hplot.roc_auc(X, y, RandomForestClassifier())
        >>> hplot.show(plot)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return roc_auc(X, y, model, pos_label, prob_col, n_splits,
                           figargs=figargs)
        return _deferred

    (scores, model), df_list = create_model(
        X, y, model, roc_auc_score, _return_df=True, n_splits=n_splits)

    # NOTE(review): df_list[1] / df_list[3] are presumably the held-out
    # features and labels -- confirm against henchman.learning.create_model.
    eval_features = df_list[1]
    eval_labels = df_list[3]
    probs = model.predict_proba(eval_features)
    fpr, tpr, thresholds = roc_curve(eval_labels,
                                     probs[:, prob_col],
                                     pos_label=pos_label)

    plot = figure(tools=['box_zoom', 'save', 'reset'])
    plot.line(x=fpr, y=tpr)
    plot.title.text = 'Receiver operating characteristic'
    plot.xaxis.axis_label = 'False Positive Rate'
    plot.yaxis.axis_label = 'True Positive Rate'
    # Dashed diagonal (y = x) for reference.
    plot.line(x=fpr, y=fpr, color='red', line_dash='dashed')

    return _modify_plot(plot, figargs)
def dendrogram(D, figargs=None):
    '''Creates a dendrogram plot.

    This plot can show full structure of a given dendrogram. Features are
    drawn as nodes on a circle and connected by the edges of the selected
    step; the interactive version adds a slider to move between steps.

    Args:
        D (henchman.selection.Dendrogram): An initialized dendrogram object.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Examples:
        >>> from henchman.selection import Dendrogram
        >>> from henchman.plotting import show
        >>> import henchman.plotting as hplot
        >>> D = Dendrogram(X)
        >>> plot = hplot.dendrogram(D)
        >>> show(plot)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return dendrogram(D, figargs=figargs)
        return _deferred

    G = nx.Graph()

    # Data sources: one row per feature, one row per edge of step 0, and a
    # single-row summary of the current step.
    vertices_source = ColumnDataSource(
        pd.DataFrame({'index': D.columns.keys(),
                      'desc': list(D.columns.values())}))
    edges_source = ColumnDataSource(
        pd.DataFrame(D.edges[0]).rename(columns={1: 'end', 0: 'start'}))
    step_source = ColumnDataSource(
        pd.DataFrame({'step': [0],
                      'thresh': [D.threshlist[0]],
                      'components': [len(D.graphs[0])]}))

    # networkx is only used to compute the circular layout; node and edge
    # identifiers are stringified before being added.
    node_names = [str(node) for node in vertices_source.data['index']]
    edge_starts = [str(node) for node in edges_source.data['start']]
    edge_ends = [str(node) for node in edges_source.data['end']]
    G.add_nodes_from(node_names)
    G.add_edges_from(zip(edge_starts, edge_ends))

    graph_renderer = from_networkx(G, nx.circular_layout,
                                   scale=1, center=(0, 0))
    node_renderer = graph_renderer.node_renderer
    edge_renderer = graph_renderer.edge_renderer

    node_renderer.data_source = vertices_source
    node_renderer.view = CDSView(source=vertices_source)
    edge_renderer.data_source = edges_source
    edge_renderer.view = CDSView(source=edges_source)

    plot = Plot(plot_width=400, plot_height=400,
                x_range=Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1))
    plot.title.text = "Feature Connectivity"

    node_renderer.glyph = Circle(size=5, fill_color=Spectral4[0])
    node_renderer.selection_glyph = Circle(size=15, fill_color=Spectral4[2])
    edge_renderer.data_source = edges_source
    edge_renderer.glyph = MultiLine(line_color="#CCCCCC",
                                    line_alpha=0.6,
                                    line_width=.5)
    edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2],
                                              line_width=3)
    node_renderer.hover_glyph = Circle(size=5, fill_color=Spectral4[1])

    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = NodesAndLinkedEdges()
    plot.renderers.append(graph_renderer)

    hover_tool = HoverTool(tooltips=[("feature", "@desc"),
                                     ("index", "@index"), ])
    plot.add_tools(hover_tool, TapTool(), BoxZoomTool(), SaveTool(),
                   ResetTool())

    plot = _modify_plot(plot, figargs)

    if figargs['static']:
        return plot

    def _application(doc):
        table_columns = [TableColumn(field='step', title='Step'),
                         TableColumn(field='thresh', title='Thresh'),
                         TableColumn(field='components', title='Components')]
        data_table = DataTable(source=step_source,
                               columns=table_columns,
                               height=50,
                               width=400)

        def _refresh(attr, old, new):
            # Swap in the edges and summary of the selected step; errors
            # are printed rather than raised.
            try:
                edges = D.edges[slider.value]
                edges_source.data = ColumnDataSource(
                    pd.DataFrame(edges).rename(
                        columns={1: 'end', 0: 'start'})).data
                step_source.data = ColumnDataSource(
                    {'step': [slider.value],
                     'thresh': [D.threshlist[slider.value]],
                     'components': [len(D.graphs[slider.value])]}).data
            except Exception as e:
                print(e)

        slider = Slider(start=0,
                        end=(len(D.edges) - 1),
                        value=0,
                        step=1,
                        title="Step")
        slider.on_change('value', _refresh)
        doc.add_root(column(slider, data_table, plot))

    return _application
def f1(X, y, model, n_precs=1000, n_splits=1, figargs=None):
    '''Plots the precision, recall and f1 at various thresholds.

    This function creates a fit model and shows the precision, recall and
    f1 results at multiple thresholds.

    Args:
        X (pd.DataFrame): The dataframe on which to create a model.
        y (pd.Series): The labels for which to create a model.
        model: The model handed to ``create_model`` for fitting.
        n_precs (int): The number of thresholds to sample between 0 and 1.
        n_splits (int): The number of splits to use in validation.
        figargs (dict, optional): Figure options supplied by ``show``. When
            omitted, a callable waiting for ``figargs`` is returned instead
            of a plot.

    Example:
        If the dataframe ``X`` has a binary classification label ``y``:

        >>> import henchman.plotting as hplot
        >>> from sklearn.ensemble import RandomForestClassifier
        >>> plot = hplot.f1(X, y, RandomForestClassifier())
        >>> hplot.show(plot)
    '''
    if figargs is None:
        # Defer building until `show` provides the figure options.
        def _deferred(figargs):
            return f1(X, y, model, n_precs, n_splits, figargs=figargs)
        return _deferred

    (scores, model), df_list = create_model(
        X, y, model, roc_auc_score, _return_df=True, n_splits=n_splits)

    # NOTE(review): df_list[1] / df_list[3] are presumably the held-out
    # features and labels -- confirm against henchman.learning.create_model.
    probs = model.predict_proba(df_list[1])

    # Thresholds 0, 1/n_precs, ..., (n_precs - 1)/n_precs.
    threshes = [step / float(n_precs) for step in range(0, n_precs)]
    precisions = [precision_score(df_list[3], probs[:, 1] > cutoff)
                  for cutoff in threshes]
    recalls = [recall_score(df_list[3], probs[:, 1] > cutoff)
               for cutoff in threshes]
    fones = [f1_score(df_list[3], probs[:, 1] > cutoff)
             for cutoff in threshes]

    plot = figure(tools=['box_zoom', 'save', 'reset'])
    plot.line(x=threshes, y=precisions, color='green', legend='precision')
    plot.line(x=threshes, y=recalls, color='blue', legend='recall')
    plot.line(x=threshes, y=fones, color='red', legend='f1')

    plot.xaxis.axis_label = 'Threshold'
    plot.title.text = 'Precision, Recall, and F1 by Threshold'

    return _modify_plot(plot, figargs)
# Piechart Utilities # def _make_piechart_source(col, mergepast=None, sort=True, drop_n=None, figargs=None): if mergepast is None: mergepast = col.nunique() values = col.reset_index().groupby(col.name).count() total = float(col.shape[0]) counts = values[values.columns[0]].tolist() percents = [x / total for x in counts] tmp = pd.DataFrame({'names': values.index, 'counts': counts, 'percents': percents}) if sort: tmp = tmp.sort_values(by='counts', ascending=False) if drop_n: tmp = tmp.iloc[drop_n:] tmp['percents'] = tmp['percents']/tmp['percents'].sum() starts = [] ends = [] loc = 0 for perc in tmp['percents']: starts.append(loc) loc += 2*pi*perc ends.append(loc) tmp['starts'] = starts tmp['ends'] = ends if mergepast < tmp.shape[0]: percent = tmp.iloc[mergepast:]['percents'].sum() count = tmp.iloc[mergepast:]['counts'].sum() start = tmp.iloc[mergepast:mergepast+1]['starts'].values end = tmp.iloc[-1:]['ends'].values tmp = pd.concat([tmp.iloc[:mergepast], pd.DataFrame({'names': ['Other'], 'counts': [count], 'percents': [percent], 'starts': start, 'ends': end})]) if figargs['colors'] is None: figargs['colors'] = Category20[20] tmp['colors'] = [figargs['colors'][i % (len(figargs['colors'])-1)] for i, _ in enumerate(tmp['names'])] return tmp def _make_piechart_plot(source, figargs): tools = ['box_zoom', 'save', 'reset'] if figargs['hover']: hover = HoverTool( tooltips=[ ("Name", " @names"), ("Count", " @counts"), ("Percent", " @percents{0%}"), ], mode='mouse') tools = tools + [hover] plot = figure(height=500, tools=tools, toolbar_location='above') plot.wedge(x=0, y=0, radius=0.3, start_angle='starts', end_angle='ends', line_color='white', color='colors', legend='names', source=source) plot.axis.axis_label = None plot.axis.visible = False plot.grid.grid_line_color = None return plot def _piechart_widgets(col, sort, mergepast, drop_n, callback): if sort: active = [0] else: active = [] sorted_button = CheckboxGroup( labels=["Sorted"], active=active) 
sorted_button.on_change('active', callback) merge_slider = Slider(start=1, end=col.nunique(), value=mergepast or col.nunique(), step=1, title="Merge Slider") merge_slider.on_change('value', callback) drop_slider = Slider(start=0, end=col.nunique(), value=drop_n or 0, step=1, title="Drop Slider") drop_slider.on_change('value', callback) return sorted_button, merge_slider, drop_slider # Timeseries Utilities # def _make_timeseries_source(col_1, col_2, col_max=None, col_min=None, n_bins=10, aggregate='mean'): col_1_time = pd.to_datetime(col_1) if col_max is None: col_max = col_1_time.max() if col_min is None: col_min = col_1_time.min() truncated = col_1_time[(col_1_time <= col_max) & (col_1_time >= col_min)] tmp = pd.DataFrame({col_1.name: truncated, 'height': col_2, 'splits': pd.cut(pd.to_numeric(truncated), n_bins, right=False)}) tmp = tmp.groupby('splits')['height'].aggregate(aggregate).reset_index() tmp['left'] = list(tmp['splits'].apply(lambda x: pd.to_datetime(x.left))) tmp['right'] = list(tmp['splits'].apply(lambda x: pd.to_datetime(x.right))) tmp = tmp[['left', 'right', 'height']] return tmp def _make_timeseries_plot(source, figargs): tools = ['box_zoom', 'save', 'reset'] if figargs['hover']: hover = HoverTool( tooltips=[ ("Height", " @height"), ("Bin", " [@left{%R %F}, @right{%R %F})") ], formatters={ 'left': 'datetime', 'right': 'datetime' }, mode='mouse') tools += [hover] plot = figure(tools=tools, x_axis_type='datetime') if figargs['colors'] is None: plot_color = '#1F77B4' line_color = 'white' else: assert len(figargs['colors']) >= 2 plot_color = figargs['colors'][0] line_color = figargs['colors'][1] plot.quad(top='height', bottom=0, left='left', right='right', color=plot_color, line_color=line_color, source=source, fill_alpha=.5) return plot def _timeseries_widgets(col_1, col_2, col_max, col_min, n_bins, aggregate, callback): col_1_time = pd.to_datetime(col_1) if col_max is None: col_max = col_1_time.max() if col_min is None: col_min = col_1_time.min() 
slider = Slider(start=1, end=100, value=n_bins, step=1, title="Bins") slider.on_change('value', callback) range_select = DateRangeSlider(start=col_1_time.min(), end=col_1_time.max(), value=(col_min, col_max), step=1, title='Range', format='%R %F') range_select.on_change('value', callback) dropdown = Dropdown(value=aggregate, label=aggregate, button_type="default", menu=[('mean', 'mean'), ('count', 'count'), ('sum', 'sum'), ('max', 'max'), ('min', 'min')]) dropdown.on_change('value', callback) return slider, range_select, dropdown # Histogram Utilities # def _make_histogram_source(col, y, n_bins, col_max, col_min, normalized): if col_max is None: col_max = col.max() if col_min is None: col_min = col.min() truncated = col[(col <= col_max) & (col >= col_min)] hist, edges = np.histogram(truncated, bins=n_bins, density=normalized) if normalized: hist = [height * (edges[1] - edges[0]) for height in hist] cols = pd.DataFrame({'col': col, 'label': y}) tmp = pd.DataFrame({'hist': hist, 'left': edges[:-1], 'right': edges[1:]}) if y is not None: label_hist = np.nan_to_num(cols['label'].groupby( pd.cut(col, edges, right=False)).sum().values) if normalized: label_hist = label_hist / (label_hist.sum()) tmp['label'] = label_hist return tmp def _make_histogram_plot(y, source, figargs): tools = ['box_zoom', 'save', 'reset'] if figargs['hover']: if y is not None: hover = HoverTool( tooltips=[ ("Height", " @hist"), ("Label", " @label"), ("Bin", " [@left{0.00}, @right{0.00})"), ], mode='mouse') else: hover = HoverTool( tooltips=[ ("Height", " @hist"), ("Bin", " [@left{0.00}, @right{0.00})"), ], mode='mouse') tools += [hover] if figargs['colors'] is None: plot_1_color = '#1F77B4' plot_2_color = 'purple' line_color = 'white' else: assert len(figargs['colors']) >= 3 plot_1_color = figargs['colors'][0] plot_2_color = figargs['colors'][1] line_color = figargs['colors'][2] plot = figure(tools=tools) plot.quad(top='hist', bottom=0, left='left', right='right', color=plot_1_color, 
line_color=line_color, source=source, fill_alpha=.5) if y is not None: plot.quad(top='label', bottom=0, left='left', right='right', color=plot_2_color, line_color=line_color, source=source, fill_alpha=.5) return plot def _histogram_widgets(col, y, n_bins, col_max, col_min, callback): if col_max is None: col_max = col.max() if col_min is None: col_min = col.min() slider = Slider(start=1, end=100, value=n_bins, step=1, title="Bins") slider.on_change('value', callback) range_select = RangeSlider(start=col.min(), end=col.max(), value=(col_min, col_max), step=5, title='Histogram Range') range_select.on_change('value', callback) return slider, range_select # Scatter Utilities # def _make_scatter_source(col_1, col_2, cat=None, label=None, aggregate='last'): tmp = pd.DataFrame({'col_1': col_1, 'col_2': col_2}) if label is not None: tmp['label'] = label if cat is not None: tmp['cat'] = cat tmp = tmp.groupby('cat').aggregate(aggregate).reset_index() return tmp def _make_scatter_plot(col_1, col_2, label, cat, source, figargs): tools = ['box_zoom', 'save', 'reset'] if figargs['hover']: hover = HoverTool(tooltips=[ (col_1.name, ' @col_1'), (col_2.name, ' @col_2'), ]) if label is not None: hover.tooltips += [('label', ' @label')] if cat is not None: hover.tooltips += [('cat', ' @cat')] tools += [hover] radius = (col_1.max() - col_1.min()) / 100. plot = figure(tools=tools) if figargs['colors'] is not None: scatter_color = figargs['colors'][0] else: scatter_color = '#1F77B4' plot.scatter(x='col_1', y='col_2', color=scatter_color, radius=radius, source=source, alpha=.8) return plot def _scatter_widgets(col_1, col_2, aggregate, callback): dropdown = Dropdown(value=aggregate, label=aggregate, button_type="default", menu=[('mean', 'mean'), ('last', 'last'), ('sum', 'sum'), ('max', 'max'), ('min', 'min')]) dropdown.on_change('value', callback) return dropdown