Creating a Dynamically-Colored Visual with Matplotlib

24 Nov 2019

Creating a Dynamically-Colored Visual with Matplotlib

This project was inspired by the following paper:

Ferreira, N., Fisher, D., & Konig, A. C. (2014, April). Sample-oriented task-driven visualizations: allowing users to make better, more confident decisions. In Proceedings of the SIGCHI Conference on Human Factors in Computing Systems (pp. 571-580). ACM. (video)

In the figures below, the color of each bar is based on how much of that bar's data distribution lies below a chosen y-value (a gradient ranging from dark blue when the distribution is almost certainly below the y-value, to white when the y-value sits at the center of the distribution, to dark red when the distribution is almost certainly above the y-value). The 95% interval of each bar's data distribution (mean ± 1.96 standard deviations) is captured as the green error bars. The chosen y-value is captured as the thin green line in each plot.

%matplotlib notebook

import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st

import matplotlib as mpl
from matplotlib import cm
import numpy as np
# Set matplotlib's default figure resolution to 150 DPI.
mpl.rcParams['figure.dpi']= 150

# Fix the RNG seed so the synthetic samples are reproducible across runs.
np.random.seed(12345)

# Years shown on the x-axis; they double as the DataFrame column labels.
xs = [1992, 1993, 1994, 1995]

# One column of 3650 normally distributed draws per year, generated in year
# order from (mean, standard deviation) pairs.
df = pd.DataFrame(
    [np.random.normal(loc, scale, 3650)
     for loc, scale in ((32000, 5000),
                        (42000, 2000),
                        (43500, 2800),
                        (48000, 1400))],
    index=xs,
).T

# Bar positions 0..3, one per year.
i = list(np.arange(4))

# Per-year sample mean and sample standard deviation, in the same order as xs.
means = [df[year].mean() for year in xs]
stds = [df[year].std() for year in xs]

Create Visual

def create_visual(yval=30000):
    """Draw the yearly bar chart, coloring each bar relative to ``yval``.

    Draws onto the current pyplot axes. Reads the module-level ``means``,
    ``stds`` (one entry per bar) and ``i`` (the bar x-positions).

    For each bar, ``yval`` is converted to a z-score against that bar's mean
    and standard deviation, and the standard normal CDF of the z-score
    (a value in [0, 1]) selects the bar's color from the ``'seismic_r'``
    colormap. A horizontal colorbar for that colormap, green error bars of
    +/- 1.96 standard deviations, and a thin green horizontal line at
    ``yval`` are added to the same axes.

    Nothing is written to disk; call ``plt.savefig(...)`` afterwards to
    persist the figure.

    Parameters
    ----------
    yval : float, optional
        The y-value of interest that each bar's distribution is compared
        against. Defaults to 30000.

    Returns
    -------
    None
    """
    colormap = plt.get_cmap('seismic_r')
    ax = plt.gca()

    # Position of yval within each bar's normal distribution, as a
    # cumulative probability in [0, 1], mapped straight to an RGBA color.
    colors = [colormap(st.norm.cdf((yval - mean) / std))
              for mean, std in zip(means, stds)]

    # The colorbar spans the same [0, 1] probability range used above;
    # the range is spelled out rather than left for colorbar to fill in.
    cbar = plt.colorbar(cm.ScalarMappable(norm=mpl.colors.Normalize(vmin=0,
                                                                    vmax=1),
                                          cmap=colormap),
                        orientation='horizontal',
                        shrink=0.5,
                        pad=0.0625,
                        ax=ax)
    for label in cbar.ax.xaxis.get_ticklabels():
        label.set_fontsize(8)
    cbar.ax.tick_params(length=0)
    cbar.outline.set_linewidth(0.25)

    plt.bar(x=i,
            height=means,
            yerr=[1.96*std for std in stds],
            error_kw=dict(lw=1.5,
                          capsize=7.5,
                          capthick=1.5,
                          ecolor='green'),
            edgecolor='k',
            lw=.25,
            color=colors,
            width=.5)

    plt.title('Dynamic Coloration Demonstration',
              fontsize=10,
              alpha=0.8)

    plt.xticks(i,
               ('1992', '1993', '1994', '1995'))
    plt.xlim([-0.5,3.5])

    plt.xticks(fontsize=8,
               alpha=0.8)
    plt.yticks(fontsize=8,
               alpha=0.8)

    # Strip the frame and tick marks, leaving only bars and labels.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.tick_params(length=0)

    # Thin reference line marking the y-value of interest.
    plt.axhline(color='green',
                lw=0.5,
                y=yval)

# y-value of 40000: between the generating means for 1992 (32000) and 1993 (42000).
plt.figure(figsize=(6, 5))
create_visual(yval=40000)

<IPython.core.display.Javascript object>

# y-value of 35000: above the 1992 generating mean (32000), below the other three.
plt.figure(figsize=(6, 5))
create_visual(yval=35000)

<IPython.core.display.Javascript object>

# y-value of 45000: between the generating means for 1994 (43500) and 1995 (48000).
plt.figure(figsize=(6, 5))
create_visual(yval=45000)

<IPython.core.display.Javascript object>

# y-value of 42000: equal to the generating mean used for 1993.
plt.figure(figsize=(6, 5))
create_visual(yval=42000)

<IPython.core.display.Javascript object>