Source code for pharmpy.visualization

# The pharmpy visualization module

# Since the Python visualization and plotting landscape is rapidly
# evolving and there are many different modules to choose from,
# all visualization API calls should be made from this module so
# that we can start using another API more easily.
# Design considerations:
# We would like to be able to have interactive plots, which currently
# means selecting a package that can render to HTML. The two main
# contenders that can do this are the altair and the bokeh libraries.
# Bokeh seems to have a larger community, but the strength of altair
# is its use of the standard vega-lite format, which decouples the
# creation of a plot from the rendering. Altair plots (or rather vega
# plots) can be changed from the HTML directly via the online vega
# editor. So we are using altair for now, but expect to revisit this
# decision shortly.

# Will provide base functions for creating different types of plots
# or other types of visualizations

import altair as alt
import pandas as pd

# Default width and height passed to alt.Chart by
# scatter_plot_correlation
_chart_width = 500
_chart_height = 500


def pharmpy_theme():
    """Return the theme configuration used for pharmpy plots

    Returns
    -------
    dict
        Theme specification with a top-level ``'config'`` key that sets
        the label and title font sizes of axes and legends. A new dict
        is created on every call.
    """
    return {
        'config': {
            'axis': {
                'labelFontSize': 11,
                'titleFontSize': 13,
            },
            'legend': {
                'labelFontSize': 12,
                'titleFontSize': 13,
            },
        }
    }
# Register the pharmpy theme with altair and enable it. This runs at
# import time, so importing this module switches the active theme.
alt.themes.register('pharmpy', pharmpy_theme)
alt.themes.enable('pharmpy')
def scatter_plot_correlation(df, x, y, title=""):
    """Scatter plot of two columns with a corner-to-corner line

    The values of column `y` are plotted against the values of column
    `x`. A straight line from (min x, min y) to (max x, max y) is
    layered on top of the points.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the two columns to plot
    x : str
        Name of the column for the x-axis
    y : str
        Name of the column for the y-axis
    title : str
        Plot title

    Returns
    -------
    Layered altair chart with the points and the line
    """
    chart = (
        alt.Chart(df, width=_chart_width, height=_chart_height)
        .mark_circle(size=100)
        .encode(alt.X(x), alt.Y(y), tooltip=[x, y])
        .properties(
            title=title,
        )
        .interactive()
    )

    # Series.min()/max() skip missing values. The builtin min()/max()
    # give a result that depends on where a NaN sits in the column and
    # raise on an empty column.
    endpoints = pd.DataFrame(
        {
            x: [df[x].min(), df[x].max()],
            y: [df[y].min(), df[y].max()],
        }
    )

    line = (
        alt.Chart(endpoints)
        .mark_line()
        .encode(
            alt.X(x),
            alt.Y(y),
        )
        .interactive()
    )

    plot = chart + line

    # These settings are applied on the layered chart as a whole
    plot = plot.configure_title(fontSize=16)
    plot = plot.configure_axis(labelFontSize=12, titleFontSize=14)

    return plot
def scatter_matrix(df):
    """Scatter matrix plot

    Each column will be scatter plotted against all columns. A red
    polynomial regression line of order 4 is drawn in each panel.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame whose columns are plotted against each other

    Returns
    -------
    Facetted altair chart with one panel per pair of columns
    """
    columns = list(df.columns)

    # Folding twice gives every row once per (key_x, key_y) pair of
    # columns, which is what the facet below is split on.
    base = (
        alt.Chart(df)
        .transform_fold(columns, as_=['key_x', 'value_x'])
        .transform_fold(columns, as_=['key_y', 'value_y'])
        .encode(
            x=alt.X('value_y:Q', title=None, scale=alt.Scale(zero=False)),
            y=alt.Y('value_x:Q', title=None, scale=alt.Scale(zero=False)),
        )
        .properties(width=150, height=150)
    )

    plot = (
        alt.layer(
            base.mark_circle(),
            base.transform_regression(
                'value_y', 'value_x', method='poly', order=4
            ).mark_line(color='red'),
        )
        .facet(
            column=alt.Column('key_x:N', sort=columns, title=None),
            # Rows are sorted in reversed column order
            row=alt.Row('key_y:N', sort=list(reversed(df.columns)), title=None),
        )
        .resolve_scale(x='independent', y='independent')
        .configure_header(labelFontStyle='bold')
    )
    return plot
def line_plot(df, x, title='', xlabel='', ylabel='', legend_title=''):
    """Line plot for multiple lines

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with one x column and multiple columns with y values
    x
        Name of the x column
    title : str
        Plot title
    xlabel : str
        Label of the x-axis
    ylabel : str
        Label of the y-axis
    legend_title : str
        Title of the legend

    Returns
    -------
    Altair chart with one line per y column
    """
    # Convert to long format: one row per (x, variable, value) so that
    # the former column names can be used as the color category.
    df = df.melt(id_vars=[x])

    plot = (
        alt.Chart(df)
        .mark_line()
        .encode(
            alt.X(f'{x}:Q', title=xlabel),
            alt.Y('value:Q', title=ylabel),
            color=alt.Color(
                'variable:N',
                legend=alt.Legend(
                    title=legend_title,
                    orient='top-left',
                    fillColor='#EEEEEE',
                    padding=10,
                    cornerRadius=10,
                ),
            ),
        )
        .properties(
            title=title,
            width=800,
            height=300,
        )
        .configure_legend(labelLimit=0)
    )
    return plot
def histogram(values, title=""):
    """Histogram with percentage on y and a rule at the mean

    A slider is bound to the plot for reducing the number of values
    used. Only the first values, up to the number selected with the
    slider, are included in the bars and in the mean.

    Parameters
    ----------
    values : pd.Series
        Values to plot. The name of the series is used as column name.
    title : str
        Plot title

    Returns
    -------
    Layered altair chart with the bars and the rule
    """
    n = len(values)

    # 'num' is the 1-based position of each value. The slider filters on it.
    df = pd.DataFrame({values.name: values, 'num': list(range(1, n + 1))})

    slider = alt.binding_range(min=1, max=n, step=1, name='Number of samples: ')
    selection = alt.selection_single(
        bind=slider, fields=['num'], name="num", init={'num': n}
    )

    # num_num refers to the current slider value (selection named 'num',
    # field 'num'). The filter is shared by the bars and the rule.
    base = alt.Chart(df).transform_filter('datum.num <= num_num')

    plot = (
        base.transform_joinaggregate(total='count(*)')
        # Each row contributes 1/total so that the bar heights sum to 100%
        .transform_calculate(pct='1 / datum.total')
        .mark_bar()
        .encode(
            alt.X(f'{values.name}:Q', bin=True),
            alt.Y('sum(pct):Q', axis=alt.Axis(format='%')),
        )
        .add_selection(selection)
        .properties(title=title)
    )

    rule = base.mark_rule(color='red').encode(
        x=f'mean({values.name}):Q', size=alt.value(5)
    )

    return plot + rule
def facetted_histogram(df):
    """Facet of one histogram per column with cross filter interaction

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with one histogram drawn for each column

    Returns
    -------
    Altair chart repeated over the columns of `df`
    """
    # Interval selection along the x-axis
    brush = alt.selection(type='interval', encodings=['x'])

    base = (
        alt.Chart()
        .mark_bar()
        .encode(
            x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)),
            y=alt.Y('count()', axis=alt.Axis(title='')),
        )
        .properties(width=200, height=150)
    )

    # Grey bars for all rows; this layer carries the selection
    background = base.encode(color=alt.value('#ddd')).add_selection(brush)

    # Bars for the rows passing the brush filter, layered on top
    highlight = base.transform_filter(brush)

    chart = alt.layer(background, highlight, data=df).repeat(column=list(df.columns))
    return chart