Source code for pharmpy.visualization

# The pharmpy visualization module

# Since the python visualization and plotting landscape is rapidly
# evolving and there are many different modules to choose from
# all visualization API calls should be made from this module so
# that we could start using another API more easily.
# Design considerations:
# We would like to be able to have interactive plots which currently
# means to select a package that can render to html. The two main
# contenders that can do this are the altair and the bokeh libraries.
# Bokeh seems to have a larger community, but the strength in altair
# is its use of the standard vega-lite format, which decouples the
# creation of a plot from the rendering. Altair plots (or rather vega
# plots) can be changed from the html directly via the online vega
# editor. So using altair for now, but expecting to revisit this
# decision shortly.

# Will provide base functions for creating different types of plots
# or other types of visualizations

import altair as alt
import pandas as pd

# Default chart dimensions handed to alt.Chart(width=..., height=...) by
# scatter_plot_correlation.
_chart_width = 500
_chart_height = 500


def pharmpy_theme():
    """Return the configuration dictionary of the pharmpy altair theme.

    Returns
    -------
    dict
        A newly built dictionary with a single ``config`` entry that sets
        the label and title font sizes of axes and legends.
    """
    return {
        'config': {
            'axis': {
                'labelFontSize': 11,
                'titleFontSize': 13,
            },
            'legend': {
                'labelFontSize': 12,
                'titleFontSize': 13,
            },
        }
    }


# Import-time side effect: register pharmpy_theme under the name 'pharmpy'
# and enable it through altair's theme registry.
alt.themes.register('pharmpy', pharmpy_theme)
alt.themes.enable('pharmpy')


def scatter_plot_correlation(df, x, y, title=""):
    """Scatter plot of two columns with a corner-to-corner reference line

    Parameters
    ----------
    df : pd.DataFrame
         DataFrame containing the columns to plot
    x : str
         Name of the column plotted on the x-axis
    y : str
         Name of the column plotted on the y-axis
    title : str
         Plot title

    Returns
    -------
    alt.LayerChart
        Interactive circles layered with a line running from
        (min(x), min(y)) to (max(x), max(y)).
    """
    chart = (
        alt.Chart(df, width=_chart_width, height=_chart_height)
        .mark_circle(size=100)
        .encode(alt.X(x), alt.Y(y), tooltip=[x, y])
        .properties(
            title=title,
        )
        .interactive()
    )

    # Series.min()/max() skip missing values. The builtin min()/max() compare
    # element by element and give order-dependent results when NaN is present,
    # which could turn a line endpoint into NaN.
    endpoints = pd.DataFrame(
        {
            x: [df[x].min(), df[x].max()],
            y: [df[y].min(), df[y].max()],
        }
    )

    line = (
        alt.Chart(endpoints)
        .mark_line()
        .encode(
            alt.X(x),
            alt.Y(y),
        )
        .interactive()
    )

    plot = chart + line

    # Applied to the layered chart; overrides the theme's axis font sizes.
    plot = plot.configure_title(fontSize=16)
    plot = plot.configure_axis(labelFontSize=12, titleFontSize=14)
    return plot


def scatter_matrix(df):
    """Scatter matrix plot

    Each column will be scatter plotted against all columns. Every panel
    also gets a red polynomial (order 4) regression line.

    Parameters
    ----------
    df : pd.DataFrame
         DataFrame whose columns are plotted against each other

    Returns
    -------
    alt.FacetChart
        Grid of 150x150 panels with independent x and y scales.
    """
    columns = list(df.columns)

    # Folding twice over the same columns yields every (key_x, key_y) pair,
    # which is what the row/column facet below splits on.
    base = (
        alt.Chart(df)
        .transform_fold(columns, as_=['key_x', 'value_x'])
        .transform_fold(columns, as_=['key_y', 'value_y'])
        .encode(
            x=alt.X('value_y:Q', title=None, scale=alt.Scale(zero=False)),
            y=alt.Y('value_x:Q', title=None, scale=alt.Scale(zero=False)),
        )
        .properties(width=150, height=150)
    )

    plot = (
        alt.layer(
            base.mark_circle(),
            base.transform_regression('value_y', 'value_x', method='poly', order=4).mark_line(
                color='red'
            ),
        )
        .facet(
            column=alt.Column('key_x:N', sort=columns, title=None),
            # Rows are laid out in reversed column order.
            row=alt.Row('key_y:N', sort=columns[::-1], title=None),
        )
        .resolve_scale(x='independent', y='independent')
        .configure_header(labelFontStyle='bold')
    )
    return plot


def line_plot(df, x, title='', xlabel='', ylabel='', legend_title=''):
    """Line plot for multiple lines

    Parameters
    ----------
    df : pd.DataFrame
         DataFrame with one x column and multiple columns with y values
    x : str
         Name of the x column
    title : str
         Plot title
    xlabel : str
         Label of the x-axis
    ylabel : str
         Label of the y-axis
    legend_title : str
         Title of the legend

    Returns
    -------
    alt.Chart
        Chart with one line per y column, coloured by column name.
    """
    # Reshape to long format: melt() puts the former column names in
    # 'variable' and their values in 'value'.
    df = df.melt(id_vars=[x])
    plot = (
        alt.Chart(df)
        .mark_line()
        .encode(
            alt.X(f'{x}:Q', title=xlabel),
            alt.Y('value:Q', title=ylabel),
            color=alt.Color(
                'variable:N',
                legend=alt.Legend(
                    title=legend_title,
                    orient='top-left',
                    fillColor='#EEEEEE',
                    padding=10,
                    cornerRadius=10,
                ),
            ),
        )
        .properties(
            title=title,
            width=800,
            height=300,
        )
        .configure_legend(labelLimit=0)
    )
    return plot


def histogram(values, title=""):
    """Histogram with percentage on y, a rule at the mean and a sample slider

    The slider reduces the number of values used: only the first ``num``
    values are included in both the bars and the mean rule.

    Parameters
    ----------
    values : pd.Series
         Values to plot. The name of the series is used as the column name.
    title : str
         Plot title

    Returns
    -------
    alt.LayerChart
        Bars layered with a red rule at the mean.
    """
    n_values = len(values)
    name = values.name

    # 'num' is a 1-based running number that the slider filters on.
    df = pd.DataFrame({name: values, 'num': list(range(1, n_values + 1))})

    slider = alt.binding_range(min=1, max=n_values, step=1, name='Number of samples: ')
    selection = alt.selection_single(
        bind=slider, fields=['num'], name="num", init={'num': n_values}
    )

    # 'num_num' is referenced as the selection "num" restricted to field 'num'.
    base = alt.Chart(df).transform_filter('datum.num <= num_num')

    plot = (
        # Each row contributes 1/total, so the summed bar heights are fractions
        # of the filtered sample, shown as percentages.
        base.transform_joinaggregate(total='count(*)')
        .transform_calculate(pct='1 / datum.total')
        .mark_bar()
        .encode(
            alt.X(f'{name}:Q', bin=True),
            alt.Y('sum(pct):Q', axis=alt.Axis(format='%')),
        )
        .add_selection(selection)
        .properties(title=title)
    )

    rule = base.mark_rule(color='red').encode(x=f'mean({name}):Q', size=alt.value(5))

    return plot + rule


def facetted_histogram(df):
    """Facet of one histogram per column with cross filter interaction

    Parameters
    ----------
    df : pd.DataFrame
         DataFrame with one histogram drawn per column

    Returns
    -------
    alt.RepeatChart
        One 200x150 histogram per column. Each panel layers the filtered
        bars on top of a grey background of all the data.
    """
    # Interval selection along x, used below as the filter for the highlight.
    brush = alt.selection(type='interval', encodings=['x'])

    base = (
        alt.Chart()
        .mark_bar()
        .encode(
            x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)),
            y=alt.Y('count()', axis=alt.Axis(title='')),
        )
        .properties(width=200, height=150)
    )

    # Grey bars carry the selection; they are not filtered by it.
    background = base.encode(color=alt.value('#ddd')).add_selection(brush)

    # Bars restricted to the rows inside the brushed interval.
    highlight = base.transform_filter(brush)

    chart = alt.layer(background, highlight, data=df).repeat(column=list(df.columns))

    return chart
Quick search

Table of Contents

Source code for pharmpy.visualization