import seaborn as sns
from matplotlib.pyplot import (
figure,
subplots_adjust,
subplots,
setp,
gca,
tight_layout,
)
import matplotlib.pyplot as plt
import sys
def split_violin(
    tidy_data,
    x_axis,
    y_axis,
    split_variable,
    order=None,
    ax=None,
    inner="box",
    figsize=(8, 4),
):
    """Plot a split violin plot.

    General plotting function to produce split violin plots.

    parameters
    ----------
    tidy_data: DataFrame
        pandas DataFrame containing the complete data that is to be plotted in a tidy format.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on the x-axis
    y_axis: `str`
        string identifying which column of the DataFrame is to be plotted on the y-axis
    split_variable: `str`
        string identifying which column of the DataFrame is to be used to generate the
        split violin plot (can only contain two categories of data!)
    order: `list` or None | default = None
        order of the categories on the x-axis, forwarded to seaborn
    ax: `axes` | default = None
        axes to draw on; if None the current axes are used (matplotlib creates
        a figure and axes when none exist yet)
    inner: `str` | default = 'box'
        representation of the datapoints in the violin interior, forwarded to seaborn
    figsize: (width, height) or None | default = (8,4)
        size applied to the figure holding the plot. Only applied when `ax` is
        None; the figure of a caller-supplied axes is never resized.

    returns
    -------
    Figure
        the matplotlib figure that holds the axes the violins were drawn on
    """
    # set plotting style ("ticks" is the white style plus tick marks)
    sns.set_style("ticks")

    if ax is None:
        ax = gca()
        # resize the figure that actually holds the plot instead of opening
        # a second, empty figure after plotting
        if figsize is not None:
            ax.figure.set_size_inches(*figsize)

    # draw explicitly on the resolved axes so that a caller-supplied `ax`
    # is honoured instead of whatever axes happens to be current
    sns.violinplot(
        x=x_axis,
        y=y_axis,
        hue=split_variable,
        data=tidy_data,
        palette="muted",
        split=True,
        order=order,
        inner=inner,
        ax=ax,
    )

    # set the width of the two spines that remain visible
    ax.spines["bottom"].set_linewidth(1)
    ax.spines["left"].set_linewidth(1)

    # remove top/right spines and shift the remaining ones outwards
    sns.despine(ax=ax, offset=10, trim=True)

    # rotate tick labels by 90 degrees so they are readable
    # (no axis is given, so this applies to both the x- and the y-axis)
    ax.tick_params(labelrotation=90, length=6, width=2)

    return ax.figure
def box_per_ind(plotdata, y_axis, x_axis, order=None, figsize=(4, 3.5)):
    """Plot boxplots with values per individual.

    General plotting function to produce one or multiple vertically stacked
    boxplots (with the individual data points overlaid) for average/fraction
    gene expression per individual/sample.

    parameters
    ----------
    plotdata: DataFrame
        pandas DataFrame containing the complete data that is to be plotted.
    y_axis: `list` or `str`
        column(s) of the DataFrame to be plotted on the y-axis (genes); one
        subplot is generated per column. Entries that are not columns of
        `plotdata` are silently dropped.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on the x-axis (condition)
    order: `list` or None | default = None
        list identifying the order for the categories on the x_axis; if None
        the categories are used in order of first appearance
    figsize: (width, height) or None | default = (4,3.5)
        optional parameter to define the figure size; the height is the height
        of a single subplot and is multiplied by the number of subplots

    returns
    -------
    Figure
        the matplotlib figure containing all subplots

    raises
    ------
    SystemExit
        if none of the requested y_axis columns, or the x_axis column, is
        present in `plotdata`
    """
    if not isinstance(y_axis, list):
        y_axis = [y_axis]

    # only retain elements present (a new list is built, the caller's list
    # is left untouched)
    available_columns = set(plotdata.columns)
    y_axis = [gene for gene in y_axis if gene in available_columns]

    # check that we have genes
    if not y_axis:
        sys.exit("Please select valid gene names")
    # check that we have the condition
    if x_axis not in plotdata.columns:
        sys.exit("Please select a valid condition name")

    if order is None:
        # order of first appearance: deterministic, unlike iterating a set
        order = list(dict.fromkeys(plotdata[x_axis]))

    # set plotting style ("ticks" is the white style plus tick marks)
    sns.set_style("ticks")

    # determine number of subplots
    number_of_subplots = len(y_axis)

    # initiate figure and adjust its size if desired
    fig = figure()
    if figsize is not None:
        fig.set_figwidth(figsize[0])
        fig.set_figheight(figsize[1] * number_of_subplots)

    first_ax = None
    for position, gene in enumerate(y_axis, start=1):
        # every subplot after the first shares its y-axis with the first one
        ax = fig.add_subplot(number_of_subplots, 1, position, sharey=first_ax)
        if first_ax is None:
            first_ax = ax

        sns.boxplot(
            x=x_axis, y=gene, data=plotdata, order=order, palette="muted", ax=ax
        )
        sns.stripplot(
            x=x_axis, y=gene, data=plotdata, order=order, color="black", ax=ax
        )

        # only the bottom-most subplot shows the x-axis
        ax.get_xaxis().set_visible(position == number_of_subplots)
        ax.yaxis.tick_right()
        # get correct label for the y-axis
        ax.set_ylabel(gene)

    # `ax` is now the last subplot generated: rotate its x tick labels
    ax.tick_params(axis="x", labelrotation=90)

    fig.tight_layout()
    # remove the space between subplots (after tight_layout, which would
    # otherwise override it)
    fig.subplots_adjust(hspace=0.000)

    return fig
def stacked_split_violin(
    tidy_data,
    x_axis,
    y_axis,
    split_variable,
    subset_variable_label,
    subset_variables,
    fig_width=8,
    fig_height=4,
    order=None,
    inner="box",
):
    """Plot stacked split violin plots.

    General plotting function to produce stacked split violin plots: one
    split violin plot per data subset, stacked vertically with a shared
    y-axis and a single legend above the first plot.

    parameters
    ----------
    tidy_data: DataFrame
        pandas DataFrame containing the complete data that is to be plotted in a tidy format.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on the x-axis
    y_axis: `str`
        string identifying which column of the DataFrame is to be plotted on the y-axis
    split_variable: `str`
        string identifying which column of the DataFrame is to be used to generate the
        split violin plot (can only contain two categories of data!)
    subset_variable_label: `str`
        string identifying which column of the DataFrame contains the variables that
        should be used to make datasubsets for each plot of the stacked violin plot
    subset_variables: `list`
        list identifying the subsets that should be generated; one subplot is
        drawn per entry, labelled with the entry on its y-axis
    fig_width: width or None | default = 8
        optional parameter to define figure width of the plot that is to be generated;
        None keeps the matplotlib default
    fig_height: height or None | default = 4
        optional parameter to define the height of a single subplot (the figure
        height is this value times the number of subplots); None keeps the
        matplotlib default
    order: `list` or None | default = None
        order of the categories on the x-axis, forwarded to seaborn
    inner: 'box' or 'quartile' or 'point' or 'stick'
        define how the datapoints should be displayed in the violin interior, see seaborns documentation for more details

    returns
    -------
    Figure
        the matplotlib figure containing all subplots

    raises
    ------
    ValueError
        if `subset_variables` is empty
    KeyError
        if `subset_variable_label` is not a column of `tidy_data`
    """
    # determine number of subplots
    number_of_subplots = len(subset_variables)
    if number_of_subplots == 0:
        raise ValueError("subset_variables must contain at least one subset")

    # initiate figure
    fig = figure()
    if fig_width is not None:
        fig.set_figwidth(fig_width)
    if fig_height is not None:
        fig.set_figheight(fig_height * number_of_subplots)

    first_ax = None
    for position, subset in enumerate(subset_variables, start=1):
        # generate datasubset
        data = tidy_data[tidy_data[subset_variable_label] == subset]

        # every subplot after the first shares its y-axis with the first one
        ax = fig.add_subplot(number_of_subplots, 1, position, sharey=first_ax)
        sns.violinplot(
            x=x_axis,
            y=y_axis,
            hue=split_variable,
            data=data,
            palette="muted",
            split=True,
            order=order,
            inner=inner,
            ax=ax,
        )

        # only the bottom-most subplot shows the x-axis
        ax.get_xaxis().set_visible(position == number_of_subplots)
        ax.yaxis.tick_right()
        # get correct label for the y-axis
        ax.set_ylabel(subset)

        if first_ax is None:
            first_ax = ax
            # move legend above the plot
            ax.legend(loc=9, bbox_to_anchor=(0.5, 1.5), ncol=2)
        else:
            # remove legend since we only need it once; a subplot may have
            # no legend at all (e.g. when its data subset is empty)
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

    # `ax` is now the last subplot generated: rotate its x tick labels
    ax.tick_params(axis="x", labelrotation=90)

    fig.tight_layout()
    # remove the space between subplots (after tight_layout, which would
    # otherwise override it)
    fig.subplots_adjust(hspace=0.000)

    return fig
def flex_dotplot(
    df,
    X,
    Y,
    HUE,
    SIZE,
    title,
    mycolors="Reds",
    myfontsize=15,
    xfactor=0.7,
    yfactor=0.6,
    figsize=None,
):
    """Generate a dot plot showing average expression and fraction positive cells

    This function generates a plot where X and Y axes are flexible.
    For each coordinate a circle is drawn where the size of the circle
    is specified by SIZE (typically fraction_pos) and the color of the circle is
    specified by HUE (typically average expression). X and Y axis for stratification is also specified,
    typically X would be genes and Y cell types. In case of a single gene, Y could be treatments/patients etc.

    parameters
    ----------
    df: `pandas.DataFrame`
        a dataframe containing the data to be plotted
    X: `str`
        df column to be plotted on X axis
    Y: `str`
        df column to be plotted on Y axis
    HUE: `str`
        df column corresponding to dot color (e.g. average expression)
    SIZE: `str`
        df column corresponding to dot size (e.g. fraction positive)
    title: `str`
        plot title
    mycolors: `str`
        color palette e.g. Reds or viridis
    myfontsize: `int`
        fontsize for title, axis labels, tick labels and the legend title
        (legend entries use myfontsize - 2), defaults to 15
    xfactor: `float`
        figure width per distinct X value, defaults to 0.7; ignored if figsize is given
    yfactor: `float`
        figure height per distinct Y value, defaults to 0.6; ignored if figsize is given
    figsize: (width, height) or None | default = None
        optional parameter to define the figure size of the plot that is to be generated;
        if None the size is derived from the number of distinct X and Y values

    returns
    -------
    Figure
        A matplotlib figure element containing the generated plot. To save the figure this plot will need
        to be passed to a parameter and saved in a second step through the fig.savefig() function call.

    raises
    ------
    SystemExit
        if X, Y, HUE or SIZE is not a column of `df`

    Examples
    --------
    >>> # import libraries and dataset
    >>> import besca as bc
    >>> adata = bc.datasets.simulated_Kotliarov2020_processed()
    >>> gene = 'Gene_3'
    >>> df=bc.get_singlegenedf(gene, adata, 'CONDITION','leiden','sampleid')
    >>> fig = bc.pl.flex_dotplot(df,'CONDITION','leiden','Avg','Fct','study_title')

    .. plot::

        >>> # import libraries and dataset
        >>> import besca as bc
        >>> adata = bc.datasets.simulated_Kotliarov2020_processed()
        >>> # define genes
        >>> gene = 'Gene_3'
        >>> df=bc.get_singlegenedf(gene, adata, 'CONDITION','leiden','sampleid')
        >>> fig = bc.pl.flex_dotplot(df,'CONDITION','leiden','Avg','Fct','study_title')
    """
    # check that we have the conditions
    for column, role in ((X, "X"), (Y, "Y"), (HUE, "HUE"), (SIZE, "SIZE")):
        if column not in df.columns:
            sys.exit("Please select a valid condition name - " + role)

    # set plotting style ("ticks" is the white style plus tick marks)
    sns.set_style("ticks")

    n_x = df[X].nunique()
    n_y = df[Y].nunique()

    if figsize is None:
        # scale the figure with the number of categories; the extra 1 is
        # there to "provide" margin space for the labels
        figsize = (1 + int(n_x * xfactor), 1 + int(n_y * yfactor))
    fig, ax = subplots(figsize=figsize)

    sns.scatterplot(
        data=df,
        x=X,
        y=Y,
        hue=HUE,
        size=SIZE,
        sizes=(20, 300),
        palette=mycolors,
        ax=ax,
    )

    ax.set_title(title, size=myfontsize)
    ax.set_ylabel(Y, color="grey", fontsize=myfontsize)
    ax.set_xlabel(X, color="grey", fontsize=myfontsize)
    setp(ax.get_xticklabels(), rotation=90, fontsize=myfontsize)
    setp(ax.get_yticklabels(), fontsize=myfontsize)

    # place the legend first and style it afterwards: ax.legend() builds a
    # new legend, so font sizes set on an earlier legend would be discarded
    legend = ax.legend(framealpha=0.4, bbox_to_anchor=(1.01, 1), borderaxespad=0.5)
    setp(legend.get_title(), fontsize=myfontsize)  # for legend title
    setp(legend.get_texts(), fontsize=myfontsize - 2)  # for legend text

    # use a smaller margin around the dots when an axis has many categories
    mm = 0.05 if n_x >= 10 else 0.2
    my = 0.05 if n_y >= 10 else 0.2
    ax.margins(x=mm, y=my)

    fig.tight_layout()

    return fig