import seaborn as sns
from matplotlib.pyplot import (
    figure,
    subplots_adjust,
    subplots,
    setp,
    gca,
    tight_layout,
)
import matplotlib.pyplot as plt
import sys


def split_violin(
    tidy_data,
    x_axis,
    y_axis,
    split_variable,
    order=None,
    ax=None,
    inner="box",
    figsize=(8, 4),
):
    """plot split violin plots.

    General plotting function to produce split violin plots.

    parameters
    ----------
    tidy_data: DataFrame
        pandas DataFrame containing the complete data that is to be plotted
        in a tidy format.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on
        the x-axis
    y_axis: `str`
        string identifying which column of the DataFrame is to be plotted on
        the y-axis
    split_variable: `str`
        string identifying which column of the DataFrame is to be used to
        generate the split violin plot (can only contain two categories of
        data!)
    order: `list` | default = None
        passed on to seaborn's violinplot as `order`
    ax: `axes` | default = None
        pass the axes class to which your figure should be added, if none is
        supplied the current axes (`gca()`) are used
    inner: `str` | default = 'box'
        passed on to seaborn's violinplot as `inner`
    figsize: (width, height) or None | default = (8,4)
        optional parameter to define the figure size of the plot that is to
        be generated. Only applied when `ax` is not supplied, so that a
        caller-owned figure is never resized.

    returns
    -------
    None
        The plot is drawn onto `ax` (or the current axes); nothing is
        returned.
    """
    # set plotting style first so that axes created by gca() below pick it up
    sns.set_style("white")
    sns.set_style("ticks")

    caller_owns_axes = ax is not None
    if ax is None:
        ax = gca()

    # draw explicitly onto the requested axes instead of relying on the
    # implicit "current axes", otherwise a supplied `ax` would be ignored
    ax = sns.violinplot(
        x=x_axis,
        y=y_axis,
        hue=split_variable,
        data=tidy_data,
        palette="muted",
        split=True,
        order=order,
        inner=inner,
        ax=ax,
    )

    # set the width of the remaining spines
    ax.spines["bottom"].set_linewidth(1)
    ax.spines["left"].set_linewidth(1)

    # remove top/right spines and shift the others outwards (this axes only)
    sns.despine(ax=ax, offset=10, trim=True)

    # make labels 90 degrees so they are readable
    ax.tick_params(labelrotation=90, length=6, width=2)

    # resize the figure that holds the plot; opening a new figure here would
    # only leave an empty canvas behind and not change the plot itself
    if figsize is not None and not caller_owns_axes:
        ax.figure.set_size_inches(figsize)

    return None
def box_per_ind(plotdata, y_axis, x_axis, order=None, figsize=(4, 3.5)):
    """plot boxplot with values per individual.

    General plotting function to produce one or multiple boxplots for
    average/fraction gene expression per individual/sample. One subplot is
    drawn per retained entry of `y_axis`; the subplots are stacked vertically
    without spacing and share their y-axis.

    parameters
    ----------
    plotdata: DataFrame
        pandas DataFrame containing the complete data that is to be plotted.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on
        the x-axis (condition)
    y_axis: `list` or `str`
        string(s) identifying which column(s) of the DataFrame are to be
        plotted on the y-axis (genes). Entries that are not columns of
        `plotdata` are silently dropped.
    order: `list` | default = None
        list identifying the order for the categories on the x_axis. If None,
        the non-missing categories are used in order of first appearance.
    figsize: (width, height) or None | default = (4,3.5)
        optional parameter to define the figure size of the plot that is to
        be generated; the height is applied per subplot.

    returns
    -------
    None
        The figure is created through pyplot and not returned.

    raises
    ------
    SystemExit
        if none of the requested genes or the condition column is present in
        `plotdata`.
    """
    # validate the input first so that invalid calls neither change the
    # global plotting style nor leave an empty figure behind
    if not isinstance(y_axis, list):
        y_axis = [y_axis]

    # only retain elements present
    genes = [gene for gene in y_axis if gene in plotdata.columns]

    # check that we have genes
    if not genes:
        sys.exit("Please select valid gene names")

    # check that we have the condition
    if x_axis not in plotdata.columns:
        sys.exit("Please select a valid condition name")

    if order is None:
        # order of first appearance is reproducible between runs, which the
        # iteration order of a set is not
        order = list(plotdata[x_axis].dropna().unique())

    # set plotting style
    sns.set_style("white")
    sns.set_style("ticks")

    # determine number of subplots
    number_of_subplots = len(genes)

    # initiate figure
    fig = figure()

    # adjust size of figure if desired
    if figsize is not None:
        fig.set_figwidth(figsize[0])
        fig.set_figheight(figsize[1] * number_of_subplots)

    first_ax = None
    for index, gene in enumerate(genes):
        # every subplot after the first shares its y-axis with the first one
        ax = fig.add_subplot(number_of_subplots, 1, index + 1, sharey=first_ax)
        if first_ax is None:
            first_ax = ax

        sns.boxplot(
            x=x_axis, y=gene, data=plotdata, order=order, palette="muted", ax=ax
        )
        sns.stripplot(
            x=x_axis, y=gene, data=plotdata, order=order, color="black", ax=ax
        )

        # only the last (bottom) subplot shows the x-axis
        ax.get_xaxis().set_visible(index == number_of_subplots - 1)
        ax.yaxis.tick_right()

        # get correct label for the y-axis
        ax.set_ylabel(gene)

    # rotate the category labels of the bottom subplot
    ax.tick_params(axis="x", labelrotation=90)

    # remove the space between subplots after the layout has been computed
    fig.tight_layout()
    fig.subplots_adjust(hspace=0.000)

    return None
def stacked_split_violin(
    tidy_data,
    x_axis,
    y_axis,
    split_variable,
    subset_variable_label,
    subset_variables,
    fig_width=8,
    fig_height=4,
    order=None,
    inner="box",
):
    """plot stacked split violin plots.

    General plotting function to produce stacked split violin plots. One
    split violin plot is drawn per entry of `subset_variables`; the plots are
    stacked vertically without spacing and share their y-axis. The legend is
    only shown once, above the first plot.

    parameters
    ----------
    tidy_data: DataFrame
        pandas DataFrame containing the complete data that is to be plotted
        in a tidy format.
    x_axis: `str`
        string identifying which column of the DataFrame is to be plotted on
        the x-axis
    y_axis: `str`
        string identifying which column of the DataFrame is to be plotted on
        the y-axis
    split_variable: `str`
        string identifying which column of the DataFrame is to be used to
        generate the split violin plot (can only contain two categories of
        data!)
    subset_variable_label: `str`
        string identifying which column of the DataFrame contains the
        variables that should be used to make data subsets for each plot of
        the stacked violin plot
    subset_variables: `list`
        list identifying the subsets that should be generated
    fig_width: width or None | default = 8
        optional parameter to define figure width of the plot that is to be
        generated; None keeps matplotlib's default width
    fig_height: height or None | default = 4
        optional parameter to define figure height (per subplot) of the plot
        that is to be generated; None keeps matplotlib's default height
    order: `list` | default = None
        passed on to seaborn's violinplot as `order`
    inner: 'box' or 'quartile' or 'point' or 'stick'
        define how the datapoints should be displayed in the violin interior,
        see seaborns documentation for more details

    returns
    -------
    Figure
        the matplotlib figure containing all stacked subplots

    raises
    ------
    IndexError
        if `subset_variables` is empty
    KeyError
        if `subset_variable_label` is not a column of `tidy_data`
    """
    subset_variables = list(subset_variables)

    # determine number of subplots
    number_of_subplots = len(subset_variables)
    if number_of_subplots == 0:
        # fail before a figure is created; IndexError is what indexing the
        # empty list raised previously
        raise IndexError("subset_variables must contain at least one subset")

    # direct column access reports the missing column name on a typo
    subset_column = tidy_data[subset_variable_label]

    # initiate figure
    fig = figure()
    if fig_width is not None:
        fig.set_figwidth(fig_width)
    if fig_height is not None:
        fig.set_figheight(fig_height * number_of_subplots)

    first_ax = None
    for index, subset in enumerate(subset_variables):
        # generate data subset
        data = tidy_data[subset_column == subset]

        # every subplot after the first shares its y-axis with the first one
        ax = fig.add_subplot(number_of_subplots, 1, index + 1, sharey=first_ax)
        sns.violinplot(
            x=x_axis,
            y=y_axis,
            hue=split_variable,
            data=data,
            palette="muted",
            split=True,
            order=order,
            inner=inner,
            ax=ax,
        )

        # only the last (bottom) subplot shows the x-axis
        ax.get_xaxis().set_visible(index == number_of_subplots - 1)
        ax.yaxis.tick_right()

        # get correct label for the y-axis
        ax.set_ylabel(subset)

        if index == 0:
            first_ax = ax
            # move legend above the plot
            ax.legend(loc=9, bbox_to_anchor=(0.5, 1.5), ncol=2)
        else:
            # remove legend since we only need it once; seaborn may not have
            # created one, in which case there is nothing to remove
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

    # rotate the category labels of the bottom subplot
    ax.tick_params(axis="x", labelrotation=90)

    # remove the space between subplots after the layout has been computed
    fig.tight_layout()
    fig.subplots_adjust(hspace=0.000)

    return fig
def flex_dotplot(
    df,
    X,
    Y,
    HUE,
    SIZE,
    title,
    mycolors="Reds",
    myfontsize=15,
    xfactor=0.7,
    yfactor=0.6,
    figsize=None,
):
    """Generate a dot plot showing average expression and fraction positive cells

    This function generates a plot where X and Y axes are flexible.
    For each coordinate a circle is drawn where the size of the circle is
    specified by SIZE (typically fraction_pos) and the color of the circle is
    specified by HUE (typically average expression).
    X and Y axis for stratification is also specified, typically X would be
    genes and Y cell types. In case of a single gene, Y could be
    treatments/patients etc.

    parameters
    ----------
    df: `pandas.DataFrame`
        a dataframe containing the data to be plotted
    X: `str`
        df column to be plotted on X axis
    Y: `str`
        df column to be plotted on Y axis
    HUE: `str`
        df column corresponding to dot color (e.g. average expression)
    SIZE: `str`
        df column corresponding to dot size (e.g. fraction positive)
    title: `str`
        plot title
    mycolors: `str`
        color palette e.g. Reds or viridis
    myfontsize: `int`
        fontsize for title, axis labels, tick labels and legend title,
        defaults to 15 (legend entries use myfontsize - 2)
    xfactor: `float`
        figure width per unique X value, defaults to 0.7
    yfactor: `float`
        figure height per unique Y value, defaults to 0.6
    figsize: (width, height) or None | default = None
        optional parameter to define the figure size of the plot that is to
        be generated; if None the size is derived from the number of unique
        X and Y values and xfactor/yfactor

    returns
    -------
    Figure
        A matplotlib figure element containing the generated plot. To save
        the figure this plot will need to be passed to a parameter and saved
        in a second step through the fig.savefig() function call.

    raises
    ------
    SystemExit
        if X, Y, HUE or SIZE is not a column of `df`

    Examples
    --------

    >>> # import libraries and dataset
    >>> import besca as bc
    >>> adata = bc.datasets.simulated_Kotliarov2020_processed()
    >>> gene = 'Gene_3'
    >>> df=bc.get_singlegenedf(gene, adata, 'CONDITION','leiden','sampleid')
    >>> fig = bc.pl.flex_dotplot(df,'CONDITION','leiden','Avg','Fct','study_title')

    .. plot::

        >>> # import libraries and dataset
        >>> import besca as bc
        >>> adata = bc.datasets.simulated_Kotliarov2020_processed()
        >>> # define genes
        >>> gene = 'Gene_3'
        >>> df=bc.get_singlegenedf(gene, adata, 'CONDITION','leiden','sampleid')
        >>> fig = bc.pl.flex_dotplot(df,'CONDITION','leiden','Avg','Fct','study_title')

    """
    # check that we have the conditions (before touching the global style)
    for column, role in ((X, "X"), (Y, "Y"), (HUE, "HUE"), (SIZE, "SIZE")):
        if column not in df.columns:
            sys.exit("Please select a valid condition name - " + role)

    # set plotting style
    sns.set_style("white")
    sns.set_style("ticks")

    n_x = df[X].nunique()
    n_y = df[Y].nunique()

    if figsize is None:
        # the leading 1 "provides" margin space for the labels; the height
        # scales with yfactor, the width with xfactor
        figsize = (1 + int(n_x * xfactor), 1 + int(n_y * yfactor))

    # create the figure at its final size; a figure opened after plotting
    # would be empty and would hijack the later layout calls
    fig, ax = subplots(figsize=figsize)

    sns.scatterplot(
        data=df,
        x=X,
        y=Y,
        hue=HUE,
        size=SIZE,
        sizes=(20, 300),
        palette=mycolors,
        ax=ax,
    )

    ax.set_title(title, size=myfontsize)
    ax.set_ylabel(Y, color="grey", fontsize=myfontsize)
    ax.set_xlabel(X, color="grey", fontsize=myfontsize)
    setp(ax.get_xticklabels(), rotation=90, fontsize=myfontsize)
    setp(ax.get_yticklabels(), fontsize=myfontsize)

    # place the legend first and style it afterwards: ax.legend() builds a
    # new legend, so font sizes set before that call would be lost
    legend = ax.legend(framealpha=0.4, bbox_to_anchor=(1.01, 1), borderaxespad=0.5)
    setp(legend.get_title(), fontsize=myfontsize)  # for legend title
    setp(legend.get_texts(), fontsize=myfontsize - 2)  # for legend text

    # use smaller relative margins once an axis holds many categories
    x_margin = 0.05 if n_x >= 10 else 0.2
    y_margin = 0.05 if n_y >= 10 else 0.2
    ax.margins(x=x_margin, y=y_margin)

    fig.tight_layout()

    return fig