# Load the per-class means table (whitespace-delimited) and make basic line plots.
# NOTE(review): `pd` and `plt` must already be in scope here; within this file
# they are imported only further down -- confirm the intended cell order.
# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences such as "\d" or "\s".
dat2 = pd.read_csv(
    r"C:\Users\duan\Desktop\PythonDataProcessingVisualization\meanByClass.txt",
    sep=r'\s+',
)
dat2  # notebook-style echo of the frame; a no-op when run as a plain script

# Explore a fake gene expression data set modified from iris.csv
rpkm = pd.read_csv(
    r"C:\Users\duan\Desktop\PythonDataProcessingVisualization\fakeExpressionDat.csv"
)
rpkm  # notebook-style echo; no effect in a plain script

# Line plot of every column of dat2, with the legend placed at the 'best' location.
plt.figure()
dat2.plot()
plt.legend(loc='best')

# Get rid of the legend
dat2.plot(legend=False)

# Separate the features: one subplot per column
dat2.plot(subplots=True, figsize=(6, 6))
plt.legend(loc='best')

# --- Plotting on a Secondary Y-axis ---
# Colour used for each expression column in the two figures below.
line_colors = {
    "WtTypeA": "b",
    "WtTypeB": "turquoise",
    "KOTypeA": "r",
    "KOTypeB": "pink",
}

# Figure 1: the four expression columns on the primary y-axis, plus the
# `replicate` column drawn in green against a secondary (right-hand) y-axis.
plt.figure()
for column_name, line_color in line_colors.items():
    dat2[column_name].plot(color=line_color)
dat2.replicate.plot(secondary_y=True, style='g')

# Plot a subset of columns: the same four columns, without `replicate`.
plt.figure()
for column_name, line_color in line_colors.items():
    dat2[column_name].plot(color=line_color)

# --- Selective Plotting on Secondary Y-axis ---
# Plot the TypeA columns against a secondary (right-hand) y-axis and the
# remaining columns against the primary axis.
plt.figure()
dat3 = dat2.drop(['replicate'], axis=1)
# Column names are case-sensitive: the frame's column is 'WtTypeA' (as used
# elsewhere in this file), so the previous spelling 'wtTypeA' did not match it.
ax = dat3.plot(secondary_y=['WtTypeA', 'KOTypeA'])
ax.set_ylabel('TypeB scale')
ax.right_ax.set_ylabel('TypeA scale')

# --- Targeting different subplots by passing an ax argument ---
# Column shown in each cell of the 2x2 grid, in row-major order:
# (0, 0), (0, 1), (1, 0), (1, 1).
grid_columns = ['WtTypeA', 'KOTypeA', 'WtTypeB', 'KOTypeB']

# Draw each column into its own subplot by passing the target Axes via `ax=`.
fig, axes = plt.subplots(nrows=2, ncols=2)
for panel, column_name in zip(axes.flat, grid_columns):
    dat2[column_name].plot(ax=panel)
    panel.set_title(column_name)

# Adjusting spacing between subplots: same grid, with explicit margins and
# extra horizontal/vertical padding between the panels.
fig, axes = plt.subplots(nrows=2, ncols=2)
for panel, column_name in zip(axes.flat, grid_columns):
    dat2[column_name].plot(ax=panel)
    panel.set_title(column_name)
plt.subplots_adjust(
    left=0.1,
    bottom=0.1,
    right=0.9,
    top=0.9,
    wspace=0.4,
    hspace=0.4,
)

# --- Looking at one replicate at a time ---
# Bar plot of a single row of dat2 (positional row 1), with a black baseline at y=0.
plt.figure()
dat2.iloc[1].plot(kind='bar')
plt.axhline(0, color='k')

# Looking at all replicates at the same time
plt.figure()
dat2.plot(kind='bar')
plt.axhline(0, color='k')

# Same bar plot, coloured with the 'Greens' colormap.
plt.figure()
dat2.plot(kind='bar', colormap='Greens')

# Stacked bars
# NOTE(review): `dat3` is not defined in this section; it is expected to be
# dat2 without the `replicate` column, created earlier in the file.
dat3.plot(kind='bar', stacked=True)

# Histograms split by the `genotype` column.
# NOTE(review): `dat` is not defined in this section; within this file it is
# only loaded further down -- confirm the intended cell order.
plt.figure()
dat.hist(by="genotype", figsize=(6, 4), bins=20)

from pandas.plotting import scatter_matrix

# Reload the fake expression table; a raw string keeps the Windows
# backslashes literal without relying on unrecognised escape sequences.
rpkm = pd.read_csv(
    r"C:\Users\duan\Desktop\IntroductionToMatplotlib\fakeExpressionDat.csv"
)
rpkm  # notebook-style echo; no effect in a plain script

# Scatter-plot matrix with kernel density estimates on the diagonal.
scatter_matrix(rpkm, alpha=0.9, figsize=(6, 6), diagonal='kde')

# --- Parallel coordinates ---
# Parallel coordinates is a plotting technique for multivariate data. It lets
# one see clusters in the data and estimate other statistics visually. Points
# are represented as connected line segments: each vertical line represents
# one attribute, and one set of connected line segments represents one data
# point. Points that tend to cluster will appear closer together.
from pandas.plotting import parallel_coordinates

# Parallel-coordinates plots of `rpkm`, with lines grouped by the `pathway`
# column. One figure per colour scheme; `None` keeps the default colours.
for colormap_name in (None, 'gist_rainbow', 'spring', 'autumn'):
    plt.figure()
    parallel_coordinates(rpkm, 'pathway', colormap=colormap_name)

# --- Andrews curves are smoothed versions of parallel coordinates ---
from pandas.plotting import andrews_curves
from pandas.plotting import radviz

# Andrews curves of `rpkm`, grouped by the `pathway` column, default colours.
plt.figure()
andrews_curves(rpkm, 'pathway')
plt.show()

# A potential issue when plotting a large number of columns is that some
# series can be hard to distinguish because the default colors repeat. To
# remedy this, we can either pick colors explicitly by sampling the rainbow
# colormap, or use the `colormap=` argument supported by DataFrame plotting,
# which accepts either a Matplotlib colormap or the name of a colormap
# registered with Matplotlib.
# NOTE(review): `cm` and `np` must already be in scope here; within this file
# they are imported only further down -- confirm the intended cell order.

# Explicit colours: three evenly spaced samples from the rainbow colormap.
plt.figure()
andrews_curves(rpkm, 'pathway', color=[cm.rainbow(i) for i in np.linspace(0, 1, 3)])
plt.show()

# Named colormaps.
plt.figure()
andrews_curves(rpkm, 'pathway', colormap='jet')
plt.show()

plt.figure()
andrews_curves(rpkm, 'pathway', colormap="winter")
plt.show()

# RadViz plots of `rpkm`, grouped by `pathway`: default colours, then 'Set1'.
plt.figure()
radviz(rpkm, 'pathway')
plt.show()

plt.figure()
radviz(rpkm, 'pathway', colormap="Set1")
plt.show()


























# Matplotlib is the primary plotting library in Python. It makes easy things
# easy and hard things possible. You can give it lists or NumPy arrays, and it
# can generate virtually any plot you'd like.
# For more background, please refer to the Matplotlib introduction page (the
# hyperlink from the original notebook is not present in this file).
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm
from numpy.random import randn
from pandas import Series, DataFrame, date_range

# Load the sample metadata table (whitespace-delimited). A raw string keeps
# the Windows backslashes literal without relying on unrecognised escape
# sequences such as "\d" or "\s".
dat = pd.read_csv(
    r"C:\Users\duan\Desktop\PythonDataProcessingVisualization\metaDataMean.txt",
    sep=r'\s+',
)
dat  # notebook-style echo of the frame; a no-op when run as a plain script

# Notebook magic `%matplotlib inline` is not valid Python syntax, so it is
# issued through the IPython API and skipped when IPython is not running.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Line plot of the `samplemeans` column.
plt.figure()
dat.samplemeans.plot()

# Same series in green, with its scale on a secondary (right-hand) y-axis.
plt.figure()
dat.samplemeans.plot(secondary_y=True, style='g')

# For a list of named colors, see the Matplotlib documentation (the hyperlink
# from the original notebook is not present in this file).
# Change plotting style and add legend: black dashed line with a label.
plt.figure()
dat.samplemeans.plot(style='k--', label='sample means')
plt.legend()

# Vertical bar plot of the same series.
plt.figure()
dat.samplemeans.plot(kind='bar')

# Add a horizontal line: thick red line at y=10 across the bar plot.
plt.figure()
dat.samplemeans.plot(kind='bar')
plt.axhline(10, linewidth=4, color='r')

# --- Plot horizontal bars ---
# Horizontal bars in a single colour.
plt.figure()
dat.samplemeans.plot(kind='barh', color="pink")

# Horizontal bars with one rainbow-colormap colour per bar: the colormap is
# sampled at as many evenly spaced points as there are values in the series.
plt.figure()
bar_colors = [cm.rainbow(i) for i in np.linspace(0, 1, len(dat.samplemeans))]
dat.samplemeans.plot(kind='barh', color=bar_colors)

# Histogram of the sample means.
plt.figure()
dat.samplemeans.hist()

# Fresh figure for the plot that follows.
plt.figure()
# Box plot of `dat` with default settings.
bp = dat.boxplot()

# Define a title
plt.figure()
bp = dat.boxplot()
bp.set_title('My box plot')

# Box plot grouped by the `genotype` column.
plt.figure()
bp = dat.boxplot(by='genotype')

# Better layout by excluding the automatic title
plt.figure()
bp = dat.boxplot(by='genotype')
plt.suptitle('')  # blank out the figure-level title
plt.show()

# More sophisticated plotting can better reveal the trend: box plot of
# `samplemeans` only, grouped by the combination of `celltype` and `genotype`.
plt.figure()
bp = dat.boxplot(column=['samplemeans'], by=['celltype', 'genotype'])
title_boxplot = 'expression means'
plt.title(title_boxplot)
plt.suptitle('')
plt.show()












