Visualizing Proteomics Data
Last updated
Last updated
MIT Resources
https://accessibility.mit.edu Massachusetts Institute of Technology
Read in data
# Load the mass-spec results table into a DataFrame.
# A raw string keeps every Windows backslash literal; the previous literal
# relied on unrecognised escapes such as "\d" and "\D", which newer Python
# versions warn about. The resulting path is unchanged.
ms = pd.read_csv(r"C:\Users\duan\Desktop\IntroductionToSeaborn\mass_spec_new.csv")
ms  # echo the table (only displays when run as a notebook cell)
# Histogram of the light precursor m/z values with default styling.
a = sns.histplot(ms['light Precursor Mz'], bins=20)  # simple, right?

# Same column with a KDE overlay and custom colour to make the plot more elegant.
# `kde` is a boolean flag: the string "TRUE" only worked because any non-empty
# string is truthy (so "FALSE" would have switched the KDE on as well).
a = sns.histplot(data=ms, x='light Precursor Mz', bins=20, kde=True,
                 color="lightseagreen", alpha=0.1)

# Histogram of the '+1 charge mass' column exactly as it was loaded.
a = sns.histplot(data=ms, x='light +1 charge mass', bins=20, kde=True,
                 color="red", alpha=0.1)  # O no, what could be wrong?
# Inspect the '+1 charge mass' values in ascending order.
# sort_values() returns a sorted copy. The previous `.values` followed by an
# in-place `v.sort()` could reorder the column's own buffer (detaching the
# masses from their rows) or raise on a read-only array when pandas
# Copy-on-Write is active.
v = ms['light +1 charge mass'].sort_values().values
v  # print the values - what's wrong?
# Rebuild the '+1 charge mass' column from the precursor m/z and charge:
#   mass(+1) = m/z * z - (z - 1) * 1.0078
# NOTE(review): 1.0078 is presumably the mass (Da) of each extra charge
# carrier being removed - confirm the intended constant.
ms['light +1 charge mass'] = (
    ms['light Precursor Mz'] * ms['Precursor Charge']
    - ((ms['Precursor Charge'] - 1) * 1.0078)
)

# Re-plot the corrected column. `kde` takes a boolean; the string "TRUE"
# was only truthy by accident.
a = sns.histplot(data=ms, x='light +1 charge mass', bins=20, kde=True,
                 color="red", alpha=0.1)
# Let's see if +2 and +3 charged peptides exhibit a different distribution.
# The +3 histogram creates the axes; the +2 histogram is drawn onto the same
# axes via `ax=a` so the two distributions overlay.
# `kde` is passed as a real boolean instead of the always-truthy string "TRUE".
a = sns.histplot(data=ms.loc[ms['Precursor Charge'] == 3],
                 x='light +1 charge mass', bins=10, label='+3',
                 kde=True, color="chocolate", alpha=0.2)
sns.histplot(data=ms.loc[ms['Precursor Charge'] == 2],
             x='light +1 charge mass', bins=10, ax=a, label='+2',
             kde=True, color="orchid", alpha=0.1)
a.legend()  # show the '+3' / '+2' labels
# Add one light-to-heavy (15N) total-area ratio column per run. The details
# of the ratio aren't important here; what matters is that we get new columns
# of values to plot.
for run in ('BT2_HFX_5', 'BT2_HFX_7'):
    light_area = ms[f'light {run} Total Area']
    heavy_area = ms[f'15N {run} Total Area']
    ms[f'Total Area Ratio {run}'] = light_area / heavy_area
# Optional: the 'poster' context just makes the figures easier to see.
sns.set_context('poster')
f = pylab.figure(figsize=(20, 10))

# Swarm plot of the BT2_HFX_5 ratio per protein gene.
# `.loc[0:100, :]` slices by index label and includes both endpoints.
swarm = sns.swarmplot(
    x='Protein Gene',
    y='Total Area Ratio BT2_HFX_5',
    data=ms.loc[0:100, :],
    size=6,
)

pylab.tight_layout()
pylab.savefig('swarmplot.png')  # write the figure to disk
# Optional: the 'poster' context just makes the figures easier to see.
sns.set_context('poster')
f = pylab.figure(figsize=(20, 10))

# Box plot of the BT2_HFX_5 ratio per protein gene.
# `.loc[0:100, :]` slices by index label and includes both endpoints.
box = sns.boxplot(
    x='Protein Gene',
    y='Total Area Ratio BT2_HFX_5',
    data=ms.loc[0:100, :],
)

pylab.tight_layout()
pylab.savefig('boxplot.pdf')  # this will save your figure!
# Optional: the 'poster' context just makes the figures easier to see.
sns.set_context('poster')
f = pylab.figure(figsize=(20, 10))

# Violin plot of the BT2_HFX_5 ratio per protein gene.
# `.loc[0:30, :]` slices by index label and includes both endpoints.
# The variable keeps its original name `box` in case later code refers to it.
box = sns.violinplot(x='Protein Gene', y='Total Area Ratio BT2_HFX_5', data=ms.loc[0:30, :])
#pylab.tight_layout()

# Save under its own name: the copy-pasted 'boxplot.pdf' overwrote the box
# plot written by the previous step.
pylab.savefig('violinplot.pdf')  # this will save your figure!