Microarray data processing with R/Bioconductor

  • a large collection of life sciences software packages implemented in R

  • an extensive collection of experimental and annotation data that relate to the analysis software.

Documentation

  • The packages are written, documented and supported according to consistent standards.

  • Each package has a website. For example see:

affy

affydata

  • Both PDF documentation and example R scripts exist.

Mailing List

Instructions on joining the mailing list

IMPORTANT: Carefully read the posting guide and follow the instructions.

  • The list is high-traffic and widely read so be careful with postings.

Usage Example

# Connect to rous.mit.edu and start R
R
# List the packages that are available:
library()
# Load the affydata package
library(affydata)
# Try out a few commands on the Dilution data set
data(Dilution)
ls()
Dilution
class(Dilution)
expressionData <- exprs(Dilution)
class(expressionData)
# First three rows: as stored, log2-transformed, then rounded to two decimals
expressionData[1:3, ]
log2(expressionData[1:3, ])
round(log2(expressionData[1:3, ]), digits = 2)

Processing Microarray Data with R/Bioconductor

The following series of commands can be used to process array data with gcrma and do differential expression testing with LPE:

library(affy)
library(gcrma)
library(LPE)
library(affyPLM)

# Set the working directory to the folder that holds your CEL files.

setwd("/Path/TO/CEL_Files")

# Import the data.
# The order of the samples in the resulting matrix follows the order of the
# file names listed here.

Name_Dat <- ReadAffy(filenames = c(
  "Condition1a.CEL", "Condition1b.CEL", "Condition1c.CEL",
  "Condition2a.CEL", "Condition2b.CEL", "Condition2c.CEL"
))

# RNA degradation: compute the summary and save its plot as a PNG file
RNAdeg <- AffyRNAdeg(Name_Dat)
png(filename = "Name_rnaDeg.png", bg = "white")
plotAffyRNAdeg(RNAdeg, cols = 1:16)
dev.off()

# Fit the probe-level model; both plots below are drawn from this fit
pset1 <- fitPLM(Name_Dat)

# RLE plot, saved to rle.png
# (mar widens the left margin for the horizontally drawn plot with las = 2 labels)
png(filename = "rle.png", bg = "white")
par(mar = c(3, 10, 3, 3))
RLE(pset1, main = "RLE for Name", horizontal = TRUE, las = 2)
dev.off()

# NUSE plot, saved to nuse.png, with the value axis limited to 0.95-1.2
png(filename = "nuse.png", bg = "white")
par(mar = c(3, 10, 3, 3))
NUSE(pset1, ylim = c(0.95, 1.2), main = "NUSE for Name", horizontal = TRUE, las = 2)
dev.off()

# Process the data with gcrma, then extract the expression matrix
# rounded to two decimal places
Name_Exp <- gcrma(Name_Dat, fast = FALSE)

Name_Tab <- round(exprs(Name_Exp), digits = 2)

# Save the expression table as tab-delimited text
write.table(data.frame(Name_Tab), file = "Name.txt", sep = "\t", quote = FALSE)

# Differential Expression Testing
# In an editor, add "ProbeID" to the top of the first column of Name.txt and
# delete the affy control rows that start with AFFX.

Name <- read.table("Name.txt", header = TRUE)

# Fail early if the header was not edited as described above; every column
# index used below assumes ProbeID is column 1.
stopifnot("ProbeID" %in% names(Name))

# Safety net: drop any AFFX control rows that were left in the file.
# This is a no-op when they were already deleted by hand.
Name <- Name[!grepl("^AFFX", Name$ProbeID), ]
names(Name)

set.seed(0)

# Columns holding the replicates of each condition (column 1 is ProbeID)
cond1_cols <- 2:4
cond2_cols <- 5:7

# Check the column assignment by printing the first 3 rows for each condition
# var.Cond1
Name[1:3, cond1_cols]
# var.Cond2
Name[1:3, cond2_cols]

# LPE tests
var.Cond1 <- baseOlig.error(Name[, cond1_cols], q = 0.01)
var.Cond2 <- baseOlig.error(Name[, cond2_cols], q = 0.01)

# x is the Cond2 data and y is the Cond1 data, so basevar.x must be the Cond2
# baseline variance and basevar.y the Cond1 one. The arguments are named so
# the pairing cannot silently get swapped.
lpe.raw.Cond1.Cond2 <- data.frame(lpe(
  Name[, cond2_cols], Name[, cond1_cols],
  basevar.x = var.Cond2, basevar.y = var.Cond1,
  probe.set.name = Name$ProbeID
))

# Adjust on the unrounded statistics; rounding first would coarsen the values
# that the FDR is computed from.
fdrBH.Cond1.Cond2 <- fdr.adjust(lpe.raw.Cond1.Cond2, adjp = "BH")

# Round only the copy that is written out for reading.
lpeVal.Cond1.Cond2 <- round(lpe.raw.Cond1.Cond2, digits = 2)

write.table(lpeVal.Cond1.Cond2, quote = FALSE, sep = "\t", file = "lpeVal.Cond1.Cond2.txt")
write.table(fdrBH.Cond1.Cond2, quote = FALSE, sep = "\t", file = "fdrBH.Cond1.Cond2.txt")

# Clustering with p-values

library(pvclust)

# Cluster on the numeric expression columns only; an identifier column such
# as ProbeID cannot be used in the correlation distance.
Name.num <- Name[, vapply(Name, is.numeric, logical(1)), drop = FALSE]

# NOTE(review): newer R versions call this linkage "ward.D"; "ward" is kept
# here as in the original -- confirm against the installed pvclust version.
Name.pv <- pvclust(Name.num, method.hclust = "ward",
  method.dist = "correlation", use.cor = "pairwise.complete.obs",
  nboot = 1000, r = seq(.5, 1.4, by = .1), store = FALSE, weight = FALSE)

# Plot the clustering result computed above
png(filename = "Name_PV.png", bg = "white", width = 960, height = 500)
plot(Name.pv)
dev.off()

Last updated

Massachusetts Institute of Technology