df <- read.table('~/Documents/work/paper_write/genome_training/R_training/vehicle_drug_feature_counts.txt',
header = T, sep = '\t', row.names = 1)
df <- df[,6:9]
par(mfrow = c(2,2), mar = c(4,4,1,1))
print( apply(df, 2, function(x) quantile(as.numeric(x))) )
## vehicle_rep1.bam vehicle_rep2.bam drug_rep1.bam drug_rep2.bam
## 0% 0 0 0 0
## 25% 14 16 15 18
## 50% 76 77 80 93
## 75% 200 211 228 262
## 100% 42634 34913 46189 45975
log1 <- apply(df, 2, function(x) {hist(log2(x), breaks = 100)})
idExp <- apply(df, 1, function(x) any(x > 16))
dfExp <- df[idExp, ]
print(dim(dfExp))
## [1] 4542 4
library(DESeq)
condition<- c('c','c', 't', 't')
cds <- newCountDataSet( dfExp, condition )
##normalize the data, extract the size factor for each assay
cds <- estimateSizeFactors(cds)
head( counts( cds, normalized=TRUE ) )
## vehicle_rep1.bam vehicle_rep2.bam drug_rep1.bam
## PF3D7_1314600 69.870870 54.19117 13.70677
## PF3D7_0209400 313.391404 287.01249 219.30829
## PF3D7_1142200 8.220102 20.07080 11.59803
## PF3D7_0507200 44.183050 45.15931 53.77271
## PF3D7_0806000 83.228537 62.21949 60.09891
## PF3D7_1006800 1231.987848 927.27112 1083.88904
## drug_rep2.bam
## PF3D7_1314600 22.852766
## PF3D7_0209400 189.220906
## PF3D7_1142200 2.742332
## PF3D7_0507200 39.306758
## PF3D7_0806000 60.331303
## PF3D7_1006800 1082.307015
##estimate the background gene expression
cds <- estimateDispersions( cds )
##estimate the background distribution
plotDispEsts(cds)
##p-value fit
res <- nbinomTest( cds, 'c', 't' )
plotMA(res)
## extract the genes with adjP < 0.0001
resSig = res[ which(res$padj < 0.0001), ]
dim(resSig)
## [1] 412 8
norData <- counts( cds, normalized=TRUE )
sigNorData <- norData[which(res$padj < 0.0001),]
save(sigNorData, resSig,
file = 'deseq.sig.R')
##extract normalized data
norDF <- counts( cds, normalized=TRUE )
heatMapDf <- norDF[(rownames(norDF) %in% resSig$id), ]
##heatmap plot
##plot heatmap
library("RColorBrewer")
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
hmcol = colorRampPalette(brewer.pal(9, "GnBu"))(100)
## expression heatmap
heatMapDF_nor <- t( apply(heatMapDf, 1, function(x){(x-mean(x))/sd(x)}) )
colnames(heatMapDF_nor) <- c('control1','control2',
'treat1' , 'treat2')
#pdf('aaaaa.pdf', height = 10, width = 10)
heatmap.2(heatMapDF_nor, col = hmcol, trace="none", margin=c(6, 6),labRow = F)
#dev.off()