## Load a per-feature read-count table and convert the counts to FPKM.
## NOTE(review): the file name suggests featureCounts output -- confirm.
setwd('~/Documents/work/paper_write/genome_training/R_training/')
## The first column of the file becomes the row names of df.
df <- read.table('vehicle_drug_feature_counts.txt',
                 header = TRUE, sep = '\t', row.names = 1)
## Columns 6-9 are treated as the per-sample read counts
## (presumably two controls followed by two treatments, see the names below).
dfReads <- df[, 6:9]
## FPKM = count / feature length * 10^9 / total counts of the sample.
## Dividing the count matrix by df$Length recycles the lengths down each
## column; sweep() then divides every column by its own column total.
## The order of operations matches the per-column formula, so the values
## are unchanged while avoiding apply() over a data frame.
fpkm <- sweep(as.matrix(dfReads) / df$Length * 10^9,
              2, colSums(dfReads), '/')
## Change the column names.
colnames(fpkm) <- c('c1', 'c2', 't1', 't2')
## Save the FPKM matrix as a tab-separated, unquoted table with row and
## column names.
write.table(fpkm, file = 'vehicle_drug_feature_counts.fpkm.txt',
            row.names = TRUE, col.names = TRUE, sep = '\t',
            quote = FALSE)
## Scatter plot of the first two FPKM columns on the raw scale.
plot(x = fpkm[,1], y = fpkm[,2])
## Same comparison on the log2 scale.
plot(x = log2(fpkm[,1]), y = log2(fpkm[,2]))
## Change the point colour: solid red dots.
plot(x = log2(fpkm[,1]), y = log2(fpkm[,2]),
     pch = 19, col = "red")
## Symbol 21 with a red background (bg) instead of a solid dot.
plot(x = log2(fpkm[,1]), y = log2(fpkm[,2]),
     pch = 21, bg = "red")
## Change the axis labels.
plot(x = log2(fpkm[,1]), y = log2(fpkm[,2]),
     pch = 21, bg = "red",
     xlab = "log2(control1_FPKM)",
     ylab = "log2(control2_FPKM)")
## Change the direction of the numbers on the y-axis (las = 1).
plot(x = log2(fpkm[,1]), y = log2(fpkm[,2]),
     pch = 21, bg = "red",
     las = 1,
     xlab = "log2(control1_FPKM)",
     ylab = "log2(control2_FPKM)")
## Add a regression line to the current plot.
## Keep only the rows whose log2 value is finite in both columns
## (log2 of a zero FPKM is -Inf and cannot be used in the fit).
id <- which(is.finite(log2(fpkm[, 1])) & is.finite(log2(fpkm[, 2])))
nrow(fpkm)
## [1] 5712
length(id)
## [1] 5023
## Fit log2(column 2) against log2(column 1) once and reuse the model.
fit <- lm(log2(fpkm[id, 2]) ~ log2(fpkm[id, 1]))
abline(fit)
## Draw the line again with a larger width.
abline(fit,
       lwd = 3, col = "black")
library(ggplot2)
## Data frame holding the first two FPKM columns as plain numeric vectors.
fpkm_c <- data.frame(c1 = as.numeric(fpkm[, 1]),
                     c2 = as.numeric(fpkm[, 2]))
## Scatter plot on the raw scale with a linear fit and no confidence band.
ggplot(fpkm_c, aes(x = c1, y = c2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## Same plot with other colours.
ggplot(fpkm_c, aes(x = c1, y = c2)) +
  geom_point(col = "red") +
  geom_smooth(method = "lm", se = FALSE, col = "navy")
## log2 scale with 2D density contours.
## log2() of a zero FPKM is -Inf; passing those rows straight to ggplot made
## geom_density_2d() warn "Removed 689 rows containing non-finite values".
## Keep only the rows that are finite in both columns before plotting.
fpkm_log2 <- log2(fpkm_c)
fpkm_log2 <- fpkm_log2[is.finite(fpkm_log2$c1) & is.finite(fpkm_log2$c2), ]
ggplot(fpkm_log2, aes(x = c1, y = c2)) +
  geom_point() +
  geom_density_2d()