knitr::opts_chunk$set(echo=TRUE, root.dir = "../")
#knitr::opts_knit$set(root.dir = "../")
source("Rsource/Hub_microbiome_workshop_source.R")
library(microbiome)
library(ggplot2)
library(ggpubr)
library(knitr)
infile_asv_counts<-"Rdata/demo_asv_counts.tsv"
infile_asv_tax<-"Rdata/demo_asvs_taxonomy.tsv"
infile_sample_data<-"Rdata/made_up_sample_data.tsv"
df_asv_counts<-read.delim(infile_asv_counts)
df_asv_tax<-read.delim(infile_asv_tax)
df_sample_data<-read.delim(infile_sample_data)
#Convert the data to matrix format
m_asv_counts<-as.matrix(df_asv_counts[2:length(df_asv_counts)])
rownames(m_asv_counts)<-df_asv_counts[,1]
m_asv_tax<-as.matrix(df_asv_tax[2:length(df_asv_tax)])
rownames(m_asv_tax)<-df_asv_tax[,1]
##There are 3 possible components to a phyloseq object: otu_table, sample_data, tax_table
pseq<-phyloseq(otu_table(m_asv_counts,taxa_are_rows = T),tax_table(m_asv_tax),sample_data(df_sample_data))
##Lets look at the data in our phyloseq object
##The functions used to create the components of a phyloseq object
##can be used to view the components of a phyloseq object
View(otu_table(pseq)@.Data)
View(microbiome::meta(pseq))
View(tax_table(pseq)@.Data)
infile<-"Rdata/otu_table.biom"
infile_meta<-"Rdata/meta_data.csv"
pseq<-read_biom2phyloseq(infile,metadata.file = infile_meta)
####Let's filter out some of the samples to simplify the analysis####
pseq<-subset_samples(pseq,group_name=="Without.comp.food" & diet !="Experimental infant formula")
####Summarize data at the Genus level. Not required, but convienent####
pseq<-aggregate_taxa(pseq,level="Genus")
View(otu_table(pseq)@.Data)
View(microbiome::meta(pseq))
#Alternative method View(sample_data(pseq))
View(tax_table(pseq)@.Data)
Remember alpha diversity is calculated within a sample so normalization is not neccessary
tab<-microbiome::diversity(pseq,index="all")
kable(tab)
inverse_simpson | gini_simpson | shannon | fisher | coverage | |
---|---|---|---|---|---|
12021.BF.31.2 | 1.461086 | 0.3155776 | 0.7334518 | 2.764950 | 1 |
12021.BF.35.2 | 3.239409 | 0.6913017 | 1.4592188 | 4.316774 | 2 |
12021.BF.36.2 | 1.209815 | 0.1734276 | 0.4126838 | 1.926832 | 1 |
12021.BF.40.2 | 3.314990 | 0.6983399 | 1.5670103 | 3.967838 | 2 |
12021.BP.14.2 | 1.316280 | 0.2402834 | 0.6508000 | 4.193167 | 1 |
12021.BP.39.2 | 1.347396 | 0.2578277 | 0.6238770 | 3.716607 | 1 |
12021.BP.39.3 | 1.394276 | 0.2827821 | 0.7055823 | 3.781214 | 1 |
12021.EP.44.2 | 3.365172 | 0.7028384 | 1.5589336 | 4.433606 | 2 |
12021.BF.24.2 | 2.526996 | 0.6042732 | 1.1008349 | 3.514795 | 1 |
12021.EF.72.2 | 2.496498 | 0.5994389 | 1.3141284 | 3.934783 | 1 |
12021.EP.73.2 | 1.268385 | 0.2115956 | 0.5902808 | 3.367423 | 1 |
12021.EF.43.2 | 2.890154 | 0.6539977 | 1.6377025 | 5.441104 | 1 |
12021.EF.18.2 | 4.096520 | 0.7558903 | 1.7336332 | 4.706827 | 2 |
12021.EF.18.3 | 4.506792 | 0.7781127 | 2.0434085 | 4.501478 | 2 |
12021.EF.3.2 | 3.236652 | 0.6910388 | 1.7216149 | 5.177305 | 1 |
12021.EP.21.2 | 3.109226 | 0.6783766 | 1.5413540 | 4.758156 | 2 |
12021.BF.2.3 | 3.907144 | 0.7440586 | 1.8177519 | 2.780483 | 2 |
12021.BF.32.2 | 2.844232 | 0.6484113 | 1.4098474 | 4.108981 | 1 |
12021.BP.14.3 | 2.506362 | 0.6010153 | 1.5259038 | 3.992907 | 1 |
12021.BF.13.3 | 3.113530 | 0.6788211 | 1.4755282 | 4.222052 | 2 |
12021.BF.14.2 | 1.815080 | 0.4490600 | 1.0369118 | 4.848389 | 1 |
12021.BF.27.3 | 2.983619 | 0.6648365 | 1.5698604 | 4.327041 | 1 |
12021.BP.13.2 | 3.842460 | 0.7397500 | 1.7151828 | 3.688179 | 2 |
12021.BP.30.2 | 1.130083 | 0.1151096 | 0.2888944 | 2.724956 | 1 |
12021.BP.36.2 | 1.636226 | 0.3888376 | 0.9279660 | 3.347275 | 1 |
12021.BP.36.3 | 2.829877 | 0.6466278 | 1.6325278 | 5.011669 | 1 |
12021.EF.34.2 | 3.199228 | 0.6874246 | 1.4182747 | 5.132423 | 2 |
12021.EF.43.3 | 2.772195 | 0.6392750 | 1.5234099 | 4.756638 | 1 |
12021.EF.56.2 | 5.024868 | 0.8009898 | 1.9333194 | 4.758426 | 2 |
12021.EP.11.2 | 5.512141 | 0.8185823 | 1.9482030 | 5.340070 | 3 |
12021.EP.11.3 | 4.333263 | 0.7692270 | 1.8614695 | 5.052094 | 2 |
12021.EP.21.3 | 4.868894 | 0.7946145 | 1.9279716 | 5.046062 | 2 |
12021.EP.39.3 | 3.389778 | 0.7049954 | 1.6301370 | 4.516335 | 2 |
12021.EP.57.2 | 2.777151 | 0.6399188 | 1.4065867 | 4.547713 | 1 |
12021.EF.75.3 | 2.591677 | 0.6141494 | 1.2782498 | 4.530516 | 1 |
12021.EP.2.2 | 2.673321 | 0.6259335 | 1.4773889 | 5.037734 | 1 |
12021.EP.29.2 | 3.633452 | 0.7247796 | 1.6710140 | 6.401069 | 2 |
12021.EP.70.3 | 2.891594 | 0.6541699 | 1.3999026 | 5.194287 | 1 |
12021.BP.31.2 | 2.657929 | 0.6237672 | 1.5945862 | 4.517235 | 1 |
12021.BF.23.2 | 3.931692 | 0.7456566 | 1.7785290 | 4.712511 | 2 |
12021.BP.22.2 | 1.160414 | 0.1382388 | 0.3943185 | 3.369552 | 1 |
12021.EP.39.2 | 3.954261 | 0.7471082 | 1.8022514 | 4.693744 | 2 |
12021.EF.54.2 | 3.140337 | 0.6815628 | 1.5221985 | 6.326166 | 2 |
12021.BP.31.3 | 2.533217 | 0.6052450 | 1.4851213 | 4.360590 | 1 |
12021.EF.63.2 | 2.697340 | 0.6292644 | 1.5830571 | 5.767317 | 1 |
12021.EF.63.3 | 1.693944 | 0.4096617 | 1.1229086 | 5.111896 | 1 |
12021.EP.2.3 | 1.601809 | 0.3757060 | 0.7573976 | 2.181124 | 1 |
12021.BP.23.3 | 1.319199 | 0.2419644 | 0.5438181 | 2.890437 | 1 |
12021.EF.75.2 | 2.096718 | 0.5230642 | 1.2058485 | 5.514221 | 1 |
12021.BF.13.2 | 3.902346 | 0.7437439 | 1.8001431 | 5.024132 | 2 |
12021.BF.7.2 | 2.349271 | 0.5743361 | 1.4671624 | 4.989032 | 1 |
12021.EF.48.2 | 3.196198 | 0.6871283 | 1.3842727 | 4.304816 | 2 |
12021.EF.48.3 | 5.915751 | 0.8309598 | 2.0115561 | 4.525398 | 3 |
12021.BP.30.3 | 1.574503 | 0.3648789 | 0.8245899 | 3.066261 | 1 |
12021.EF.13.2 | 1.877121 | 0.4672692 | 1.1608239 | 4.293382 | 1 |
12021.BP.10.2 | 2.101399 | 0.5241266 | 1.0757043 | 2.905318 | 1 |
12021.BP.23.2 | 1.467612 | 0.3186209 | 0.6800216 | 2.939048 | 1 |
12021.BF.9.2 | 1.444252 | 0.3076002 | 0.7877636 | 3.005226 | 1 |
12021.BP.4.2 | 2.776562 | 0.6398424 | 1.5383686 | 3.706074 | 1 |
12021.EP.70.2 | 3.135717 | 0.6810936 | 1.4509156 | 3.904831 | 2 |
12021.BF.27.2 | 1.304069 | 0.2331691 | 0.6358506 | 3.651137 | 1 |
12021.EF.1.2 | 2.001443 | 0.5003606 | 1.0494527 | 3.414295 | 1 |
12021.BP.8.3 | 1.375470 | 0.2729759 | 0.6691807 | 3.261206 | 1 |
12021.EP.23.2 | 2.759147 | 0.6375691 | 1.4449632 | 4.823316 | 1 |
12021.BF.2.2 | 2.052458 | 0.5127794 | 1.1499245 | 2.621351 | 1 |
12021.BP.8.2 | 1.882113 | 0.4686823 | 1.0043024 | 4.435313 | 1 |
##Let's create a dataframe for plotting the results
df_milk_meta<-microbiome::meta(pseq)
##Add Shannon divesity data to the dataframe
df_milk_meta$Shannon<-tab$shannon
##Let's create a plot comparing the diversity between breast milk and formula
plot_div<-ggviolin(df_milk_meta,x="diet",y="Shannon",
add="boxplot",fill="diet")
##Add stats (Wilcoxon test)
plot_div<-plot_div+stat_compare_means(comparisons = list(c("Breast milk","Standard infant formula")),
method="wilcox.test")
print(plot_div)
##Diversity is higher in the standard infant formula group
##Compute the relative level for each taxa
pseq.rel<-microbiome::transform(pseq,"compositional")
##Remove rare OTUs
pseq.rel.core<-core(pseq.rel,detection = 0,prevalence = 0.5)
##aggregate taxa to the class level (so data will fit on graph)
pseq.rel.core.family<-aggregate_taxa(pseq.rel.core,level="Family")
##Recommend to save as 10X10
comp_plot<-plot_composition(pseq.rel.core.family,group_by="diet")+
guides(fill=guide_legend(ncol=1))+
labs(x = "Samples", y = "Relative abundance",
title = "Relative abundance data")
print(comp_plot)
####Beta diversity and microbiome divergence
##Result is sensitive to sample size. Recommended to subsample or bootstrap to
##avoid bias
##Rarefying data is inappropriate for differential abundance testing,
##but rarefying gives the best results when comparing sample similarity
##Calculate the divergences within the diet groups
pseq.rarefied<-rarefy_even_depth(pseq,rngseed=8675309,replace=F)
##Calculate beta diversity with respect to the median taxa
##profile for each group
b.breast_milk<-as.numeric(divergence(x=subset_samples(pseq.rarefied,diet=="Breast milk"),y=apply(abundances(subset_samples(pseq.rarefied,diet=="Breast milk")),1,median),method="bray"))
b.formula<-as.numeric(divergence(x=subset_samples(pseq.rarefied,diet=="Standard infant formula"),y=apply(abundances(subset_samples(pseq.rarefied,diet=="Standard infant formula")),1,median),method="bray"))
#b.formula<-divergence(subset_samples(pseq.rarefied,diet="Standard infant formula"))
boxplot(list(Breast_Milk=b.breast_milk,Formula=b.formula))
wilcox.test(x=b.breast_milk,y=b.formula,alternative = "less")
##
## Wilcoxon rank sum test with continuity correction
##
## data: b.breast_milk and b.formula
## W = 181, p-value = 1.651e-06
## alternative hypothesis: true location shift is less than 0
##The breast milk feed infants have more similar microbiomes to each other
##than the forumla fed infants to do each other
####Sample ordination
library(vegan) #Perform statistics on ordination results
##Rarefying data is inappropriate for differential abundance testing,
##but rarefying gives the best results when comparing sample similarity
##Calculate the divergences within the diet groups
pseq.rarefied<-rarefy_even_depth(pseq,rngseed=8675309,replace=F)
##First lets plot the sample ordination
ord_plot<-plot_landscape(pseq.rarefied,method="PCoA",distance = "bray",col="diet")+labs(title="PCoA/Bray-Curtis")
print(ord_plot)
##We will use a PERMONVA to see if there are statistically distinguishable communties
dist<-vegdist(t(otu_table(pseq.rarefied)),method="bray")
##Check if the variances are equal between the groups. If not assumtion for PERMOVNA not met
print(anova(betadisper(dist,microbiome::meta(pseq.rarefied)$diet)))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.48369 0.48369 11.494 0.0012 **
## Residuals 64 2.69320 0.04208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##For this data the assumtion is not met, but here's the code
permonva_results<-adonis(t(otu_table(pseq.rarefied))~diet,data=microbiome::meta(pseq.rarefied),
permutations = 100,method="bray")
print(permonva_results)
##
## Call:
## adonis(formula = t(otu_table(pseq.rarefied)) ~ diet, data = microbiome::meta(pseq.rarefied), permutations = 100, method = "bray")
##
## Permutation: free
## Number of permutations: 100
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## diet 1 1.5153 1.51528 8.3636 0.11558 0.009901 **
## Residuals 64 11.5953 0.18118 0.88442
## Total 65 13.1105 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Show coefficients for the top taxa separating the groups
permonva_result_coefs<-coefficients(permonva_results)
permonva_result_diet<-permonva_result_coefs["diet1",]
top.diet.coef<-permonva_result_diet[rev(order(abs(permonva_result_diet)))[1:5]]
par(mar=c(3,9,2,1)) #Setting margin sizes requires trial and error
barplot(sort(top.diet.coef),horiz=T,las=1,main="Top taxa")
####To use metagenomeSeq need to create a new data object called MRexperiment####
library(metagenomeSeq)
pseq_mrexp<-phyloseq_to_metagenomeSeq(pseq)
###Keep samples with at least 1 count. Keep features with at least 30 count###
pseq_mrexp<-filterData(pseq_mrexp,present=30,depth=1)
###Normalization###
p<-cumNormStat(pseq_mrexp)
pseq_mrexp<-cumNorm(pseq_mrexp,p=p) #Calculate the scaling factors
###Create a design matrix and run statistical model###
phenotype_data<-pData(pseq_mrexp) #Get the metadata associated with this experiment
###Make breast milk the reference group (Reference is the denominator in logFC)###
phenotype_data$diet<-relevel(factor(phenotype_data$diet),"Breast milk")
##A design matrix specicies what comparisons we want to make
##For more on design matrices check this youtube video: https://www.youtube.com/watch?v=CqLGvwi-5Pc
design_matrix<-model.matrix(~1+diet,data=phenotype_data) #Can drop 1, we're just explicitly showing an intercept
feature_fit_model<-fitFeatureModel(obj=pseq_mrexp,mod=design_matrix)
###Identify the differentially abundant microbes###
taxas<-fData(pseq_mrexp)$unique
sig_taxa<-MRcoefs(feature_fit_model,adjustMethod = "fdr",alpha = 0.1,taxa=taxas)
write.csv(sig_taxa,"Routput/sig_taxa_table_metagenomeSeq.csv")
####Graph the results###
##Identify the rows containing the abundance info for the significant taxa
sig_taxa_index<-which(rownames(MRcounts(pseq_mrexp)) %in% rownames(sig_taxa))
##Get the descriptions of the significant taxa
sig_taxa_desc<-rownames(MRcounts(pseq_mrexp))[sig_taxa_index]
##Find data locations for the 2 groups
classIndex<-list(Breast_milk=which(pData(pseq_mrexp)$diet=="Breast milk",arr.ind = T))
classIndex$Infant_formula<-which(pData(pseq_mrexp)=="Standard infant formula",arr.ind = T)
pdf("Rfigs/sig_taxa_metagenomeSeq.pdf")
for(i in seq(1,length(sig_taxa_index))){
taxa_index<-sig_taxa_index[[i]]
taxa_desc<-sig_taxa_desc[[i]]
plotOTU(pseq_mrexp,otu=taxa_index,classIndex = classIndex,main=taxa_desc)
}
dev.off()
## quartz_off_screen
## 2
library(DESeq2)
sample_data(pseq)$diet<-relevel(factor(sample_data(pseq)$diet), "Breast milk")
##convert data to a format used by DESeq (group by diet)
pseq.deseq<-phyloseq_to_deseq2(pseq,~diet)
##Perform normalization and differential abundance test
pseq.deseq<-DESeq(pseq.deseq,test="Wald",fitType = "parametric")
##Get and filter the results
res<-results(pseq.deseq,cooksCutoff = F,pAdjustMethod = "BH",alpha=0.1)
res<-res[complete.cases(res),] ##Remove any NA results
##Write the results to file
res_table<-cbind(as(res,"data.frame"),as(tax_table(pseq)[rownames(res),],"matrix"))
write.csv(res_table,"Routput/sig_taxa_table_deseq2.csv")
##Graph the results
##Normalize the expersion data
pseq.clr<-microbiome::transform(pseq,"clr")
exp_data_clr<-abundances(pseq.clr)
##Keep only the differentially abundant microbes
exp_data_clr_sig<-exp_data_clr[which(rownames(exp_data_clr) %in% rownames(res)),]
exp_data_clr_sig<-melt(exp_data_clr_sig)
colnames(exp_data_clr_sig)<-c("Taxa","Sample","CLR")
##Extract the metadata
df_meta<-microbiome::meta(pseq.clr)
df_meta$Sample<-rownames(df_meta)
df_meta<-df_meta %>% dplyr::select(Sample,diet)
df_dam<-left_join(exp_data_clr_sig,df_meta,by="Sample")
##Create the graphs
pdf("Rfigs/graph_deseq2_sig.pdf")
for(taxon in unique(df_dam$Taxa)){
df_dam_taxon<-df_dam %>% filter(Taxa==taxon)
plot<-ggplot(df_dam_taxon,aes(x=diet,y=CLR))+geom_dotplot(binaxis = "y",stackdir = "center")
plot<-plot+ggtitle(taxon)+theme(plot.title = element_text(hjust=0.5))
print(plot)
}
dev.off()
## quartz_off_screen
## 2
##Normalize the expersion data
pseq.norm<-microbiome::transform(pseq,"compositional")
exp_data_norm<-abundances(pseq.norm)
##Keep only the differentially abundant microbes
exp_data_norm_sig<-exp_data_norm[which(rownames(exp_data_norm) %in% rownames(res)),]
exp_data_norm_sig<-melt(exp_data_norm_sig)
colnames(exp_data_norm_sig)<-c("Taxa","Sample","Relative_Abundance")
##Extract the metadata
df_meta<-microbiome::meta(pseq.norm)
df_meta$Sample<-rownames(df_meta)
df_meta<-df_meta %>% dplyr::select(Sample,diet)
df_dam<-left_join(exp_data_norm_sig,df_meta,by="Sample")
##Create the graphs
pdf("Rfigs/graph_deseq2_sig_ra.pdf")
for(taxon in unique(df_dam$Taxa)){
df_dam_taxon<-df_dam %>% filter(Taxa==taxon)
plot<-ggplot(df_dam_taxon,aes(x=diet,y=Relative_Abundance))+geom_dotplot(binaxis = "y",stackdir = "center")
plot<-plot+ggtitle(taxon)+theme(plot.title = element_text(hjust=0.5))
print(plot)
}
dev.off()
## quartz_off_screen
## 2