knitr::opts_chunk$set(echo=TRUE, root.dir = "../")
#knitr::opts_knit$set(root.dir = "../")
source("Rsource/Hub_microbiome_workshop_source.R")
library(microbiome)
library(ggplot2)
library(ggpubr)
library(knitr)

Create a phyloseq object from DADA2 output

infile_asv_counts<-"Rdata/demo_asv_counts.tsv"
infile_asv_tax<-"Rdata/demo_asvs_taxonomy.tsv"
infile_sample_data<-"Rdata/made_up_sample_data.tsv"

df_asv_counts<-read.delim(infile_asv_counts)
df_asv_tax<-read.delim(infile_asv_tax)
df_sample_data<-read.delim(infile_sample_data)
#Convert the data to matrix format
m_asv_counts<-as.matrix(df_asv_counts[2:length(df_asv_counts)])
rownames(m_asv_counts)<-df_asv_counts[,1]

m_asv_tax<-as.matrix(df_asv_tax[2:length(df_asv_tax)])
rownames(m_asv_tax)<-df_asv_tax[,1]

##There are 3 possible components to a phyloseq object: otu_table, sample_data, tax_table

pseq<-phyloseq(otu_table(m_asv_counts,taxa_are_rows = T),tax_table(m_asv_tax),sample_data(df_sample_data))


##Lets look at the data in our phyloseq object
##The functions used to create the components of a phyloseq object
##can be used to view the components of a phyloseq object
View(otu_table(pseq)@.Data)
View(microbiome::meta(pseq))
View(tax_table(pseq)@.Data)

Read in a dataset with interesting results and perform some initial processing

infile<-"Rdata/otu_table.biom"
infile_meta<-"Rdata/meta_data.csv"


pseq<-read_biom2phyloseq(infile,metadata.file = infile_meta)

####Let's filter out some of the samples to simplify the analysis####
pseq<-subset_samples(pseq,group_name=="Without.comp.food" & diet !="Experimental infant formula")
####Summarize data at the Genus level. Not required, but convienent####
pseq<-aggregate_taxa(pseq,level="Genus")

Inspect the phyloseq object

View(otu_table(pseq)@.Data)
View(microbiome::meta(pseq)) 
#Alternative method View(sample_data(pseq))
View(tax_table(pseq)@.Data)

Calculate alpha diversity from milk study

Remember alpha diversity is calculated within a sample so normalization is not neccessary

tab<-microbiome::diversity(pseq,index="all")
kable(tab)
inverse_simpson gini_simpson shannon fisher coverage
12021.BF.31.2 1.461086 0.3155776 0.7334518 2.764950 1
12021.BF.35.2 3.239409 0.6913017 1.4592188 4.316774 2
12021.BF.36.2 1.209815 0.1734276 0.4126838 1.926832 1
12021.BF.40.2 3.314990 0.6983399 1.5670103 3.967838 2
12021.BP.14.2 1.316280 0.2402834 0.6508000 4.193167 1
12021.BP.39.2 1.347396 0.2578277 0.6238770 3.716607 1
12021.BP.39.3 1.394276 0.2827821 0.7055823 3.781214 1
12021.EP.44.2 3.365172 0.7028384 1.5589336 4.433606 2
12021.BF.24.2 2.526996 0.6042732 1.1008349 3.514795 1
12021.EF.72.2 2.496498 0.5994389 1.3141284 3.934783 1
12021.EP.73.2 1.268385 0.2115956 0.5902808 3.367423 1
12021.EF.43.2 2.890154 0.6539977 1.6377025 5.441104 1
12021.EF.18.2 4.096520 0.7558903 1.7336332 4.706827 2
12021.EF.18.3 4.506792 0.7781127 2.0434085 4.501478 2
12021.EF.3.2 3.236652 0.6910388 1.7216149 5.177305 1
12021.EP.21.2 3.109226 0.6783766 1.5413540 4.758156 2
12021.BF.2.3 3.907144 0.7440586 1.8177519 2.780483 2
12021.BF.32.2 2.844232 0.6484113 1.4098474 4.108981 1
12021.BP.14.3 2.506362 0.6010153 1.5259038 3.992907 1
12021.BF.13.3 3.113530 0.6788211 1.4755282 4.222052 2
12021.BF.14.2 1.815080 0.4490600 1.0369118 4.848389 1
12021.BF.27.3 2.983619 0.6648365 1.5698604 4.327041 1
12021.BP.13.2 3.842460 0.7397500 1.7151828 3.688179 2
12021.BP.30.2 1.130083 0.1151096 0.2888944 2.724956 1
12021.BP.36.2 1.636226 0.3888376 0.9279660 3.347275 1
12021.BP.36.3 2.829877 0.6466278 1.6325278 5.011669 1
12021.EF.34.2 3.199228 0.6874246 1.4182747 5.132423 2
12021.EF.43.3 2.772195 0.6392750 1.5234099 4.756638 1
12021.EF.56.2 5.024868 0.8009898 1.9333194 4.758426 2
12021.EP.11.2 5.512141 0.8185823 1.9482030 5.340070 3
12021.EP.11.3 4.333263 0.7692270 1.8614695 5.052094 2
12021.EP.21.3 4.868894 0.7946145 1.9279716 5.046062 2
12021.EP.39.3 3.389778 0.7049954 1.6301370 4.516335 2
12021.EP.57.2 2.777151 0.6399188 1.4065867 4.547713 1
12021.EF.75.3 2.591677 0.6141494 1.2782498 4.530516 1
12021.EP.2.2 2.673321 0.6259335 1.4773889 5.037734 1
12021.EP.29.2 3.633452 0.7247796 1.6710140 6.401069 2
12021.EP.70.3 2.891594 0.6541699 1.3999026 5.194287 1
12021.BP.31.2 2.657929 0.6237672 1.5945862 4.517235 1
12021.BF.23.2 3.931692 0.7456566 1.7785290 4.712511 2
12021.BP.22.2 1.160414 0.1382388 0.3943185 3.369552 1
12021.EP.39.2 3.954261 0.7471082 1.8022514 4.693744 2
12021.EF.54.2 3.140337 0.6815628 1.5221985 6.326166 2
12021.BP.31.3 2.533217 0.6052450 1.4851213 4.360590 1
12021.EF.63.2 2.697340 0.6292644 1.5830571 5.767317 1
12021.EF.63.3 1.693944 0.4096617 1.1229086 5.111896 1
12021.EP.2.3 1.601809 0.3757060 0.7573976 2.181124 1
12021.BP.23.3 1.319199 0.2419644 0.5438181 2.890437 1
12021.EF.75.2 2.096718 0.5230642 1.2058485 5.514221 1
12021.BF.13.2 3.902346 0.7437439 1.8001431 5.024132 2
12021.BF.7.2 2.349271 0.5743361 1.4671624 4.989032 1
12021.EF.48.2 3.196198 0.6871283 1.3842727 4.304816 2
12021.EF.48.3 5.915751 0.8309598 2.0115561 4.525398 3
12021.BP.30.3 1.574503 0.3648789 0.8245899 3.066261 1
12021.EF.13.2 1.877121 0.4672692 1.1608239 4.293382 1
12021.BP.10.2 2.101399 0.5241266 1.0757043 2.905318 1
12021.BP.23.2 1.467612 0.3186209 0.6800216 2.939048 1
12021.BF.9.2 1.444252 0.3076002 0.7877636 3.005226 1
12021.BP.4.2 2.776562 0.6398424 1.5383686 3.706074 1
12021.EP.70.2 3.135717 0.6810936 1.4509156 3.904831 2
12021.BF.27.2 1.304069 0.2331691 0.6358506 3.651137 1
12021.EF.1.2 2.001443 0.5003606 1.0494527 3.414295 1
12021.BP.8.3 1.375470 0.2729759 0.6691807 3.261206 1
12021.EP.23.2 2.759147 0.6375691 1.4449632 4.823316 1
12021.BF.2.2 2.052458 0.5127794 1.1499245 2.621351 1
12021.BP.8.2 1.882113 0.4686823 1.0043024 4.435313 1
##Let's create a dataframe for plotting the results
df_milk_meta<-microbiome::meta(pseq)
##Add Shannon divesity data to the dataframe
df_milk_meta$Shannon<-tab$shannon

##Let's create a plot comparing the diversity between breast milk and formula
plot_div<-ggviolin(df_milk_meta,x="diet",y="Shannon",
             add="boxplot",fill="diet")
##Add stats (Wilcoxon test)
plot_div<-plot_div+stat_compare_means(comparisons = list(c("Breast milk","Standard infant formula")),
                          method="wilcox.test")
print(plot_div)

##Diversity is higher in the standard infant formula group

Composition plot

##Compute the relative level for each taxa
pseq.rel<-microbiome::transform(pseq,"compositional")
##Remove rare OTUs
pseq.rel.core<-core(pseq.rel,detection = 0,prevalence = 0.5)
##aggregate taxa to the class level (so data will fit on graph)
pseq.rel.core.family<-aggregate_taxa(pseq.rel.core,level="Family")

##Recommend to save as 10X10
comp_plot<-plot_composition(pseq.rel.core.family,group_by="diet")+
  guides(fill=guide_legend(ncol=1))+
    labs(x = "Samples", y = "Relative abundance",
     title = "Relative abundance data")
print(comp_plot)