0.1 Preprocessing:

Basic dada2 pipeline for 16S:
truncLen=c(200, 180)
trimLeft=c(19,20)
maxEE=c(3,4)
truncQ=2
pool=pseudo
database - SILVA 138.1

Delete all bad taxa with parameters below:
annosighted on the phylum level +
chloroplasts/mithondria +
low reads/richness(based on the plot results)

tree from SEPP-QIIME2-plagin:

qiime tools import
–input-path ref_filt.fasta
–output-path rep_seq.qza
–type ‘FeatureData[Sequence]’ qiime fragment-insertion sepp
–i-representative-sequences rep_seq.qza   –i-reference-database sepp-refs-silva-128.qza
–o-tree insertion-tree.qza
–o-placements insertion-placements.qza
–p-threads 40
unzip -p insertion-tree.qza /data/ > tree.nwk

ITS preprocessing:

dada2 with cutadapt trimming
maxEE=c(5,8)
truncQ = 8
minLen = 50
pool=“pseudo”
UNITE version - 10.05.2021

ML tree from IQ-TREE programm:

mafft –auto FASTA > FASTA_ALIGHNED
iqtree -s FASTA_ALIGHNED -nt 30 -B 1000

0.2 Import

Import phyloseq object and libraries
Dataset - straw decomposition time series - from factor Day - 10 levels Also in the dataset added bulk soils samples.

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(ANCOMBC)
library(heatmaply)
library(compositions)
library(igraph)
library(WGCNA)
require(DESeq2)
require(phyloseq)

ps_bulk <- readRDS("ps_bulk")

0.3 functions

#phyloseq object to ampvis2 object
#https://gist.github.com/KasperSkytte/8d0ca4206a66be7ff6d76fc4ab8e66c6

phyloseq_to_ampvis2 <- function(physeq) {
  #check object for class
  if(!any(class(physeq) %in% "phyloseq"))
    stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
  
  #ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
  if(is.null(physeq@tax_table))
    stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
  
  #OTUs must be in rows, not columns
  if(phyloseq::taxa_are_rows(physeq))
    abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
  else
    abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
  
  #tax_table is assumed to have OTUs in rows too
  tax <- phyloseq::tax_table(physeq)@.Data
  
  #merge by rownames (OTUs)
  otutable <- merge(
    abund,
    tax,
    by = 0,
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  )
  colnames(otutable)[1] <- "OTU"
  
  #extract sample_data (metadata)
  if(!is.null(physeq@sam_data)) {
    metadata <- data.frame(
      phyloseq::sample_data(physeq),
      row.names = phyloseq::sample_names(physeq), 
      stringsAsFactors = FALSE, 
      check.names = FALSE
    )
    
    #check if any columns match exactly with rownames
    #if none matched assume row names are sample identifiers
    samplesCol <- unlist(lapply(metadata, function(x) {
      identical(x, rownames(metadata))}))
    
    if(any(samplesCol)) {
      #error if a column matched and it's not the first
      if(!samplesCol[[1]])
        stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
    } else {
      #assume rownames are sample identifiers, merge at the end with name "SampleID"
      if(any(colnames(metadata) %in% "SampleID"))
        stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
      metadata$SampleID <- rownames(metadata)
      
      #reorder columns so SampleID is the first
      metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
    }
  } else
    metadata <- NULL
  
  #extract phylogenetic tree, assumed to be of class "phylo"
  if(!is.null(physeq@phy_tree)) {
    tree <- phyloseq::phy_tree(physeq)
  } else
    tree <- NULL
  
  #extract OTU DNA sequences, assumed to be of class "XStringSet"
  if(!is.null(physeq@refseq)) {
    #convert XStringSet to DNAbin using a temporary file (easiest)
    fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
    Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
  } else
    fastaTempFile <- NULL
  
  #load as normally with amp_load
  ampvis2::amp_load(
    otutable = otutable,
    metadata = metadata,
    tree = tree,
    fasta = fastaTempFile
  )
}

#variance stabilisation from DESeq2

ps_vst <- function(ps, factor){
  

  
  diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))              
  diagdds = estimateSizeFactors(diagdds, type="poscounts")
  diagdds = estimateDispersions(diagdds, fitType = "local") 
  pst <- varianceStabilizingTransformation(diagdds)
  pst.dimmed <- t(as.matrix(assay(pst))) 
  # pst.dimmed[pst.dimmed < 0.0] <- 0.0
  ps.varstab <- ps
  otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE) 
  return(ps.varstab)
} 

#WGCNA visualisation
#result - list class object with attributes:
# ps - phyloseq object
# amp - ampwis2 object
# heat - heatmap with absalute read numbers
# heat_rel - hetmap with relative abundances
# tree - phylogenetic tree with taxonomy

color_filt <- function(ps, df){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps, function(x) x / sum(x) * 100)
    ps.rel.col <- prune_taxa(color_name, ps.rel)
    amp.r <- phyloseq_to_ampvis2(ps.rel.col)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col@phy_tree
    taxa <- as.data.frame(ps.col@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa) %>%
                      kableExtra::kable_paper() %>%
                      kableExtra::scroll_box(width ="100%", height = "200px"))
                    
  }
  return(l)
}


color_filt_broken <- function(ps, df, ps.pruned){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    ps.col.pruned <- prune_taxa(color_name, ps.pruned)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
    amp.r <- phyloseq_to_ampvis2(ps.rel)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col.pruned@phy_tree
    taxa <- as.data.frame(ps.col.pruned@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa) %>%
                      kableExtra::kable_paper() %>%
                      kableExtra::scroll_box(width ="100%", height = "200px"))
  }
  return(l)
}

detachAllPackages <- function() {

  basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")

  package.list <- search()[ifelse(unlist(gregexpr("package:",search()))==1,TRUE,FALSE)]

  package.list <- setdiff(package.list,basic.packages)

  if (length(package.list)>0)  for (package in package.list) detach(package, character.only=TRUE, force = TRUE)

}

plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <-  seq(max(er[[metric]]), length=length(stat.test$p.adj), by=max(er[[metric]]/20))

  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    ggpubr::stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}


# standart NMDS plot tool frop phyloseq with some additional aestatics 
# have stress value on plot - may work as fuck
beta_custom_norm_NMDS_elli_w <- function(ps, seed = 7888, normtype="vst", Color="What", Group="Repeat"){
  require(phyloseq)
  require(ggplot2)
  require(ggpubr)
  library(ggforce)
  
  
  ordination.b <- ordinate(ps, "NMDS", "bray")
  mds <- as.data.frame(ordination.b$points)
  p  <-  plot_ordination(ps,
                         ordination.b,
                         type="sample",
                         color = Color,
                         title="NMDS - Bray-Curtis",
                         # title=NULL,
                         axes = c(1,2) ) + 
    theme_bw() + 
    theme(text = element_text(size = 10)) + 
    geom_point(size = 3) +
    annotate("text",
    x=min(mds$MDS1) + abs(min(mds$MDS1))/7,
    y=max(mds$MDS2),
    label=paste0("Stress -- ", round(ordination.b$stress, 3))) +
    geom_mark_ellipse(aes_string(group = Group, label = Group),
                      label.fontsize = 10,
                      label.buffer = unit(2, "mm"),
                      label.minwidth = unit(5, "mm"),
                      con.cap = unit(0.1, "mm"),
                      con.colour='gray') +
    theme(legend.position = "none") +
    ggplot2::scale_fill_viridis_c(option = "H")
  
  return(p)
}


# alpha with aov + tukie post-hock - useless, but it looks pretty good
plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <- seq(max(er[[metric]]), length=length(stat.test$p.adj.signif[stat.test$p.adj.signif != "ns"]), by=max(er[[metric]]/20))
  
  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y,
      hide.ns=TRUE) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}

lsf.str()
## beta_custom_norm_NMDS_elli_w : function (ps, seed = 7888, normtype = "vst", Color = "What", Group = "Repeat")  
## color_filt : function (ps, df)  
## color_filt_broken : function (ps, df, ps.pruned)  
## detachAllPackages : function ()  
## phyloseq_to_ampvis2 : function (physeq)  
## plot_alpha_w_toc : function (ps, group, metric)  
## ps_vst : function (ps, factor)

0.3.1 modify metadata


Add Group parameter to metadata -
- early - D01, D03, D05
- middle - D07, D08, D10
- late - D13, D14, D15

sample.data <- ps_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", 
                                 if_else(Day %in% c("D12", "D13", "D14", "D15"), "late", "bulk")))) %>% 
  dplyr::rename('Bag' = 'Day') %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late", "bulk"))) %>% 
  mutate(Day = Bag %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps_bulk) <- sample.data

 ps_bulk@sam_data %>% 
  DT::datatable(caption = "Sample data of phyloseq - 16S")

0.3.2 Select from the dataset only experemental part

ps.f <- prune_samples(!sample_data(ps_bulk)$Day %in% "bulk", ps_bulk)
ps.f  <- prune_taxa(taxa_sums(ps.f) > 0, ps.f)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ps.f@sam_data  %>% 
  DT::datatable(caption = "Sample data of phyloseq - 16S")

0.4 Alpha diversity

With bulk soil

p1 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "Observed")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(group)
## 
##   # Now:
##   data %>% select(all_of(group))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(metric)
## 
##   # Now:
##   data %>% select(all_of(metric))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
p2 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

Without bulk soil

p1 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

p.observed <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
p.shannon <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
p.simpson <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

0.4.0.1 mpd

mpd - index of alpha diversity “tree shagginess” - counted separately because it is in a separate package with built-in validity (based on permutations)
here is a link about the meanin of values in the columns:
https://www.rdocumentation.org/packages/picante/versions/1.8.2/topics/ses.mpd
in general the early stage is significantly less diverse than the other stages

physeq_merged <- merge_samples(ps.f, "Group", fun=sum)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# ps.f@sam_data

# picante::mpd(l_vst$blue$ps@otu_table@.Data, cophenetic(l_vst$blue$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)
# 
# picante::mpd(l_vst$salmon$ps@otu_table@.Data, cophenetic(l_vst$salmon$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)

mpd.res <- picante::ses.mpd(physeq_merged@otu_table@.Data, cophenetic(physeq_merged@phy_tree)) 

as.data.frame(mpd.res)
##        ntaxa  mpd.obs mpd.rand.mean mpd.rand.sd mpd.obs.rank  mpd.obs.z
## early    344 1.707396      1.841757  0.02880358            1 -4.6647217
## middle   445 1.832365      1.842819  0.02280044          335 -0.4585186
## late     687 1.852563      1.841550  0.01436611          786  0.7666258
##        mpd.obs.p runs
## early      0.001  999
## middle     0.335  999
## late       0.786  999

0.5 beta statistic

0.5.1 permanova

permanova - group significantlly different - dispersion between is more, than inside groups

dist <- phyloseq::distance(ps.f, "bray")
metadata <- as(sample_data(ps.f@sam_data), "data.frame")
vegan::adonis2(dist ~ Group, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = dist ~ Group, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Group     2   3.2510 0.33952 8.2247  0.001 ***
## Residual 32   6.3243 0.66048                  
## Total    34   9.5753 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.5.2 dynamic of beta diversity

Bray Curtis distance by steps between each day.   (For example, all D15 versus D14.) Can we also put soil respiration on this picture as well?

ps.f.r <- rarefy_even_depth(ps.f, rngseed = 777)

avg.r <- ps.f.r@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

avg <- ps.f@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

# avg %>%
#   as.matrix() %>%
#   as_tibble(rownames= "sample") %>%
#   pivot_longer(-sample) %>%
#   filter(sample < name) %>%
#   mutate(repeat_a = str_replace(sample, ".*-", ""),
#          repeat_b = str_replace(name, ".*-", ""), 
#          day_a = as.numeric(str_replace(sapply(strsplit(sample, "-"), `[`, 3), "D", "")),
#           day_b = as.numeric(str_replace(sapply(strsplit(name, "-"), `[`, 3), "D", "")),
#          diff = abs(day_a - day_b),
#          early = day_a < 10) %>% 
#   filter(repeat_a == repeat_b & diff < 10) %>%
#   group_by(diff, repeat_a, early) %>%
#   summarize(median = median(value)) %>%
#   ungroup() %>%
#   ggplot(aes(x=diff, y=median, color=early, group=paste0(repeat_a, early))) +
#   geom_line(size=0.25) +
#   geom_smooth(aes(group=early), se=FALSE, size=4) +
#   labs(x="Distance between time points",
#        y="Median Bray-Curtis distance") +
#   scale_x_continuous(breaks=1:9) +
#   scale_color_manual(name=NULL,
#                      breaks=c(TRUE, FALSE),
#                      values=c("blue", "red"),
#                      labels=c("Early", "Late")) +
#   guides(color = guide_legend(override.aes = list(size=1))) +
#   theme_classic()

avg.r %>%
  as.matrix() %>%
  as_tibble(rownames= "sample") %>%
  pivot_longer(-sample) %>%
  filter(sample < name) %>%
  mutate(repeat_a = str_replace(sample, ".*-", ""),
         repeat_b = str_replace(name, ".*-", ""), 
         day_a = as.numeric(str_replace(sapply(strsplit(sample, "\\."), `[`, 3), "D", "")),
         day_b = as.numeric(str_replace(sapply(strsplit(name, "\\."), `[`, 3), "D", ""))) %>% 
  mutate(day_a = as.numeric(as.factor(day_a) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "13", "8" = "14", "9" = "14", "10" = "15")),
         day_b = as.numeric(as.factor(day_b) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "12", "8" = "13", "9" = "14", "10" = "15")),
         diff = abs(day_a - day_b)) %>% 
  filter(diff == 1) %>% 
  mutate(day_b = as.factor(day_b) %>% forcats::fct_recode("14" = "2", "28" = "3","49" = "4",  "63" = "5", "91" = "6", "161" = "9", "140" = "8", "182" = "10", "119" = "7")) %>% 
  ggplot(aes(x=day_b, y=value)) +
    geom_boxplot() +
  theme_bw() +
  labs(x="Days",
       y="Bray-Curtis distance")

0.5.3 basic NMDS plot

From the picture it looks like there is a gradual slowing down of changes -
but from the previous picture it follows that this is not the case - there is a dip between 7-8-10,
but on average the points differ quite consistently.

beta_custom_norm_NMDS_elli_w(ps.f, C="Group", G="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1063746 
## Run 1 stress 0.1063746 
## ... New best solution
## ... Procrustes: rmse 0.00002091149  max resid 0.00007447643 
## ... Similar to previous best
## Run 2 stress 0.1427868 
## Run 3 stress 0.1099872 
## Run 4 stress 0.1396556 
## Run 5 stress 0.1063746 
## ... Procrustes: rmse 0.00000856638  max resid 0.00004570378 
## ... Similar to previous best
## Run 6 stress 0.1063746 
## ... Procrustes: rmse 0.00001753402  max resid 0.00006418496 
## ... Similar to previous best
## Run 7 stress 0.1411972 
## Run 8 stress 0.1063746 
## ... Procrustes: rmse 0.00004854709  max resid 0.0001716062 
## ... Similar to previous best
## Run 9 stress 0.1099872 
## Run 10 stress 0.1063499 
## ... New best solution
## ... Procrustes: rmse 0.00155448  max resid 0.006797445 
## ... Similar to previous best
## Run 11 stress 0.1448551 
## Run 12 stress 0.1099721 
## Run 13 stress 0.1482809 
## Run 14 stress 0.1438721 
## Run 15 stress 0.1099872 
## Run 16 stress 0.1409441 
## Run 17 stress 0.1393472 
## Run 18 stress 0.1099873 
## Run 19 stress 0.143872 
## Run 20 stress 0.1099872 
## *** Best solution repeated 1 times
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`

With bulk soil

beta_custom_norm_NMDS_elli_w(ps_bulk, C="Group", G="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1106692 
## Run 1 stress 0.1099946 
## ... New best solution
## ... Procrustes: rmse 0.01525233  max resid 0.06644051 
## Run 2 stress 0.1137241 
## Run 3 stress 0.08974122 
## ... New best solution
## ... Procrustes: rmse 0.04386934  max resid 0.2071995 
## Run 4 stress 0.08965403 
## ... New best solution
## ... Procrustes: rmse 0.008190238  max resid 0.04762122 
## Run 5 stress 0.08965588 
## ... Procrustes: rmse 0.001782023  max resid 0.009088074 
## ... Similar to previous best
## Run 6 stress 0.1102175 
## Run 7 stress 0.08965558 
## ... Procrustes: rmse 0.001606968  max resid 0.008004172 
## ... Similar to previous best
## Run 8 stress 0.08974122 
## ... Procrustes: rmse 0.008190385  max resid 0.04759668 
## Run 9 stress 0.08975613 
## ... Procrustes: rmse 0.002920705  max resid 0.01334062 
## Run 10 stress 0.1138449 
## Run 11 stress 0.08974122 
## ... Procrustes: rmse 0.008190597  max resid 0.04758139 
## Run 12 stress 0.08965393 
## ... New best solution
## ... Procrustes: rmse 0.00008023188  max resid 0.0002988637 
## ... Similar to previous best
## Run 13 stress 0.0896541 
## ... Procrustes: rmse 0.0001167688  max resid 0.000450411 
## ... Similar to previous best
## Run 14 stress 0.08974128 
## ... Procrustes: rmse 0.008191234  max resid 0.04767439 
## Run 15 stress 0.1102175 
## Run 16 stress 0.08974364 
## ... Procrustes: rmse 0.008271512  max resid 0.04765294 
## Run 17 stress 0.08974366 
## ... Procrustes: rmse 0.008271187  max resid 0.04752359 
## Run 18 stress 0.08974381 
## ... Procrustes: rmse 0.008272414  max resid 0.04767895 
## Run 19 stress 0.08974381 
## ... Procrustes: rmse 0.008273279  max resid 0.0477807 
## Run 20 stress 0.08975604 
## ... Procrustes: rmse 0.002890954  max resid 0.01309561 
## *** Best solution repeated 2 times

0.5.3.1 AncomBC

map_d_trial <- ps_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Bulk = ifelse(Day == "bulk", "bulk", "trial") %>% as.factor())

ps_bulk@sam_data <-  sample_data(map_d_trial)

set.seed(123)
ancom <-  ANCOMBC::ancombc2(data = ps_bulk, 
  tax_level = NULL,
  fix_formula = "Bulk",
  rand_formula = NULL,
  p_adj_method = "fdr",
  pseudo = 0, 
  pseudo_sens = TRUE,
  prv_cut = 0.10,
  lib_cut = 1000,
  s0_perc = 0.05,
  group = "Bulk",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  n_cl = 15, 
  verbose = TRUE,
  global = TRUE, 
  pairwise = TRUE, 
  dunnet = TRUE, 
  trend = FALSE,
  iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
    nrow = 2,
    byrow = TRUE),
    matrix(c(-1, 0, 1, -1),
    nrow = 2,
    byrow = TRUE)),
   node = list(2, 2),
   solver = "ECOS",
  B = 100)
  )
ancom_family <-  ANCOMBC::ancombc2(data = ps_bulk, 
  fix_formula = "Bulk",
  rand_formula = NULL,
  p_adj_method = "fdr",
  pseudo = 0, 
  pseudo_sens = TRUE,
  prv_cut = 0.10,
  lib_cut = 1000,
  s0_perc = 0.05,
  group = "Bulk",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  n_cl = 15, 
  tax_level = "Family",
  verbose = TRUE,
  global = TRUE, 
  pairwise = TRUE, 
  dunnet = TRUE, 
  trend = FALSE,
  iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
    nrow = 2,
    byrow = TRUE),
    matrix(c(-1, 0, 1, -1),
    nrow = 2,
    byrow = TRUE)),
   node = list(2, 2),
   solver = "ECOS",
  B = 100)
  )
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Obtaining initial estimates ...
## ML iteration = 1, epsilon = 1.8
## ML iteration = 2, epsilon = 2.4e-14
## Estimating sample-specific biases ...
## ANCOM-BC2 primary results ...
## The sensitivity analysis for the pseudo-count addition ...
ancom_family$res %>% 
  group_by(diff_Bulktrial) %>% 
  summarise(n = n())
## # A tibble: 2 × 2
##   diff_Bulktrial     n
##   <lgl>          <int>
## 1 FALSE             31
## 2 TRUE              47
taxa_family <- ps_bulk@tax_table %>% 
  as.data.frame() %>% 
  select(-c("Genus", "Species")) %>% 
  rownames_to_column("ID") %>% 
  select(-ID) %>% 
  distinct() 
  
ancom_family$res %>% 
  filter(diff_Bulktrial == TRUE) %>% 
  select(c("taxon", "lfc_Bulktrial")) %>% 
  rename("taxon" = "Family") %>% 
  left_join(taxa_family, by="Family") %>% 
  mutate(`more in` = ifelse(lfc_Bulktrial > 0, "trial", "bulk") %>% as.factor(),
         area = 1) %>% 
  ggplot(aes(area = area, fill = `more in`, subgroup = Phylum, label = Family)) +
  treemapify::geom_treemap() +
  treemapify::geom_treemap_subgroup_border(color = "white") +
  treemapify::geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.5, colour =
                             "black", fontface = "italic", min.size = 0) +
  treemapify::geom_treemap_text(colour = "white", place = "topleft", reflow = T) +
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position="bottom") 

amp_bulk_venn <- phyloseq_to_ampvis2(ps_bulk)
p.venn.cc <- amp_venn(amp_bulk_venn,
         group_by = "Bulk",
         normalise = TRUE,
         cut_a = 0.000001,
         cut_f=0.000001,
            )

pw <- p.venn.cc
pw

# ggarrange(pw, pw, pw, pw,pw, pw, pw, pw, ncol = 1, labels = paste0("venn", seq(1, 8)))

0.6 Splitting the dataset

0.6.1 Basic information about dataset

With the bulk soil Phylum level

amp.b <- phyloseq_to_ampvis2(ps_bulk)
amp_heatmap(amp.b,
            tax_show = 22,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_class = "Proteobacteriota",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

ASV Level

amp_heatmap(amp.b,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

amp.b
## ampvis2 object with 5 elements. 
## Summary of OTU table:
## [1]       41     2062   624236     4412    36986    13850 15225.27
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   2062(100%)   2062(100%)   2062(100%)   2062(100%) 2047(99.27%) 1309(63.48%) 
##      Species 
##   114(5.53%) 
## 
## Metadata variables: 6 
##  SampleID, Bag, Description, Group, Day, Bulk

Without bulk

amp <- phyloseq_to_ampvis2(ps.f)
amp
## ampvis2 object with 5 elements. 
## Summary of OTU table:
## [1]       35     1063   510548     4412    32228    12752 14587.09
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   1063(100%)   1063(100%)   1063(100%)   1063(100%) 1054(99.15%)  736(69.24%) 
##      Species 
##    90(8.47%) 
## 
## Metadata variables: 5 
##  SampleID, Bag, Description, Group, Day
amp_heatmap(amp,
            tax_show = 22,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_class = "Proteobacteriota",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

amp_heatmap(amp,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

Let’s divide the dataset into two groups.

1st - more than 10% of samples should contain at least 10 reads - this group will be analyzed further

ps.inall <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

The second part - the remaining, but more than 100 reeds in all samples(hereinafter - outliers majors(OM))
The remaining phylotypes are dropped from the analysis.

ps.exl  <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ps.exl  <- prune_taxa(taxa_sums(ps.exl) > 100, ps.exl)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
amp.exl <- phyloseq_to_ampvis2(ps.exl)
amp_heatmap(amp.exl,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

Same, but with relative abundance.

ps.per  <-  phyloseq::transform_sample_counts(ps.f, function(x) x / sum(x) * 100) 
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, 
      showRemainingTaxa = TRUE)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, )

OM aggregated by genus. Relative abundance.

amp_heatmap(amp.exl.r,
            tax_show = 30, 
            group_by = "Day", 
            tax_aggregate = "Genus",
            tax_add = "Phylum",
            tax_class = "Proteobacteria",
            round =  2,
            normalise=FALSE, 
            showRemainingTaxa = TRUE)

If we look at how OMs are distributed by day - it seems that the distribution of OMs is related not only to the specificity of individual bags, but also to purely technical features - the outlier values are mostly not with biological repeats, but with technical ones (red line - selected visually)

p_box <- phyloseq::sample_sums(ps.per.exl) %>% 
  as.data.frame(col.names = "values") %>% 
  setNames(., nm = "values") %>% 
  rownames_to_column("samples") %>% 
  mutate(Day = sapply(strsplit(samples, "\\."), `[`, 3)) %>% 
  ggplot(aes(x=Day, y=values, color=Day, fill = Day)) +
  geom_boxplot(aes(color=Day, fill = Day)) +
  geom_point(color = "black", position = position_dodge(width=0.2)) +
  geom_hline(yintercept = 10, colour = "red") +
  theme_bw() +
  theme(legend.position = "none")
     
p_box <- p_box + viridis::scale_color_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.5)

p_box + viridis::scale_fill_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.3)

0.7 WGCNA

0.7.1 clr

after clr normalization / looks disgusting - let’s try vst normalization from DESeq2

otu.inall <- as.data.frame(ps.inall@otu_table@.Data) 
ps.inall.clr <- ps.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")
## pickSoftThreshold: will use block size 321.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 321 of 321
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1    0.133  19.60         0.8930 160.0000 160.00000 164.000
## 2      2    0.309 -18.70         0.8910  83.1000  82.90000  90.300
## 3      3    0.465 -11.70         0.8350  44.9000  44.50000  52.900
## 4      4    0.362  -5.99         0.7610  25.2000  24.60000  32.600
## 5      5    0.252  -3.34         0.7890  14.6000  14.00000  21.000
## 6      6    0.218  -2.19         0.7400   8.7500   8.31000  14.100
## 7      7    0.257  -1.82         0.7160   5.4300   5.09000   9.800
## 8      8    0.328  -1.71         0.6800   3.4900   3.21000   7.030
## 9      9    0.497  -1.92         0.7830   2.3100   2.09000   5.380
## 10    10    0.629  -2.03         0.7780   1.5800   1.39000   4.270
## 11    12    0.231  -4.27         0.0333   0.7990   0.66100   2.900
## 12    13    0.861  -2.22         0.9010   0.5900   0.47100   2.450
## 13    14    0.850  -2.16         0.8560   0.4450   0.34100   2.110
## 14    15    0.262  -3.70         0.0795   0.3430   0.24700   1.840
## 15    16    0.265  -3.55         0.0859   0.2690   0.18400   1.620
## 16    17    0.267  -3.42         0.0912   0.2140   0.13700   1.430
## 17    18    0.902  -1.97         0.9060   0.1730   0.10500   1.280
## 18    19    0.912  -1.92         0.9100   0.1420   0.08000   1.150
## 19    20    0.924  -1.87         0.9200   0.1170   0.06170   1.040
## 20    21    0.292  -4.09         0.1340   0.0981   0.04810   0.944
## 21    22    0.304  -3.06         0.1950   0.0829   0.03820   0.861
## 22    23    0.300  -2.94         0.1830   0.0706   0.03050   0.788
## 23    24    0.294  -2.79         0.1680   0.0607   0.02460   0.736
## 24    25    0.928  -1.62         0.9080   0.0525   0.01950   0.693
## 25    26    0.935  -1.60         0.9170   0.0458   0.01530   0.656
## 26    27    0.933  -1.57         0.9140   0.0402   0.01210   0.623
## 27    28    0.264  -2.43         0.0682   0.0354   0.00967   0.593
## 28    29    0.266  -2.40         0.0708   0.0314   0.00769   0.565
## 29    30    0.196  -2.67        -0.0255   0.0280   0.00608   0.541
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

0.7.2 vst

after vst normalisation

ps.varstab <- ps_vst(ps.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")
## pickSoftThreshold: will use block size 321.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 321 of 321
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1    1.0    0.339 -0.985         0.7120  46.800  43.10000  90.50
## 2    1.5    0.569 -1.150         0.8650  28.800  25.60000  65.50
## 3    2.0    0.711 -1.200         0.9340  18.900  16.90000  49.10
## 4    2.5    0.754 -1.210         0.9080  13.000  11.20000  38.10
## 5    3.0    0.807 -1.280         0.9260   9.260   7.36000  30.40
## 6    3.5    0.864 -1.330         0.9610   6.820   5.08000  24.80
## 7    4.0    0.863 -1.350         0.9310   5.160   3.59000  20.60
## 8    4.5    0.845 -1.380         0.8890   3.990   2.56000  17.30
## 9    5.0    0.780 -1.450         0.8240   3.140   1.84000  14.70
## 10   5.5    0.814 -1.440         0.8690   2.520   1.35000  12.70
## 11   6.0    0.819 -1.440         0.8580   2.050   1.03000  11.00
## 12   6.5    0.889 -1.350         0.9420   1.690   0.78200   9.66
## 13   7.0    0.868 -1.340         0.9070   1.410   0.60600   8.53
## 14   7.5    0.873 -1.330         0.9020   1.190   0.46700   7.68
## 15   8.0    0.818 -1.360         0.8180   1.020   0.38800   6.97
## 16   8.5    0.833 -1.360         0.8250   0.877   0.32000   6.35
## 17   9.0    0.854 -1.360         0.8550   0.762   0.25600   5.83
## 18   9.5    0.892 -1.370         0.9150   0.666   0.21400   5.37
## 19  10.0    0.886 -1.350         0.8940   0.587   0.17900   4.96
## 20  11.0    0.958 -1.290         0.9790   0.465   0.11800   4.29
## 21  12.0    0.902 -1.290         0.9060   0.376   0.08010   3.75
## 22  13.0    0.922 -1.270         0.9270   0.310   0.05370   3.31
## 23  14.0    0.913 -1.240         0.9080   0.260   0.03730   2.94
## 24  15.0    0.936 -1.190         0.9260   0.221   0.02670   2.63
## 25  16.0    0.924 -1.180         0.9110   0.190   0.02050   2.36
## 26  17.0    0.181 -1.840        -0.0518   0.165   0.01440   2.16
## 27  18.0    0.910 -1.210         0.8860   0.145   0.00980   2.05
## 28  19.0    0.901 -1.210         0.8770   0.129   0.00664   1.95
## 29  20.0    0.909 -1.240         0.8970   0.115   0.00468   1.87
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)


net3 <- WGCNA::blockwiseModules(data2,
                          power=5.5,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)


mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

unique(ps.inall@sam_data$Bag)
##  [1] D01 D03 D05 D07 D08 D10 D12 D13 D14 D15
## Levels: D01 D03 D05 D07 D08 D10 D12 D13 D14 D15

0.7.2.1 import libraries 2

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)
modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "day",
       y = "normalized abundance")

p + scale_color_manual(values = levels(submod_df$module)) +
  scale_x_discrete(labels=(c("D01"="3",
                                "D03"="14",
                                "D05"="28",
                                "D07"="49" ,
                                "D08"="63",
                                "D10"="91",
                                "D12"="119",
                                "D13"="140",
                                "D14"="161",
                                "D15"="182")
         ))

#### ordination

UNIFRAC - the sausage narrows down

ps.inall.col <- ps.inall

df <- module_df %>% 
  rename("id" = "asv")
df <- df %>% 
  dplyr::select(-"id") %>% 
  mutate(colors = as.factor(colors))
taxa <- as.data.frame(ps.inall@tax_table@.Data)
tx <- cbind(taxa, df)
tx$colors <- factor(tx$colors, levels = c("cyan", "darkgreen", "red", "salmon", "blue", "plum"))
tax_table(ps.inall.col) <- tax_table(as.matrix(tx))
ord <- ordinate(ps.inall.col, "NMDS", "unifrac", "species")
## Run 0 stress 0.07519118 
## Run 1 stress 0.07358942 
## ... New best solution
## ... Procrustes: rmse 0.01813614  max resid 0.05794054 
## Run 2 stress 0.07083612 
## ... New best solution
## ... Procrustes: rmse 0.05788181  max resid 0.2184256 
## Run 3 stress 0.07358915 
## Run 4 stress 0.07444672 
## Run 5 stress 0.07926786 
## Run 6 stress 0.07632554 
## Run 7 stress 0.05777387 
## ... New best solution
## ... Procrustes: rmse 0.04546038  max resid 0.2215956 
## Run 8 stress 0.07083624 
## Run 9 stress 0.07444694 
## Run 10 stress 0.07049655 
## Run 11 stress 0.07553988 
## Run 12 stress 0.0704966 
## Run 13 stress 0.0704966 
## Run 14 stress 0.08071969 
## Run 15 stress 0.07557142 
## Run 16 stress 0.05788492 
## ... Procrustes: rmse 0.005914098  max resid 0.02748847 
## Run 17 stress 0.08001937 
## Run 18 stress 0.05777385 
## ... New best solution
## ... Procrustes: rmse 0.00001062268  max resid 0.00003468096 
## ... Similar to previous best
## Run 19 stress 0.07078899 
## Run 20 stress 0.07858074 
## *** Best solution repeated 1 times
plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("cyan", "darkgreen", "red", "salmon", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

Bray - the sausage even
Late stages on the left - with early clusters overlapping, late clusters seems separated What affects on the axis2? There is clearly some sort of pattern.

ord <- ordinate(ps.inall.col, "NMDS", "bray", "species")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09266802 
## Run 1 stress 0.09266803 
## ... Procrustes: rmse 0.00002041571  max resid 0.00007667111 
## ... Similar to previous best
## Run 2 stress 0.1309815 
## Run 3 stress 0.09266805 
## ... Procrustes: rmse 0.00005002405  max resid 0.000204567 
## ... Similar to previous best
## Run 4 stress 0.09266803 
## ... Procrustes: rmse 0.00002339873  max resid 0.00008107723 
## ... Similar to previous best
## Run 5 stress 0.1296072 
## Run 6 stress 0.1311172 
## Run 7 stress 0.1383667 
## Run 8 stress 0.09266802 
## ... Procrustes: rmse 0.000008511634  max resid 0.00004247589 
## ... Similar to previous best
## Run 9 stress 0.1456796 
## Run 10 stress 0.09266802 
## ... Procrustes: rmse 0.000003038434  max resid 0.00001420329 
## ... Similar to previous best
## Run 11 stress 0.09266802 
## ... Procrustes: rmse 0.00001295347  max resid 0.00005351635 
## ... Similar to previous best
## Run 12 stress 0.09266802 
## ... Procrustes: rmse 0.000005447383  max resid 0.0000249077 
## ... Similar to previous best
## Run 13 stress 0.09266803 
## ... Procrustes: rmse 0.00002505859  max resid 0.00008787721 
## ... Similar to previous best
## Run 14 stress 0.09266802 
## ... Procrustes: rmse 0.00000349461  max resid 0.00001394197 
## ... Similar to previous best
## Run 15 stress 0.1296263 
## Run 16 stress 0.1297207 
## Run 17 stress 0.09266804 
## ... Procrustes: rmse 0.00004076479  max resid 0.0001694088 
## ... Similar to previous best
## Run 18 stress 0.09266802 
## ... Procrustes: rmse 0.000006043647  max resid 0.00001496754 
## ... Similar to previous best
## Run 19 stress 0.09266802 
## ... Procrustes: rmse 0.000008608011  max resid 0.0000221727 
## ... Similar to previous best
## Run 20 stress 0.09266802 
## ... Procrustes: rmse 0.000004973947  max resid 0.00001244682 
## ... Similar to previous best
## *** Best solution repeated 13 times
plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("cyan", "darkgreen", "red", "salmon", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

0.7.2.2 visualisation

Next are the same pictures for all the groups.

l_vst <- color_filt(ps.inall, submod_df)
l_vst

$cyan \(cyan\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 82 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 82 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 82 tips and 81 internal nodes ] refseq() DNAStringSet: [ 82 reference sequences ]

\(cyan\)amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 82 70590 183 5339 1720 2016.86

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 82(100%) 82(100%) 82(100%) 82(100%) 82(100%) 71(86.59%) 12(14.63%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(cyan\)heat \(cyan\)heat_rel \(cyan\)tree \(cyan\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq143 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Burkholderia NA
Seq65 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Alcaligenaceae NA NA
Seq149 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Comamonadaceae Variovorax NA
Seq503 Bacteria Cyanobacteriota Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq622 Bacteria Pseudomonadota Alphaproteobacteria Ferrovibrionales Ferrovibrionaceae Ferrovibrio soli
Seq341 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae NA NA
Seq239 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Beijerinckiaceae Methylobacterium-Methylorubrum NA
Seq232 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Beijerinckiaceae Bosea thiooxidans
Seq538 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Beijerinckiaceae Bosea NA
Seq105 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Tardiphaga robiniae
Seq31 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Starkeya NA
Seq212 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Hyphomicrobiaceae Hyphomicrobium NA
Seq605 Bacteria Pseudomonadota Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq386 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingopyxis NA
Seq610 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq595 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq132 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Mesorhizobium NA
Seq462 Bacteria Pseudomonadota Alphaproteobacteria Caulobacterales Caulobacteraceae NA NA
Seq77 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Conexibacter NA
Seq549 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq345 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq103 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Sorangium NA
Seq486 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Ilumatobacteraceae NA NA
Seq113 Bacteria Actinobacteriota Actinobacteria Micrococcales Promicromonosporaceae Promicromonospora NA
Seq598 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Leifsonia NA
Seq171 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae NA aoyamense
Seq674 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Agromyces NA
Seq139 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq159 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Herbidospora mongoliensis
Seq654 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Nonomuraea NA
Seq86 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Thermomonosporaceae Actinocorallia NA
Seq267 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq745 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae Jiangella NA
Seq245 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Pedobacter panaciterrae
Seq189 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Pedobacter NA
Seq492 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq716 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq309 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus NA
Seq101 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq52 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq18 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq265 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq415 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq439 Bacteria Bacillota Bacilli Bacillales Planococcaceae Domibacillus NA
Seq121 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus NA
Seq54 Bacteria Bacillota Bacilli Bacillales Planococcaceae Lysinibacillus NA
Seq144 Bacteria Bacillota Bacilli Bacillales Planococcaceae Lysinibacillus NA
Seq84 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq349 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq228 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq631 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae Chthoniobacter NA
Seq451 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae Chthoniobacter NA
Seq431 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Roseimicrobium gellanilyticum
Seq315 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Verrucomicrobium spinosum
Seq246 Bacteria Planctomycetota Planctomycetes Gemmatales Gemmataceae Gemmata NA
Seq375 Bacteria Planctomycetota Planctomycetes Isosphaerales Isosphaeraceae Singulisphaera NA
Seq395 Bacteria Acidobacteriota Acidobacteriae Acidobacteriales Acidobacteriaceae (Subgroup 1) Edaphobacter NA
Seq73 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq237 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq420 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq32 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq765 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq40 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella hibisci
Seq234 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq372 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq126 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq155 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga ginsengihumi
Seq125 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga arvensicola
Seq496 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga soli
Seq76 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq410 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq50 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq481 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq147 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq1006 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Moraxellaceae NA NA
Seq727 Bacteria Pseudomonadota Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter NA
Seq409 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq471 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq581 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq335 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Tahibacter NA
Seq186 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Xanthomonas NA
Seq606 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA

$darkgreen \(darkgreen\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 29 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 29 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 29 tips and 28 internal nodes ] refseq() DNAStringSet: [ 29 reference sequences ]

\(darkgreen\)amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 29 173281 336 15108 4808 4950.89

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 29(100%) 29(100%) 29(100%) 29(100%) 29(100%) 28(96.55%) 2(6.9%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(darkgreen\)heat \(darkgreen\)heat_rel \(darkgreen\)tree \(darkgreen\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq120 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Burkholderia telluris
Seq69 Bacteria Pseudomonadota Alphaproteobacteria Azospirillales Inquilinaceae Inquilinus ginsengisoli
Seq4 Bacteria Pseudomonadota Alphaproteobacteria Azospirillales Inquilinaceae Inquilinus NA
Seq370 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudorhodoplanes NA
Seq9 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Bradyrhizobium NA
Seq82 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Bradyrhizobium NA
Seq22 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Starkeya NA
Seq44 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq10 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq29 Bacteria Myxococcota Polyangia Nannocystales Nannocystaceae Nannocystis NA
Seq364 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Labilithrix NA
Seq51 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq34 Bacteria Actinobacteriota Actinobacteria Streptosporangiales Streptosporangiaceae Herbidospora NA
Seq57 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq25 Bacteria Bacillota Bacilli Bacillales Bacillaceae Terribacillus NA
Seq3 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus NA
Seq5 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus NA
Seq13 Bacteria Bacillota Bacilli Bacillales Planococcaceae NA NA
Seq6 Bacteria Bacillota Bacilli Bacillales Planococcaceae Solibacillus NA
Seq19 Bacteria Bacillota Bacilli Bacillales Planococcaceae Solibacillus NA
Seq17 Bacteria Planctomycetota Planctomycetes Isosphaerales Isosphaeraceae Singulisphaera NA
Seq527 Bacteria Acidobacteriota Acidobacteriae Acidobacteriales Acidobacteriaceae (Subgroup 1) Edaphobacter NA
Seq16 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq7 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq1 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq134 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq81 Bacteria Bacteroidota Bacteroidia Cytophagales Spirosomaceae Dyadobacter NA
Seq15 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Luteibacter NA
Seq26 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Luteimonas NA

$red \(red\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 139 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 139 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 139 tips and 138 internal nodes ] refseq() DNAStringSet: [ 139 reference sequences ]

\(red\)amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 139 109849 0 14357 1966 3138.54

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 139(100%) 139(100%) 139(100%) 139(100%) 139(100%) 86(61.87%) 11(7.91%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(red\)heat \(red\)heat_rel \(red\)tree \(red\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq117 Archaea Thermoproteota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq271 Archaea Thermoproteota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae Candidatus Nitrocosmicus NA
Seq61 Archaea Thermoproteota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq62 Archaea Thermoproteota Nitrososphaeria Nitrososphaerales Nitrososphaeraceae NA NA
Seq214 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Aquicella NA
Seq758 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Aquicella NA
Seq289 Bacteria Cyanobacteriota Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq23 Bacteria Cyanobacteriota Vampirivibrionia Obscuribacterales Obscuribacteraceae NA NA
Seq318 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas naasensis
Seq71 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas NA
Seq480 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas jaspsi
Seq310 Bacteria Pseudomonadota Alphaproteobacteria Dongiales Dongiaceae Dongia NA
Seq262 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella soli
Seq221 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq83 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq321 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq226 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella massiliensis
Seq537 Bacteria Pseudomonadota Alphaproteobacteria Rhodospirillales Magnetospiraceae NA NA
Seq712 Bacteria Pseudomonadota Alphaproteobacteria Acetobacterales Acetobacteraceae NA NA
Seq433 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Kaistiaceae Kaistia NA
Seq270 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudorhodoplanes NA
Seq107 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq166 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq183 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae Pseudolabrys NA
Seq426 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Xanthobacteraceae NA NA
Seq175 Bacteria Pseudomonadota Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq129 Bacteria Pseudomonadota Alphaproteobacteria Micropepsales Micropepsaceae NA NA
Seq157 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Altererythrobacter NA
Seq137 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingobium NA
Seq28 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Devosiaceae Devosia NA
Seq592 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Mesorhizobium NA
Seq711 Bacteria Pseudomonadota Alphaproteobacteria Caulobacterales Caulobacteraceae Phenylobacterium NA
Seq33 Bacteria Pseudomonadota Alphaproteobacteria Caulobacterales Caulobacteraceae Caulobacter NA
Seq225 Bacteria Pseudomonadota Alphaproteobacteria Caulobacterales Hyphomonadaceae Hirschia NA
Seq39 Bacteria Gemmatimonadota Gemmatimonadetes Gemmatimonadales Gemmatimonadaceae NA NA
Seq179 Bacteria Nitrospirota Nitrospiria Nitrospirales Nitrospiraceae Nitrospira japonica
Seq30 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Conexibacter NA
Seq156 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales Solirubrobacteraceae Solirubrobacter NA
Seq192 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales 67-14 NA NA
Seq383 Bacteria Actinobacteriota Thermoleophilia Solirubrobacterales 67-14 NA NA
Seq532 Bacteria Myxococcota Polyangia Haliangiales Haliangiaceae Haliangium NA
Seq504 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq172 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq46 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq493 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq100 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq35 Bacteria Myxococcota Polyangia Polyangiales BIrii41 NA NA
Seq399 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Minicystis NA
Seq128 Bacteria Myxococcota Polyangia Polyangiales Sandaracinaceae NA NA
Seq274 Bacteria Bdellovibrionota Bdellovibrionia Bdellovibrionales Bdellovibrionaceae Bdellovibrio NA
Seq119 Bacteria Bdellovibrionota Bdellovibrionia Bdellovibrionales Bdellovibrionaceae Bdellovibrio NA
Seq253 Bacteria Dependentiae Babeliae Babeliales UBA12409 NA NA
Seq79 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae Microlunatus NA
Seq416 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Nocardioidaceae Kribbella NA
Seq381 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Ilumatobacteraceae NA NA
Seq173 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Iamiaceae Iamia NA
Seq235 Bacteria Actinobacteriota Acidimicrobiia Microtrichales Iamiaceae Iamia NA
Seq43 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Galbitalea NA
Seq290 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Galbitalea NA
Seq693 Bacteria Actinobacteriota Actinobacteria Micromonosporales Micromonosporaceae Luedemannella NA
Seq148 Bacteria Actinobacteriota Actinobacteria Micromonosporales Micromonosporaceae Dactylosporangium NA
Seq357 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae NA NA
Seq317 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Mucilaginibacter NA
Seq230 Bacteria Bacteroidota Bacteroidia Sphingobacteriales Sphingobacteriaceae Mucilaginibacter calamicampi
Seq90 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq520 Bacteria Bacteroidota Bacteroidia Sphingobacteriales NS11-12 marine group NA NA
Seq145 Bacteria Bacteroidota Bacteroidia Sphingobacteriales env.OPS 17 NA NA
Seq284 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus lautus
Seq258 Bacteria Bacillota Bacilli Paenibacillales Paenibacillaceae Paenibacillus NA
Seq244 Bacteria Bacillota Bacilli Bacillales Planococcaceae Paenisporosarcina NA
Seq85 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Salinispira NA
Seq185 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Spirochaeta 2 NA
Seq236 Bacteria Spirochaetota Spirochaetia Spirochaetales Spirochaetaceae Spirochaeta 2 NA
Seq590 Bacteria Patescibacteria Saccharimonadia Saccharimonadales LWQ8 NA NA
Seq238 Bacteria Fibrobacterota Fibrobacteria Fibrobacterales Fibrobacteraceae NA NA
Seq490 Bacteria Chloroflexota Anaerolineae Anaerolineales Anaerolineaceae NA NA
Seq612 Bacteria Chloroflexota Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq222 Bacteria Chloroflexota Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq421 Bacteria Chloroflexota Chloroflexia Chloroflexales Roseiflexaceae NA NA
Seq613 Bacteria Chloroflexota Chloroflexia Thermomicrobiales JG30-KF-CM45 NA NA
Seq530 Bacteria Chloroflexota Chloroflexia Thermomicrobiales JG30-KF-CM45 NA NA
Seq388 Bacteria Armatimonadota Fimbriimonadia Fimbriimonadales Fimbriimonadaceae NA NA
Seq576 Bacteria Verrucomicrobiota Verrucomicrobiae Pedosphaerales Pedosphaeraceae NA NA
Seq417 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Lacunisphaera NA
Seq152 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Lacunisphaera limnophila
Seq517 Bacteria Verrucomicrobiota Verrucomicrobiae Opitutales Opitutaceae Opitutus NA
Seq153 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq281 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Terrimicrobiaceae Terrimicrobium NA
Seq528 Bacteria Verrucomicrobiota Verrucomicrobiae Chthoniobacterales Chthoniobacteraceae LD29 NA
Seq659 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Rubritaleaceae Luteolibacter NA
Seq713 Bacteria Planctomycetota Phycisphaerae Phycisphaerales Phycisphaeraceae NA NA
Seq601 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq839 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq521 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq585 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae NA NA
Seq195 Bacteria Planctomycetota Planctomycetes Gemmatales Gemmataceae Gemmata NA
Seq404 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae Pir4 lineage NA
Seq440 Bacteria Planctomycetota Planctomycetes Pirellulales Pirellulaceae Pir4 lineage NA
Seq300 Bacteria Planctomycetota Planctomycetes Planctomycetales Rubinisphaeraceae SH-PL14 NA
Seq292 Bacteria Planctomycetota Planctomycetes Planctomycetales Schlesneriaceae Schlesneria NA
Seq200 Bacteria Acidobacteriota Blastocatellia Blastocatellales Blastocatellaceae NA NA
Seq342 Bacteria Acidobacteriota Vicinamibacteria Vicinamibacterales Vicinamibacteraceae NA NA
Seq434 Bacteria Acidobacteriota Vicinamibacteria Vicinamibacterales Vicinamibacteraceae NA NA
Seq607 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq12 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq196 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq529 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq11 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae NA NA
Seq570 Bacteria Bacteroidota Bacteroidia Sphingobacteriales env.OPS 17 NA NA
Seq178 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq92 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq326 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Terrimonas NA
Seq768 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq191 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq557 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae NA NA
Seq671 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq459 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq550 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq675 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Edaphobaculum NA
Seq547 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Taibaiella NA
Seq834 Bacteria Bacteroidota Bacteroidia Cytophagales Amoebophilaceae Candidatus Amoebophilus NA
Seq760 Bacteria Pseudomonadota Gammaproteobacteria Coxiellales Coxiellaceae Coxiella NA
Seq353 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales 211ds20 NA NA
Seq169 Bacteria Pseudomonadota Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter flavus
Seq352 Bacteria Pseudomonadota Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter NA
Seq506 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq58 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq264 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq231 Bacteria Pseudomonadota Gammaproteobacteria Gammaproteobacteria Incertae Sedis Unknown Family Acidibacter NA
Seq544 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Luteimonas vadosa
Seq114 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Dokdonella ginsengisoli
Seq207 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Rhodanobacteraceae Dokdonella NA
Seq627 Bacteria Pseudomonadota Gammaproteobacteria Salinisphaerales Solimonadaceae NA NA
Seq562 Bacteria Pseudomonadota Gammaproteobacteria Salinisphaerales Solimonadaceae Alkanibacter NA
Seq786 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq782 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq339 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq138 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae NA NA
Seq358 Bacteria Pseudomonadota Gammaproteobacteria Legionellales Legionellaceae Legionella NA

$salmon \(salmon\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 71 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 71 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 71 tips and 70 internal nodes ] refseq() DNAStringSet: [ 71 reference sequences ]

\(salmon\)amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 71 115593 29 20719 779 3302.66

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 71(100%) 71(100%) 71(100%) 71(100%) 71(100%) 67(94.37%) 11(15.49%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

\(salmon\)heat \(salmon\)heat_rel \(salmon\)tree \(salmon\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq87 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia armeniaca
Seq151 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae NA NA
Seq241 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae Pseudoduganella NA
Seq170 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae Pseudoduganella eburnea
Seq141 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia NA
Seq324 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Oxalobacteraceae Massilia NA
Seq208 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Cupriavidus NA
Seq8 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Cupriavidus NA
Seq14 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Alcaligenaceae Achromobacter NA
Seq55 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Comamonadaceae Xylophilus paradoxus
Seq115 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Comamonadaceae NA NA
Seq552 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Comamonadaceae Rhizobacter NA
Seq412 Bacteria Pseudomonadota Alphaproteobacteria Sphingomonadales Sphingomonadaceae Sphingomonas mucosissima
Seq540 Bacteria Pseudomonadota Alphaproteobacteria Reyranellales Reyranellaceae Reyranella NA
Seq123 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Beijerinckiaceae Microvirga NA
Seq359 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq371 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Devosiaceae Devosia neptuniae
Seq257 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Shinella NA
Seq202 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae NA NA
Seq42 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Mycoplana NA
Seq422 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Ensifer NA
Seq21 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium NA
Seq133 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Neorhizobium NA
Seq348 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae Allorhizobium azooxidifex
Seq177 Bacteria Pseudomonadota Alphaproteobacteria Rhizobiales Rhizobiaceae NA NA
Seq423 Bacteria Pseudomonadota Alphaproteobacteria Caulobacterales Caulobacteraceae Phenylobacterium mobile
Seq611 Bacteria Myxococcota Polyangia Haliangiales Haliangiaceae Haliangium NA
Seq401 Bacteria Myxococcota Polyangia Polyangiales Polyangiaceae Pajaroellobacter NA
Seq325 Bacteria Actinobacteriota Actinobacteria Micrococcales Promicromonosporaceae Cellulosimicrobium NA
Seq95 Bacteria Actinobacteriota Actinobacteria Micrococcales Microbacteriaceae Microbacterium NA
Seq74 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Mycobacteriaceae Mycobacterium NA
Seq443 Bacteria Actinobacteriota Actinobacteria Glycomycetales Glycomycetaceae Glycomyces NA
Seq78 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq94 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq414 Bacteria Actinobacteriota Actinobacteria Streptomycetales Streptomycetaceae Streptomyces NA
Seq181 Bacteria Verrucomicrobiota Verrucomicrobiae Verrucomicrobiales Verrucomicrobiaceae Roseimicrobium NA
Seq218 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq390 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq142 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq255 Bacteria Bacteroidota Bacteroidia Cytophagales Microscillaceae Ohtaekwangia NA
Seq53 Bacteria Bacteroidota Bacteroidia Flavobacteriales Flavobacteriaceae Flavobacterium NA
Seq261 Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Chryseobacterium ginsenosidimutans
Seq233 Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Chryseobacterium NA
Seq446 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Flavitalea NA
Seq97 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq131 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq136 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Niastella NA
Seq167 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Pseudoflavitalea NA
Seq37 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq215 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq248 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga humicola
Seq112 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq89 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga pinensis
Seq2 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq323 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq20 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq445 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae Chitinophaga NA
Seq38 Bacteria Bacteroidota Bacteroidia Cytophagales Spirosomaceae Dyadobacter NA
Seq362 Bacteria Pseudomonadota Gammaproteobacteria Steroidobacterales Steroidobacteraceae Steroidobacter agariperforans
Seq355 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Pseudoxanthomonas NA
Seq60 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Stenotrophomonas NA
Seq130 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Stenotrophomonas NA
Seq305 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Lysobacter NA
Seq41 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales Xanthomonadaceae Lysobacter NA
Seq667 Bacteria Pseudomonadota Gammaproteobacteria Legionellales Legionellaceae Legionella NA
Seq176 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq24 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq27 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq99 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq56 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
Seq279 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Klebsiella NA

0.7.3 mdp and other indices for clusters

I can’t do stat mdp support for clusters, but the early clusters(salmon, cyan) have lower mdp than the later ones.

external_empty_dataframe <- data.frame(cluster=factor(),
                                       mdp=numeric(),
                                       richness=numeric(),
                                       stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  ps.in <- l_vst[[i]][["ps"]]
  m <- ps.in@otu_table@.Data %>% 
    colSums() %>% 
    as.data.frame() %>% 
    as.matrix() %>% 
    t()
  
  mdp_index <- picante::mpd(m, cophenetic(ps.in@phy_tree)) 
  ricness_index <- ps.in@tax_table %>% 
    rownames() %>% 
    length()
  d <- data.frame(cluster = i,
                  mdp = mdp_index,
                  richness = ricness_index)
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

external_empty_dataframe %>% 
  dplyr::arrange(mdp)
##     cluster      mdp richness
## 1 darkgreen 1.541596       29
## 2    salmon 1.629239       71
## 3      cyan 1.754819       82
## 4       red 1.928198      139
colSums(l_vst$salmon$ps@otu_table@.Data)
##  Seq87 Seq151 Seq241 Seq170 Seq141 Seq324 Seq208   Seq8  Seq14  Seq55 Seq115 
##   1570    975    549    765    703    390    669   9841   6482   2589   1262 
## Seq552 Seq412 Seq540 Seq123 Seq359 Seq371 Seq257 Seq202  Seq42 Seq422  Seq21 
##    195    304    200    669    337    341    516    688   3429    261   5170 
## Seq133 Seq348 Seq177 Seq423 Seq611 Seq401 Seq325  Seq95  Seq74 Seq443  Seq78 
##   1064    363    798    158    160    310    390   1469   1941    249   1822 
##  Seq94 Seq414 Seq181 Seq218 Seq390 Seq142 Seq255  Seq53 Seq261 Seq233 Seq446 
##   1179    301    791    638    317   1038    523   2663    512    572    266 
##  Seq97 Seq131 Seq136 Seq167  Seq37 Seq215 Seq248 Seq112  Seq89   Seq2 Seq323 
##   1457   1047   1054    875   3566    653    532   1311   1524  16855    392 
##  Seq20 Seq445  Seq38 Seq362 Seq355  Seq60 Seq130 Seq305  Seq41 Seq667 Seq176 
##   5184    266   3539    350    357   2372   1105    189   3465    137    799 
##  Seq24  Seq27  Seq99  Seq56 Seq279 
##   4452   4271   1373   2582    457

0.8 Add a metadata

list.files("meta/")
## [1] "cell_realtime_stat.xlsx" "cell_resp_ch_stat.xlsx" 
## [3] "period_legend.xlsx"
realtime.data <- readxl::read_excel("meta/cell_realtime_stat.xlsx")
period.data <- readxl::read_excel("meta/period_legend.xlsx")
resp.data <- readxl::read_excel("meta/cell_resp_ch_stat.xlsx")
realtime.data
## # A tibble: 36 × 29
##    id        day contr…¹ CELL_…² CELL_…³ CELL_…⁴ CELL_…⁵ CELL_…⁶ CELL_…⁷ CELL_…⁸
##    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 G01-1-1     3    15.6    33.2    31.1    33.6    36.6    33.8    38.0    33.0
##  2 G01-1-2     3    15.5    33.3    31.3    32.9    36.2    33.8    41      32.7
##  3 G01-1-3     3    15.5    33.6    31.2    32.9    35.8    33.3    38.2    32.9
##  4 G01-2-1     3    17.1    36.0    31.1    33.1    36.9    36.4    38.8    33.8
##  5 G01-2-2     3    17.3    35.6    31.0    32.8    37.8    35.7    40.1    33.6
##  6 G01-2-3     3    17.1    36      31.1    32.8    38.1    35.2    37.6    33.6
##  7 G01-3-1     3    16.4    35.4    31.6    34.9    39.1    35.3    40.9    33.2
##  8 G01-3-2     3    16.5    35.6    31.3    34.4    37.9    34.6    38.9    33.1
##  9 G01-3-3     3    16.4    35.8    31.5    34.4    41.6    35.0    36.5    33.4
## 10 G05-1-1    28    18.2    26.3    29.1    29.4    28.2    28.4    37.4    34.5
## # … with 26 more rows, 19 more variables: CELL_193122 <dbl>, CELL_73229 <dbl>,
## #   CELL_47814 <dbl>, CELL_163125 <dbl>, CELL_73266 <dbl>, CELL_88582 <dbl>,
## #   CELL_63583 <dbl>, CELL_14199 <dbl>, CELL_95616 <dbl>, CELL_63504 <dbl>,
## #   CELL_08643 <chr>, CELL_199599 <dbl>, CELL_01426 <dbl>, CELL_71601 <dbl>,
## #   CELL_45099 <dbl>, CELL_191900 <dbl>, CELL_99463 <dbl>, CELL_74579 <dbl>,
## #   CELL_183403 <dbl>, and abbreviated variable names ¹​contr_16S, ²​CELL_172283,
## #   ³​CELL_203163, ⁴​CELL_83325, ⁵​CELL_188413, ⁶​CELL_109631, ⁷​CELL_188119, …

0.8.1 Realtime data

impute.mean <- function(x) replace(x, is.na(x), mean(x, na.rm = TRUE))

realtime_data <- realtime.data %>% 
  mutate(CELL_08643 = as.numeric(CELL_08643)) %>% 
  group_by(day) %>%
  mutate(nice_cell = impute.mean(CELL_08643)) %>% 
  mutate(day = as.factor(day))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `CELL_08643 = as.numeric(CELL_08643)`.
## Caused by warning:
## ! NAs introduced by coercion
realtime_zjena <- readxl::read_excel("cellulases_gene_expression(1).xlsx")
geom_mean <-function(x){exp(mean(log(x)))}

realtime_zjena_geom <- realtime_zjena %>% 
  mutate(repeats = paste0(realtime_zjena$week, "-", rep(1:3, 3, each=3))) %>% 
  relocate(repeats, 1) %>% 
  group_by(repeats) %>% 
  summarise_if(is.numeric, geom_mean) %>% 
  mutate(week = as.factor(week)) %>% 
  arrange(week)

UPGMA cellulase(distance - bray).
I don’t really know what that means. Depending on the distance, you get very different clusters.

realtime_matrix <- realtime_zjena_geom %>% 
  column_to_rownames("repeats") %>% 
  select_if(is.numeric) %>% 
  as.matrix()

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="bray"), "average")
plot(hcl)

And here is the Euclidean distance.
Nothing to compare with the previous picture

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="euclidian"), "average")
plot(hcl)

Cellulases corplot.
Correlations only

m = cor(realtime_matrix)
corrplot::corrplot(m)

Only significant correlations . In general, there are clusters, but the correlations are not significant (with some exceptions)

cor_test_mat <- psych::corr.test(realtime_matrix)$p
corrplot::corrplot(m, p.mat = cor_test_mat, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

The same, but the matrix is already logarithmased

cor_test_mat <- psych::corr.test(log(realtime_matrix))
corrplot::corrplot(cor_test_mat$r, p.mat = cor_test_mat$p, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

add phylogenic tree (mafft - iqtree(ML))

realtime_tree <- ape::read.tree("al_chz.fasta.contree")
plot(realtime_tree)

library(tidyverse)

realtime_data %>% 
  select(c("day", "id", "contr_16S", "CELL_172283")) %>% 
  mutate(bio_repl = gsub("-[1-3]$", "", id)) %>% 
  group_by(bio_repl, day) %>% 
  summarise(contr_tmean = mean(contr_16S),
            data_tmean = mean(CELL_172283)) %>% 
  mutate(dCt = data_tmean - contr_tmean)
## `summarise()` has grouped output by 'bio_repl'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 5
## # Groups:   bio_repl [12]
##    bio_repl day   contr_tmean data_tmean   dCt
##    <chr>    <fct>       <dbl>      <dbl> <dbl>
##  1 G01-1    3            15.5       33.4 17.8 
##  2 G01-2    3            17.2       35.9 18.7 
##  3 G01-3    3            16.4       35.6 19.2 
##  4 G05-1    28           18.4       26.2  7.81
##  5 G05-2    28           16.5       26.2  9.73
##  6 G05-3    28           18.5       32.0 13.6 
##  7 G10-1    91           17.4       26.2  8.72
##  8 G10-2    91           16.3       24.1  7.79
##  9 G10-3    91           17.5       26.3  8.81
## 10 G14-1    161          18.0       28.1 10.1 
## 11 G14-2    161          16.5       27.0 10.6 
## 12 G14-3    161          16.9       25.4  8.41
resp_data <- resp.data %>% 
  group_by(day) %>%
  mutate(control = impute.mean(control)) %>% 
  mutate(straw = impute.mean(straw)) 

resp_data
## # A tibble: 164 × 3
## # Groups:   day [27]
##      day control straw
##    <dbl>   <dbl> <dbl>
##  1     0    250   250 
##  2     3    285.  723.
##  3     3    263.  832.
##  4     3    131.  657.
##  5     3    307.  525.
##  6     3    241.  744.
##  7     3    245.  701.
##  8     3    245.  657.
##  9     7    274.  690.
## 10     7    296.  690.
## # … with 154 more rows

0.8.2 Knee plot

Soil resperation - median(straw)/median(control)

You can use a third-party package (KneeArrower) to understand where the knee_plot breaks. For the elimination of repetitions let’s take the median.   I don’t know how this package works (looks for a derivative, but I don’t know how it smoothes it out, it’s math), but it says that the breaking is more likely at 60-80 days.

(I corrected the errors - got closer to your data)

https://github.com/agentlans/KneeArrower - here’s where you can read about math

# resp_data %>% 
#   filter(!day == 0) %>% 
#   group_by(day) %>% 
#   summarise(median_control = median(control),
#             median_straw = median(straw)) %>% 
#   mutate(rel = median_straw/median_control) %>% 
#   ggplot() +
#   geom_point(aes(x = day, y = rel))

resp_median <- resp_data %>% 
  filter(!day == 0) %>% 
  group_by(day) %>% 
  summarise(median_control = median(control),
            median_straw = median(straw)) %>% 
  mutate(rel = median_straw/median_control)


thresholds <- c(0.25, 0.5, 0.75, 1)

# Find cutoff points at each threshold
cutoff.points <- lapply(thresholds, function(i) {
  KneeArrower::findCutoff(resp_median$day, resp_median$rel, method="first", i)
})

x.coord <- sapply(cutoff.points, function(p) p$x)
y.coord <- sapply(cutoff.points, function(p) p$y)

# Plot the cutoff points on the scatterplot
plot(resp_median$day, resp_median$rel, pch=20, col="gray")
points(x.coord, y.coord, col="red", pch=20)
text(x.coord, y.coord, labels=thresholds, pos=4, col="red")

period_data <- period.data %>%
  mutate(bag_id = as.factor(bag_id))

period_data
## # A tibble: 15 × 2
##    bag_id   day
##    <fct>  <dbl>
##  1 1          3
##  2 2          7
##  3 3         14
##  4 4         21
##  5 5         28
##  6 6         35
##  7 7         49
##  8 8         63
##  9 9         77
## 10 10        91
## 11 11       105
## 12 12       119
## 13 13       140
## 14 14       161
## 15 15       182

0.8.3 alpha/respiration.

Well, I don’t really understand what to do next - attach the clusters to this picture?

resp_median_bags <- resp_median %>% 
  left_join(period.data, by="day") %>% 
  mutate(bag_id = as.factor(bag_id))

ps.f.r <- rarefy_even_depth(ps.f)

estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed = scale(Observed),
    Shannon = scale(Shannon),
    InvSimpson = scale(InvSimpson),
    Respiration = scale(rel)
  ) %>% 
  select(c(bag_id, Observed, Shannon, InvSimpson, Respiration)) %>% 
  pivot_longer(c("Observed", "Shannon", "InvSimpson", "Respiration")) %>% 
  mutate(metric = factor(name, levels = c("Observed", "Shannon", "InvSimpson", "Respiration"))) %>% 
  ggplot(aes(y = value, x = bag_id, group = name)) +
  geom_line(aes(color = name),
            alpha = 0.8) +
  theme_bw() +
    scale_x_discrete(labels=(c("1"="3",
                                "3"="14",
                                "5"="28",
                                "7"="49" ,
                                "8"="63",
                                "10"="91",
                                "12"="119",
                                "13"="140",
                                "14"="161",
                                "15"="182")
         )) +
  labs(
    x = "Days",
    y = "z-scaled mean values",
    colour = ""
   )

Correlation is negative, weak, not particularly reliable, and only Pearson (which works for a normal distribution (we have a time series, we need a spearman for good measure))

alpha_resp <- phyloseq::estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed_scaled = scale(Observed),
    Shannon_scaled = scale(Shannon),
    InvSimpson_scaled = scale(InvSimpson),
    Respiration_scaled = scale(rel)
  ) %>% 
  select(c(bag_id, Observed_scaled, Shannon_scaled, InvSimpson_scaled, Respiration_scaled))

cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## S = 240, p-value = 0.1909
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4545455
cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## S = 242, p-value = 0.1782
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4666667
cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## S = 244, p-value = 0.1661
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4787879
cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## t = -2.6675, df = 8, p-value = 0.02847
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.91881253 -0.09942732
## sample estimates:
##        cor 
## -0.6861022
cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## t = -2.8858, df = 8, p-value = 0.02033
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9269747 -0.1536306
## sample estimates:
##        cor 
## -0.7141749
cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## t = -2.3261, df = 8, p-value = 0.04845
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.903485261 -0.009279276
## sample estimates:
##        cor 
## -0.6351945

0.9 Export all phyloseq object in one table

In the table there is a column cluster - “exl” in it means that this is the part of the dataset that did not go to WGCNA - majors in single samples(OMs).

ps.m <- phyloseq::psmelt(ps.f)

ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-c("Group", "Bag")) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  a <- taxa_names(l_vst[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.data.out.tsv", sep = "\t", row.names = FALSE)
ps.data.out.exl

0.9.1 Several pictures on the dynamics of representation of individual phyla

Bdellovibrionota - predators, an indicator of a developed community

ps.m %>% 
  filter(Phylum == "Bdellovibrionota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Myxococcota - sort of the same, but as we know can be cellulotic(facultative predators)

ps.m %>% 
  filter(Phylum == "Myxococcota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Археи появляются тоже на поздних стадиях - вообще я бы хотел бы опять развить тему важности азотного метаболизма на поздних стадиях разложения.
Оч хочется метагеном, но не этот.
Кроме того хочется отметить, что эти минорные группы возникают на D12

ps.m %>% 
  filter(Phylum == "Crenarchaeota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Gammaproteobacteria - they are a third of the cluster blue

ps.m %>% 
  filter(Class == "Gammaproteobacteria") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()
## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

0.9.2 Boxplots by phyla

All phyla - the representation is logarithmic on the basis of 2
Separate points are sums of absolute values of reads by days
The sums are logarithmic, but not the individual phylotypes

#select only major phylums
top_phylum <- ps.m %>% 
  count(Phylum) %>% 
  arrange(desc(n)) %>% 
  top_n(10) %>% 
  pull(Phylum)
## Selecting by n
ps.m %>% 
  filter(Phylum %in% top_phylum) %>% 
  mutate(
    Phylum = as.character(Phylum),
    Class = as.character(Class),
    phylum = ifelse(Phylum == "Proteobacteria", Class, Phylum)
  ) %>% 
  group_by(Description, Day, phylum) %>% 
  filter(!is.na(phylum)) %>% 
  summarise(Bs = log2(sum(Abundance))) %>% 
  ggplot(aes(x = Day, y = Bs)) +
  geom_boxplot(fill="#4DBBD5B2", alpha=0.4) +
  theme_bw() +
  facet_wrap(~ phylum)
## `summarise()` has grouped output by 'Description', 'Day'. You can override
## using the `.groups` argument.
## Warning: Removed 54 rows containing non-finite values (`stat_boxplot()`).

0.9.3 resperation and phylotypes correlations

little_res <- resp_median_bags %>% 
  filter(day %in% ps.varstab@sam_data$Day)

ps.varstab.merged <- phyloseq::merge_samples(ps.varstab, "Day")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
otus_cor <- ps.varstab.merged@otu_table %>% 
  as.data.frame()

s1 <- sapply(colnames(otus_cor), function(x) {
  cor <- cor.test(otus_cor[[x]], little_res$rel, method="spearman", exact = FALSE)
  c1 <- cor$p.value
  c2 <- cor$estimate %>% unname()
  return(list("p.value"=c1, "rho"=c2))})

s1 <- s1 %>% 
  t() %>% 
  as.data.frame() %>% 
  unnest(cols = c(p.value, rho), .id = "id") %>% 
  column_to_rownames("id") %>% 
  mutate(p.adj = p.adjust(p.value, method = "BH", n = length(p.value)))


taxa <- as.data.frame(ps.varstab@tax_table@.Data) %>% 
  rownames_to_column("ID")

s1 %>%
  rownames_to_column("ID") %>% 
  left_join(taxa) %>% 
  arrange(rho) %>%
  filter(Phylum %in% c("Proteobacteria", "Acidobacteriota", "Actinobacteriota","Chloroflexi", "Planctomycetota", "Bacteroidota", "Verrucomicrobiota", "Firmicutes")) %>% 
  mutate(Rank = ifelse(Phylum == "Proteobacteria", Class, Phylum)) %>%
  ggplot(aes(x=rho, color=Rank, fill=Rank)) +
  geom_histogram(alpha=0.3, bins = 30) +
  theme_minimal() +
  facet_wrap(~ Rank) +
  theme(legend.position="none")
## Joining with `by = join_by(ID)`

s1.otus <- s1 %>% 
  rownames_to_column("OTU")
ps.data.out.exl %>% 
  select_if(Negate(is.integer)) %>% 
  distinct() %>% 
  left_join(s1.otus, by="OTU") %>%
  filter(rho >= 0) %>%
  arrange(p.adj) %>% 
  head() %>% 
  DT::datatable()

0.10 Additions

Part added after rounds of the peer-review. Thank you so much to reviewer two, you are great!

realtime.stat <- readxl::read_excel("cell_realtime_stat_ph.xlsx")
realtime.names <- readxl::read_excel("cell_realtime_stat_gh_names.xlsx")
gh_taxa <- readr::read_csv("kraken_gh_full_contig_207_corrected.csv") %>% 
  mutate_if(., is.character, as.factor)
## Rows: 23 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Cellulase, Taxa_Genus, GH_Family, Taxa_Family, Contig_name, Family_...
## dbl (1): Contig_identifier
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gh_taxa %>% 
  DT::datatable(caption = "Kraken2 based taxonomy for GH containing contigs")
reatl.m <- realtime.stat %>% 
  mutate(Rows = paste0(phase, "_", rep(seq(1:9), 3))) %>% 
  relocate(Rows) %>% 
  select(-phase)  %>% 
  column_to_rownames("Rows") %>% 
  select_if(is.numeric) %>% 
  as.matrix()

names.d <- gh_taxa %>% 
  rename("label" = "Cellulase") %>% 
  mutate(name = paste0(Genus_new, " ", GH_Family)) %>% 
  select(c("label", "Family_new", "name"))

# reatl.m[reatl.m < 0] <- 0
hcl <- hclust(vegan::vegdist(t(reatl.m), method="euclidean"), "mcquitty")

dhc <- as.dendrogram(hcl)
ddata <- ggdendro::dendro_data(dhc, type = "rectangle")
ddata$labels <- left_join(ddata$labels, names.d, by = "label")

ggplot(ggdendro::segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  geom_text(data = ddata$labels, 
              aes(x = x, y = y, label = name, color = Family_new), size = 4, hjust = -0.1) +
  coord_flip() + 
  scale_y_reverse(expand = c(1.1, 0)) +
  scale_color_brewer(palette="Dark2") +
  ggdendro::theme_dendro() +
    guides(order = 1,
         fill = guide_legend(override.aes = list(shape = 55, size = 9), 
                             shape = guide_legend(order = 44))) +
  theme(legend.position="bottom",
        legend.title = element_blank(),
        legend.key.size=unit(0.56, 'lines'),
        plot.title = element_text(hjust = 1)) +
  ggtitle("WPGMA based on euclidean distance of GH's RealTime")

Add contigs name

names.d <- gh_taxa %>% 
  rename("label" = "Cellulase") %>% 
  mutate(name = paste0(Genus_new, " ", GH_Family, " ", Contig_identifier)) %>% 
  select(c("label", "Family_new", "name"))

# reatl.m[reatl.m < 0] <- 0
hcl <- hclust(vegan::vegdist(t(reatl.m), method="euclidean"), "mcquitty")

dhc <- as.dendrogram(hcl)
ddata <- ggdendro::dendro_data(dhc, type = "rectangle")
ddata$labels <- left_join(ddata$labels, names.d, by = "label")

ggplot(ggdendro::segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  geom_text(data = ddata$labels, 
              aes(x = x, y = y, label = name, color = Family_new), size = 4, hjust = -0.1) +
  coord_flip() + 
  scale_y_reverse(expand = c(1.1, 0)) +
  scale_color_brewer(palette="Dark2") +
  ggdendro::theme_dendro() +
    guides(order = 1,
         fill = guide_legend(override.aes = list(shape = 55, size = 9), 
                             shape = guide_legend(order = 44))) +
  theme(legend.position="bottom",
        legend.title = element_blank(),
        legend.key.size=unit(0.56, 'lines'),
        plot.title = element_text(hjust = 1)) +
  ggtitle("WPGMA based on euclidian distance of GH's RealTime")

cor_test_mat <- psych::corr.test(realtime_matrix)$p
corrplot::corrplot(m, p.mat = cor_test_mat, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1
# text(p1$x, p1$y, round(p1$corr, 2))

ddata
library(ggpubr)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
real_long <- pivot_longer(realtime.stat, colnames(realtime.stat)[2:length(colnames(realtime.stat))]) %>%
  mutate(phase = factor(phase, levels = c("early", "middle", "late")))

stat.test <- real_long %>%
  group_by(name) %>%
  tukey_hsd(value ~ phase) %>%
  add_xy_position(x = "name",
                  dodge = 0.8,
                  step.increase = 0.05)

real_long %>% 
  ggplot(aes(x=name, y=value, col=phase)) +
    geom_boxplot() +
    geom_point(size=0.4, alpha=0.9, position=position_dodge(width=0.75),aes(group=phase)) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) + 
  labs( title = "Realtime of selected GH") +
  ylab("log2foldchange") +
  stat_pvalue_manual(
    stat.test,
    label = "p.adj.signif", 
    tip.length = 0.005,
    hide.ns = TRUE,
    bracket.nudge.y = -0.5
  )

realtime.big <- readxl::read_excel("straw_rt_stat_gr.xlsx")
## New names:
## • `` -> `...1`
realtime.big <- realtime.big %>% 
  group_by(day) %>% 
  mutate_at(vars(FUN), 
            ~replace_na(., 
                        mean(., na.rm = TRUE)))

realtime.big.long <- realtime.big %>% 
  filter(phase != "no") %>% 
  pivot_longer(colnames(realtime.big)[4:length(colnames(realtime.big))]) %>% 
  mutate(phase = factor(phase, levels = c("early", "middle", "late"))) %>% 
  mutate(value = log2(value)) 

stat.test <- realtime.big.long %>%
  group_by(name) %>%
  tukey_hsd(value ~ phase) %>%
  add_xy_position(x = "name", dodge = 0.5)

realtime.big.long %>% 
  ggplot(aes(x=name, y=value, col=phase)) +
    geom_boxplot() +
    geom_point(size=0.4, alpha=0.9, position=position_dodge(width=0.75),aes(group=phase)) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) + 
  labs(
  title = "Realtime"
) +
ylab("operons per gram, log2") +
  stat_pvalue_manual(
  stat.test,  label = "{p.adj} {p.adj.signif}", 
  tip.length = 0.005, hide.ns = TRUE
  )

Permanova based on GH specific RealTime data

real_matrix <- realtime.stat %>% 
  select(-phase) %>% 
  as.matrix() %>% 
  t()

gh_meta <- gh_taxa %>% 
  column_to_rownames("Cellulase") %>% 
  select(GH_Family, Genus_new, Family_new) 

meta <- gh_taxa %>% 
  select(GH_Family, Genus_new, Family_new) %>% 
  t() %>% 
  as.data.frame() %>% 
  setNames(gh_taxa$Cellulase)
  
d <- dist(real_matrix, method = "euclidian")

table1 <- phyloseq::distance(ps.f, "bray")
table2 <- as(sample_data(ps.f@sam_data), "data.frame")

list(
  vegan::adonis2(d ~ GH_Family + Genus_new + Family_new, data = gh_meta),
  vegan::adonis2(d ~ Genus_new  + GH_Family, data = gh_meta),
  vegan::adonis2(d ~ GH_Family + Genus_new, data = gh_meta),
  vegan::adonis2(d ~ GH_Family*Genus_new, data = gh_meta)
)
## [[1]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family + Genus_new + Family_new, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)   
## GH_Family  7   3479.3 0.41290 3.1557  0.009 **
## Genus_new  6   3529.7 0.41888 3.7349  0.005 **
## Residual   9   1417.6 0.16823                 
## Total     22   8426.6 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[2]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ Genus_new + GH_Family, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)    
## Genus_new  6   5268.9 0.62527 5.5753  0.001 ***
## GH_Family  7   1740.1 0.20650 1.5783  0.162    
## Residual   9   1417.6 0.16823                  
## Total     22   8426.6 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[3]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family + Genus_new, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)   
## GH_Family  7   3479.3 0.41290 3.1557  0.013 * 
## Genus_new  6   3529.7 0.41888 3.7349  0.007 **
## Residual   9   1417.6 0.16823                 
## Total     22   8426.6 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[4]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family * Genus_new, data = gh_meta)
##                     Df SumOfSqs      R2      F Pr(>F)  
## GH_Family            7   3479.3 0.41290 3.0006  0.036 *
## Genus_new            6   3529.7 0.41888 3.5514  0.017 *
## GH_Family:Genus_new  4    589.3 0.06994 0.8894  0.561  
## Residual             5    828.2 0.09829                
## Total               22   8426.6 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.11 ITS

Same analysis, but much quicker and dirtier.

library(DESeq2)

ps_its_bulk <- readRDS("../../its/d2/ps_its")

sample.data <- ps_its_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", "late"))) %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late"))) %>% 
  mutate(Day = Day %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps_its_bulk) <- sample.data

ps_its <- prune_samples(!sample_data(ps_its_bulk)$Day %in% "bulk", ps_its_bulk)
ps_its  <- prune_taxa(taxa_sums(ps_its) > 0, ps_its)

amp_its <- phyloseq_to_ampvis2(ps_its)

amp_its_bulk <- phyloseq_to_ampvis2(ps_its_bulk)

ps.its.inall <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
ps.its.inall <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)
Biostrings::writeXStringSet(ps.its.inall@refseq, file="ref_inall.fasta")
Biostrings::writeXStringSet(ps_its@refseq, file="ref.fasta")

tree <- ape::read.tree("../../its/d2/al.fasta.contree")

ps.its.inall@phy_tree <- tree
ps.its.inall  <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)


ps.its.exl  <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.its.exl  <- prune_taxa(taxa_sums(ps.its.exl) > 100, ps.its.exl)
ps.its.exl.taxa <- taxa_names(ps.its.exl)

amp_its_inall <- phyloseq_to_ampvis2(ps.its.inall)

ps_its@sam_data %>% 
  DT::datatable()

Base statistics for its

With bulk soil

amp_its_bulk
## ampvis2 object with 4 elements. 
## Summary of OTU table:
## [1]       42     3178   460040     1355    39372   8278.5 10953.33
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##    1379(43%) 1213(38.17%) 1127(35.46%) 1086(34.17%) 1046(32.91%)   998(31.4%) 
##      Species 
##  482(15.17%) 
## 
## Metadata variables: 4 
##  SampleID, Day, Description, Group
amp_its
## ampvis2 object with 4 elements. 
## Summary of OTU table:
## [1]     36   1264 275148   1355  14380   7211   7643
## 
## Assigned taxonomy:
##     Kingdom      Phylum       Class       Order      Family       Genus 
##    416(33%) 325(25.71%) 279(22.07%) 263(20.81%) 247(19.54%) 233(18.43%) 
##     Species 
## 162(12.82%) 
## 
## Metadata variables: 4 
##  SampleID, Day, Description, Group
amp_heatmap(amp_its_bulk,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

amp_heatmap(amp_its_bulk,
            tax_show = 7,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_add = "Kingdom",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

Major ITS phylotypes

amp_heatmap(amp_its,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

All by the phylums

amp_heatmap(amp_its,
            tax_show = 7,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_add = "Kingdom",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

There are going to WGCNA pipeline(116 phylotypes)

amp_heatmap(amp_its_inall,
            tax_show = 40 ,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis

beta_custom_norm_NMDS_elli_w(ps_its_bulk, Group="Day", Color="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1414574 
## Run 1 stress 0.1516495 
## Run 2 stress 0.1420709 
## Run 3 stress 0.1414477 
## ... New best solution
## ... Procrustes: rmse 0.003887442  max resid 0.01937902 
## Run 4 stress 0.1536413 
## Run 5 stress 0.141642 
## ... Procrustes: rmse 0.01429757  max resid 0.06532445 
## Run 6 stress 0.1421837 
## Run 7 stress 0.1414477 
## ... Procrustes: rmse 0.00001939437  max resid 0.00006024144 
## ... Similar to previous best
## Run 8 stress 0.1537918 
## Run 9 stress 0.1414448 
## ... New best solution
## ... Procrustes: rmse 0.002782189  max resid 0.01293661 
## Run 10 stress 0.1414467 
## ... Procrustes: rmse 0.006632629  max resid 0.03482667 
## Run 11 stress 0.1444901 
## Run 12 stress 0.1532385 
## Run 13 stress 0.1443772 
## Run 14 stress 0.1414574 
## ... Procrustes: rmse 0.004649221  max resid 0.01894334 
## Run 15 stress 0.1519091 
## Run 16 stress 0.1415836 
## ... Procrustes: rmse 0.01176699  max resid 0.06797239 
## Run 17 stress 0.1441972 
## Run 18 stress 0.1531946 
## Run 19 stress 0.1523984 
## Run 20 stress 0.1524871 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     19: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin

beta_custom_norm_NMDS_elli_w(ps_its, Group="Day", Color="Day")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1873573 
## Run 1 stress 0.1890853 
## Run 2 stress 0.1877849 
## ... Procrustes: rmse 0.06797956  max resid 0.3258443 
## Run 3 stress 0.1873573 
## ... Procrustes: rmse 0.000002643625  max resid 0.000008942489 
## ... Similar to previous best
## Run 4 stress 0.2246903 
## Run 5 stress 0.1910365 
## Run 6 stress 0.1886435 
## Run 7 stress 0.220317 
## Run 8 stress 0.1873573 
## ... Procrustes: rmse 0.000002489462  max resid 0.00001005272 
## ... Similar to previous best
## Run 9 stress 0.1873573 
## ... New best solution
## ... Procrustes: rmse 0.000002900432  max resid 0.00001212819 
## ... Similar to previous best
## Run 10 stress 0.1881727 
## Run 11 stress 0.1873573 
## ... New best solution
## ... Procrustes: rmse 0.000003703707  max resid 0.00001672818 
## ... Similar to previous best
## Run 12 stress 0.2203009 
## Run 13 stress 0.1881727 
## Run 14 stress 0.2098111 
## Run 15 stress 0.1910365 
## Run 16 stress 0.1877849 
## ... Procrustes: rmse 0.06797676  max resid 0.3258368 
## Run 17 stress 0.1873573 
## ... Procrustes: rmse 0.00003059685  max resid 0.0001532429 
## ... Similar to previous best
## Run 18 stress 0.1881727 
## Run 19 stress 0.1881727 
## Run 20 stress 0.1891497 
## *** Best solution repeated 2 times

With bulk soil

p1 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

Without bulk soil

p1 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

The first plot is clr normalization (compositional), the second is vst(DESeq2). In contrast to bacteria - for vst WGCNA did not show good results (no plateau yield).
We still applied vst stabilization, but we have to keep in mind that the resulting output is not great.

otu.inall <- as.data.frame(ps.its.inall@otu_table@.Data) 
ps.inall.clr <- ps.its.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")
## pickSoftThreshold: will use block size 68.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 68 of 68
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.05110 -6.85         0.6370 33.1000 33.200000 34.400
## 2      2  0.00907  1.51         0.6780 17.1000 17.100000 18.600
## 3      3  0.24300 -3.64         0.7670  9.2400  9.200000 11.000
## 4      4  0.58200 -4.21         0.6270  5.2100  5.110000  7.230
## 5      5  0.16800 -8.96        -0.0691  3.0800  2.910000  5.190
## 6      6  0.18400 -6.75        -0.0474  1.9100  1.700000  4.000
## 7      7  0.25400 -6.07         0.0954  1.2500  1.020000  3.260
## 8      8  0.26300 -5.21         0.1110  0.8510  0.625000  2.760
## 9      9  0.25800 -4.44         0.1060  0.6080  0.404000  2.410
## 10    10  0.25700 -3.88         0.1160  0.4520  0.270000  2.140
## 11    12  0.26300 -4.44         0.0797  0.2770  0.131000  1.750
## 12    13  0.24600 -3.92         0.0718  0.2260  0.092900  1.600
## 13    14  0.24600 -3.64         0.0780  0.1870  0.065400  1.480
## 14    15  0.24900 -3.50         0.0837  0.1580  0.046500  1.370
## 15    16  0.24700 -3.35         0.0873  0.1350  0.033400  1.270
## 16    17  0.24600 -3.23         0.0902  0.1170  0.024300  1.180
## 17    18  0.24900 -3.16         0.0964  0.1020  0.017800  1.100
## 18    19  0.26000 -3.64         0.0650  0.0899  0.013200  1.030
## 19    20  0.24300 -3.46         0.0444  0.0796  0.009800  0.962
## 20    21  0.24800 -3.45         0.0510  0.0708  0.007320  0.902
## 21    22  0.25300 -3.36         0.0584  0.0633  0.005490  0.847
## 22    23  0.25200 -3.26         0.0591  0.0569  0.004140  0.797
## 23    24  0.25200 -3.16         0.0596  0.0512  0.003130  0.751
## 24    25  0.25900 -3.21         0.0656  0.0463  0.002370  0.708
## 25    26  0.26300 -3.14         0.0712  0.0420  0.001800  0.668
## 26    27  0.25700 -3.39         0.0457  0.0383  0.001370  0.632
## 27    28  0.26300 -3.46         0.0544  0.0349  0.001050  0.598
## 28    29  0.25900 -3.46         0.0499  0.0319  0.000799  0.566
## 29    30  0.26400 -3.41         0.0559  0.0293  0.000612  0.536
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

ps.varstab <- ps_vst(ps.its.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")
## pickSoftThreshold: will use block size 68.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 68 of 68
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1    1.0   0.3680  0.881         0.7030   8.490   8.35000  12.80
## 2    1.5   0.0705  0.305         0.6220   5.500   5.26000   9.62
## 3    2.0   0.0623 -0.354         0.1520   3.880   3.65000   7.92
## 4    2.5   0.2430 -0.521         0.2930   2.920   2.63000   6.87
## 5    3.0   0.4830 -0.687         0.4270   2.310   1.91000   6.17
## 6    3.5   0.5750 -0.838         0.4820   1.910   1.43000   5.72
## 7    4.0   0.5600 -0.897         0.4340   1.620   1.08000   5.41
## 8    4.5   0.0757 -1.690        -0.1430   1.410   0.81600   5.18
## 9    5.0   0.0816 -1.660        -0.1350   1.260   0.64800   5.00
## 10   5.5   0.1680 -3.190        -0.0669   1.130   0.51500   4.85
## 11   6.0   0.1570 -3.480        -0.0604   1.040   0.41400   4.73
## 12   6.5   0.1570 -3.350        -0.0651   0.963   0.34000   4.63
## 13   7.0   0.1630 -3.310        -0.0593   0.900   0.27100   4.54
## 14   7.5   0.1670 -3.340        -0.0540   0.847   0.21400   4.46
## 15   8.0   0.1640 -3.260        -0.0578   0.802   0.17300   4.39
## 16   8.5   0.1020 -2.670        -0.0433   0.764   0.14200   4.32
## 17   9.0   0.1330 -2.920        -0.0212   0.730   0.11600   4.26
## 18   9.5   0.0611 -1.740         0.1050   0.701   0.09720   4.20
## 19  10.0   0.0597 -1.780         0.1240   0.675   0.08180   4.14
## 20  11.0   0.0674 -1.800         0.1270   0.630   0.05830   4.03
## 21  12.0   0.0358 -1.140         0.1310   0.593   0.04190   3.93
## 22  13.0   0.0003 -0.122         0.3070   0.562   0.02940   3.84
## 23  14.0   0.0693 -1.780         0.0436   0.536   0.02040   3.76
## 24  15.0   0.0414 -1.440         0.2380   0.512   0.01430   3.68
## 25  16.0   0.0458 -1.450         0.2260   0.491   0.01000   3.60
## 26  17.0   0.0509 -1.470         0.2150   0.473   0.00715   3.52
## 27  18.0   0.0398 -1.160         0.0947   0.456   0.00516   3.45
## 28  19.0   0.0443 -1.170         0.0743   0.440   0.00374   3.38
## 29  20.0   0.0475 -1.180         0.0675   0.426   0.00271   3.32
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)

net3 <- WGCNA::blockwiseModules(data2,
                          power=4,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)
##      mergeCloseModules: less than two proper modules.
##       ..color levels are grey, turquoise
##       ..there is nothing to merge.
mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)

The green cluster is the background, which in the case of the red cluster for bacteria that here, I don’t think it can be called a cluster at all.
WGCNA as an analysis helps clustering based on correlations by finding a soft threshold(power, see plot where the red numbers still can’t reach the plateau),
the darkgreen and red cluster are simply unconnected phylotypes.

modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "day",
       y = "normalized abundance")

p + scale_color_manual(values = levels(submod_df$module)) +
  scale_x_discrete(labels=(c("D01"="3",
                                "D03"="14",
                                "D05"="28",
                                "D07"="49" ,
                                "D08"="63",
                                "D10"="91",
                                "D12"="119",
                                "D13"="140",
                                "D14"="161",
                                "D15"="182")
         ))

l_its <- color_filt_broken(ps_its, submod_df, ps.its.inall)
l_its

$darkgreen \(darkgreen\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 25 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 3 sample variables ] tax_table() Taxonomy Table: [ 25 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 25 reference sequences ]

\(darkgreen\)amp ampvis2 object with 4 elements. Summary of OTU table: [1] 36 25 84467 0 6856 1935.5 2346.31

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 16(64%) 12(48%) 9(36%) 7(28%) 7(28%) 7(28%) 6(24%)

Metadata variables: 4 SampleID, Day, Description, Group

\(darkgreen\)heat \(darkgreen\)heat_rel \(darkgreen\)tree \(darkgreen\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq1 k__Fungi p__Ascomycota c__Sordariomycetes o__Chaetosphaeriales f__Chaetosphaeriaceae g__Chloridium s__aseptatum
Seq12 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae g__Lecythophora s__canina
Seq16 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq13 k__Fungi NA NA NA NA NA NA
Seq69 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Clavicipitaceae g__Marquandomyces s__marquandii
Seq43 NA NA NA NA NA NA NA
Seq63 NA NA NA NA NA NA NA
Seq100 NA NA NA NA NA NA NA
Seq42 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Stachybotrys s__terrestris
Seq45 k__Fungi p__Ascomycota NA NA NA NA NA
Seq21 NA NA NA NA NA NA NA
Seq195 k__Fungi p__Ascomycota NA NA NA NA NA
Seq138 k__Fungi p__Ascomycota NA NA NA NA NA
Seq29 NA NA NA NA NA NA NA
Seq8 k__Fungi NA NA NA NA NA NA
Seq54 k__Fungi p__Ascomycota c__Leotiomycetes o__Helotiales f__Helotiaceae g__Scytalidium NA
Seq79 NA NA NA NA NA NA NA
Seq125 NA NA NA NA NA NA NA
Seq41 k__Fungi NA NA NA NA NA NA
Seq76 NA NA NA NA NA NA NA
Seq124 k__Fungi p__Ascomycota c__Sordariomycetes o__Chaetosphaeriales f__Chaetosphaeriaceae g__Chloridium s__aseptatum
Seq20 k__Fungi p__Ascomycota c__Sordariomycetes NA NA NA NA
Seq32 k__Fungi NA NA NA NA NA NA
Seq132 NA NA NA NA NA NA NA
Seq40 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Clavicipitaceae g__Marquandomyces s__marquandii

$salmon \(salmon\)ps phyloseq-class experiment-level object otu_table() OTU Table: [ 43 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 3 sample variables ] tax_table() Taxonomy Table: [ 43 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 43 reference sequences ]

\(salmon\)amp ampvis2 object with 4 elements. Summary of OTU table: [1] 36 43 116231 124 10633 2510.5 3228.64

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 40(93%) 40(93.02%) 37(86.05%) 37(86.05%) 36(83.72%) 33(76.74%) 29(67.44%)

Metadata variables: 4 SampleID, Day, Description, Group

\(salmon\)heat \(salmon\)heat_rel \(salmon\)tree \(salmon\)taxa
Kingdom Phylum Class Order Family Genus Species
Seq2 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium s__equiseti
Seq49 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq6 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq10 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium s__inaequale
Seq3 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq5 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq7 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Psathyrellaceae g__Coprinellus s__flocculosus
Seq19 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Psathyrellaceae g__Coprinellus s__flocculosus
Seq53 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq99 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae g__Parachaetomium s__iranianum
Seq30 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq50 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur s__kilbournensis
Seq78 k__Metazoa p__Nematoda c__Chromadorea o__Rhabditida f__Cephalobidae g__Acrobeloides NA
Seq101 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium s__inaequale
Seq4 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium s__oxysporum
Seq175 NA NA NA NA NA NA NA
Seq62 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur NA
Seq48 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Albifimbria s__verrucaria
Seq36 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq9 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Stachybotryaceae g__Albifimbria s__verrucaria
Seq11 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq74 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq89 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq33 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium s__oxysporum
Seq22 k__Fungi p__Basidiomycota c__Cystobasidiomycetes o__Cystobasidiales f__Cystobasidiaceae g__Occultifur s__kilbournensis
Seq31 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium NA
Seq28 k__Fungi p__Basidiomycota c__Agaricomycetes o__Corticiales f__Corticiaceae g__Waitea s__circinata
Seq46 NA NA NA NA NA NA NA
Seq85 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
Seq121 k__Fungi p__Ascomycota c__Sordariomycetes o__Coniochaetales f__Coniochaetaceae NA NA
Seq24 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae NA NA
Seq27 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Lasiosphaeriaceae g__Schizothecium s__inaequale
Seq66 k__Fungi p__Mucoromycota c__Mucoromycetes o__Mucorales f__Mucoraceae g__Actinomucor s__elegans
Seq114 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Psathyrellaceae g__Coprinellus s__flocculosus
Seq161 k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales NA NA NA
Seq96 NA NA NA NA NA NA NA
Seq15 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium s__equiseti
Seq94 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq155 k__Fungi p__Ascomycota c__Sordariomycetes o__Sordariales f__Chaetomiaceae NA NA
Seq152 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium s__oxysporum
Seq39 k__Fungi p__Ascomycota c__Sordariomycetes o__Hypocreales f__Nectriaceae g__Fusarium NA
Seq180 k__Viridiplantae p__Anthophyta NA NA NA NA NA
Seq52 k__Fungi p__Basidiomycota c__Agaricomycetes o__Agaricales f__Bolbitiaceae g__Conocybe s__crispa
p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

ps.m <- phyloseq::psmelt(ps_its)
ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-Group) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_its)) {
  a <- taxa_names(l_its[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.its.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.its.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.its.data.out.tsv", sep = "\t")
map_d_trial <- ps_its_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Bulk = ifelse(Day == "bulk", "bulk", "trial") %>% as.factor())

ps_its_bulk@sam_data <-  sample_data(map_d_trial)

amp_its_bulk_venn <- phyloseq_to_ampvis2(ps_its_bulk)
p.venn.cc <- amp_venn(amp_its_bulk_venn,
         group_by = "Bulk",
         normalise = TRUE,
         cut_a = 0.000001,
         cut_f=0.000001,
            )

p.venn.cc

# pw <- p.venn.cc

# ggarrange(pw, pw, pw, pw,pw, pw, pw, pw, ncol = 1, labels = paste0("venn", seq(1, 8)))

0.12 Packages info

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3         reshape2_1.4.4        KneeArrower_1.0.0    
##  [4] ggtree_3.6.2          heatmaply_1.4.2       viridis_0.6.2        
##  [7] viridisLite_0.4.1     plotly_4.10.1         ampvis2_2.7.28       
## [10] ggpubr_0.6.0          lubridate_1.9.2       forcats_1.0.0        
## [13] stringr_1.5.0         dplyr_1.1.0           purrr_1.0.1          
## [16] readr_2.1.4           tidyr_1.3.0           tibble_3.1.8         
## [19] ggplot2_3.4.1         tidyverse_2.0.0       phyloseq_1.42.0      
## [22] WGCNA_1.72-1          fastcluster_1.2.3     dynamicTreeCut_1.63-1
## 
## loaded via a namespace (and not attached):
##   [1] rsvd_1.0.5                     Hmisc_4.8-0                   
##   [3] svglite_2.1.1                  class_7.3-21                  
##   [5] foreach_1.5.2                  crayon_1.5.2                  
##   [7] rbibutils_2.2.13               MASS_7.3-58.2                 
##   [9] rhdf5filters_1.10.0            nlme_3.1-162                  
##  [11] backports_1.4.1                impute_1.72.3                 
##  [13] rlang_1.0.6                    compositions_2.0-5            
##  [15] XVector_0.38.0                 readxl_1.4.2                  
##  [17] irlba_2.3.5.1                  nloptr_2.0.3                  
##  [19] ca_0.71.1                      scater_1.26.1                 
##  [21] BiocParallel_1.32.5            bit64_4.0.5                   
##  [23] glue_1.6.2                     rngtools_1.5.2                
##  [25] parallel_4.2.0                 vipor_0.4.5                   
##  [27] AnnotationDbi_1.60.0           BiocGenerics_0.44.0           
##  [29] tidyselect_1.2.0               SummarizedExperiment_1.28.0   
##  [31] XML_3.99-0.13                  zoo_1.8-11                    
##  [33] xtable_1.8-4                   magrittr_2.0.3                
##  [35] evaluate_0.20                  Rdpack_2.4                    
##  [37] scuttle_1.8.4                  cli_3.6.0                     
##  [39] zlibbioc_1.44.0                rstudioapi_0.14               
##  [41] doRNG_1.8.6                    MultiAssayExperiment_1.24.0   
##  [43] bslib_0.4.2                    rpart_4.1.19                  
##  [45] treeio_1.22.0                  BiocSingular_1.14.0           
##  [47] xfun_0.37                      multtest_2.54.0               
##  [49] cluster_2.1.4                  TSP_1.2-2                     
##  [51] biomformat_1.26.0              ggfittext_0.9.1               
##  [53] KEGGREST_1.38.0                expm_0.999-7                  
##  [55] ggrepel_0.9.3                  ape_5.7                       
##  [57] dendextend_1.16.0              Biostrings_2.66.0             
##  [59] png_0.1-8                      permute_0.9-7                 
##  [61] withr_2.5.0                    bitops_1.0-7                  
##  [63] ggforce_0.4.1                  plyr_1.8.8                    
##  [65] cellranger_1.1.0               e1071_1.7-13                  
##  [67] coda_0.19-4                    pillar_1.8.1                  
##  [69] cachem_1.0.7                   Rmpfr_0.9-1                   
##  [71] multcomp_1.4-22                DelayedMatrixStats_1.20.0     
##  [73] vctrs_0.5.2                    ellipsis_0.3.2                
##  [75] generics_0.1.3                 tools_4.2.0                   
##  [77] foreign_0.8-84                 beeswarm_0.4.0                
##  [79] munsell_0.5.0                  tweenr_2.0.2                  
##  [81] emmeans_1.8.4-1                proxy_0.4-27                  
##  [83] DelayedArray_0.24.0            fastmap_1.1.1                 
##  [85] compiler_4.2.0                 abind_1.4-5                   
##  [87] DescTools_0.99.48              decontam_1.18.0               
##  [89] GenomeInfoDbData_1.2.9         lattice_0.20-45               
##  [91] deldir_1.0-6                   utf8_1.2.3                    
##  [93] jsonlite_1.8.4                 scales_1.2.1                  
##  [95] gld_2.6.6                      ScaledMatrix_1.6.0            
##  [97] tidytree_0.4.2                 carData_3.0-5                 
##  [99] sparseMatrixStats_1.10.0       estimability_1.4.1            
## [101] lazyeval_0.2.2                 car_3.1-1                     
## [103] doParallel_1.0.17              latticeExtra_0.6-30           
## [105] checkmate_2.1.0                rmarkdown_2.20                
## [107] sandwich_3.0-2                 cowplot_1.1.1                 
## [109] webshot_0.5.4                  treemapify_2.5.5              
## [111] Biobase_2.58.0                 igraph_1.4.1                  
## [113] survival_3.5-3                 numDeriv_2016.8-1.1           
## [115] yaml_2.3.7                     systemfonts_1.0.4             
## [117] ANCOMBC_2.0.2                  htmltools_0.5.4               
## [119] memoise_2.0.1                  locfit_1.5-9.7                
## [121] seriation_1.4.1                IRanges_2.32.0                
## [123] gmp_0.7-1                      digest_0.6.31                 
## [125] assertthat_0.2.1               registry_0.5-1                
## [127] RSQLite_2.3.0                  yulab.utils_0.0.6             
## [129] Exact_3.2                      data.table_1.14.8             
## [131] blob_1.2.3                     vegan_2.6-4                   
## [133] S4Vectors_0.36.2               preprocessCore_1.60.2         
## [135] splines_4.2.0                  Formula_1.2-5                 
## [137] labeling_0.4.2                 DECIPHER_2.26.0               
## [139] Rhdf5lib_1.20.0                RCurl_1.98-1.10               
## [141] broom_1.0.3                    hms_1.1.2                     
## [143] rhdf5_2.42.0                   colorspace_2.1-0              
## [145] base64enc_0.1-3                mnormt_2.1.1                  
## [147] ggbeeswarm_0.7.1               GenomicRanges_1.50.2          
## [149] aplot_0.1.9                    nnet_7.3-18                   
## [151] TreeSummarizedExperiment_2.6.0 sass_0.4.5                    
## [153] mia_1.6.0                      Rcpp_1.0.10                   
## [155] mvtnorm_1.1-3                  fansi_1.0.4                   
## [157] tzdb_0.3.0                     R6_2.5.1                      
## [159] grid_4.2.0                     lifecycle_1.0.3               
## [161] signal_0.7-7                   rootSolve_1.8.2.3             
## [163] ggsignif_0.6.4                 minqa_1.2.5                   
## [165] jquerylib_0.1.4                robustbase_0.95-0             
## [167] Matrix_1.5-3                   TH.data_1.1-1                 
## [169] RColorBrewer_1.1-3             iterators_1.0.14              
## [171] htmlwidgets_1.6.1              beachmat_2.14.0               
## [173] polyclip_1.10-4                crosstalk_1.2.0               
## [175] timechange_0.2.0               gridGraphics_0.5-1            
## [177] rvest_1.0.3                    mgcv_1.8-42                   
## [179] CVXR_1.0-11                    lmom_2.9                      
## [181] htmlTable_2.4.1                patchwork_1.1.2               
## [183] tensorA_0.36.2                 codetools_0.2-19              
## [185] matrixStats_0.63.0             GO.db_3.16.0                  
## [187] psych_2.2.9                    SingleCellExperiment_1.20.0   
## [189] GenomeInfoDb_1.34.9            gtable_0.3.1                  
## [191] DBI_1.1.3                      bayesm_3.1-5                  
## [193] stats4_4.2.0                   ggfun_0.0.9                   
## [195] httr_1.4.5                     highr_0.10                    
## [197] stringi_1.7.12                 vroom_1.6.1                   
## [199] farver_2.1.1                   annotate_1.76.0               
## [201] DT_0.27                        xml2_1.3.3                    
## [203] ggdendro_0.1.23                boot_1.3-28.1                 
## [205] BiocNeighbors_1.16.0           kableExtra_1.3.4.9000         
## [207] lme4_1.1-31                    interp_1.1-3                  
## [209] ade4_1.7-22                    geneplotter_1.76.0            
## [211] ggplotify_0.1.0                picante_1.8.2                 
## [213] energy_1.7-11                  DESeq2_1.38.3                 
## [215] DEoptimR_1.0-11                bit_4.0.5                     
## [217] jpeg_0.1-10                    MatrixGenerics_1.10.0         
## [219] pkgconfig_2.0.3                lmerTest_3.1-3                
## [221] gsl_2.1-8                      DirichletMultinomial_1.40.0   
## [223] rstatix_0.7.2                  corrplot_0.92                 
## [225] knitr_1.42