Straw – WGCNA

0.1 Preprocessing:

Basic dada2 pipeline for 16S:
truncLen=c(200, 180)
trimLeft=c(19,20)
maxEE=c(3,4)
truncQ=2
pool=pseudo
database - SILVA 138.1

Delete all bad taxa with parameters below:
annosighted on the phylum level +
chloroplasts/mithondria +
low reads/richness(based on the plot results)

tree from SEPP-QIIME2-plagin:

qiime tools import
–input-path ref_filt.fasta
–output-path rep_seq.qza
–type ‘FeatureData[Sequence]’ qiime fragment-insertion sepp
–i-representative-sequences rep_seq.qza –i-reference-database sepp-refs-silva-128.qza
–o-tree insertion-tree.qza
–o-placements insertion-placements.qza
–p-threads 40
unzip -p insertion-tree.qza /data/ > tree.nwk

ITS preprocessing:

dada2 with cutadapt trimming
maxEE=c(5,8)
truncQ = 8
minLen = 50
pool=“pseudo”
UNITE version - 10.05.2021

ML tree from IQ-TREE programm:

mafft –auto FASTA > FASTA_ALIGHNED
iqtree -s FASTA_ALIGHNED -nt 30 -B 1000

0.2 Import

Import phyloseq object and libraries
Dataset - straw decomposition time series - from factor Day - 10 levels Also in the dataset added bulk soils samples.

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(ANCOMBC)
library(heatmaply)
library(compositions)
library(igraph)
library(WGCNA)
require(DESeq2)
require(phyloseq)

ps_bulk <- readRDS("ps_bulk")

0.3 functions

#phyloseq object to ampvis2 object
#https://gist.github.com/KasperSkytte/8d0ca4206a66be7ff6d76fc4ab8e66c6

phyloseq_to_ampvis2 <- function(physeq) {
  #check object for class
  if(!any(class(physeq) %in% "phyloseq"))
    stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
  
  #ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
  if(is.null(physeq@tax_table))
    stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
  
  #OTUs must be in rows, not columns
  if(phyloseq::taxa_are_rows(physeq))
    abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
  else
    abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
  
  #tax_table is assumed to have OTUs in rows too
  tax <- phyloseq::tax_table(physeq)@.Data
  
  #merge by rownames (OTUs)
  otutable <- merge(
    abund,
    tax,
    by = 0,
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  )
  colnames(otutable)[1] <- "OTU"
  
  #extract sample_data (metadata)
  if(!is.null(physeq@sam_data)) {
    metadata <- data.frame(
      phyloseq::sample_data(physeq),
      row.names = phyloseq::sample_names(physeq), 
      stringsAsFactors = FALSE, 
      check.names = FALSE
    )
    
    #check if any columns match exactly with rownames
    #if none matched assume row names are sample identifiers
    samplesCol <- unlist(lapply(metadata, function(x) {
      identical(x, rownames(metadata))}))
    
    if(any(samplesCol)) {
      #error if a column matched and it's not the first
      if(!samplesCol[[1]])
        stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
    } else {
      #assume rownames are sample identifiers, merge at the end with name "SampleID"
      if(any(colnames(metadata) %in% "SampleID"))
        stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
      metadata$SampleID <- rownames(metadata)
      
      #reorder columns so SampleID is the first
      metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
    }
  } else
    metadata <- NULL
  
  #extract phylogenetic tree, assumed to be of class "phylo"
  if(!is.null(physeq@phy_tree)) {
    tree <- phyloseq::phy_tree(physeq)
  } else
    tree <- NULL
  
  #extract OTU DNA sequences, assumed to be of class "XStringSet"
  if(!is.null(physeq@refseq)) {
    #convert XStringSet to DNAbin using a temporary file (easiest)
    fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
    Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
  } else
    fastaTempFile <- NULL
  
  #load as normally with amp_load
  ampvis2::amp_load(
    otutable = otutable,
    metadata = metadata,
    tree = tree,
    fasta = fastaTempFile
  )
}

#variance stabilisation from DESeq2

ps_vst <- function(ps, factor){
  

  
  diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))              
  diagdds = estimateSizeFactors(diagdds, type="poscounts")
  diagdds = estimateDispersions(diagdds, fitType = "local") 
  pst <- varianceStabilizingTransformation(diagdds)
  pst.dimmed <- t(as.matrix(assay(pst))) 
  # pst.dimmed[pst.dimmed < 0.0] <- 0.0
  ps.varstab <- ps
  otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE) 
  return(ps.varstab)
} 

#WGCNA visualisation
#result - list class object with attributes:
# ps - phyloseq object
# amp - ampwis2 object
# heat - heatmap with absalute read numbers
# heat_rel - hetmap with relative abundances
# tree - phylogenetic tree with taxonomy

color_filt <- function(ps, df){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps, function(x) x / sum(x) * 100)
    ps.rel.col <- prune_taxa(color_name, ps.rel)
    amp.r <- phyloseq_to_ampvis2(ps.rel.col)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col@phy_tree
    taxa <- as.data.frame(ps.col@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa) %>%
                      kableExtra::kable_paper() %>%
                      kableExtra::scroll_box(width ="100%", height = "200px"))
                    
  }
  return(l)
}


color_filt_broken <- function(ps, df, ps.pruned){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    ps.col.pruned <- prune_taxa(color_name, ps.pruned)
    amp.col <- phyloseq_to_ampvis2(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
    amp.r <- phyloseq_to_ampvis2(ps.rel)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col.pruned@phy_tree
    taxa <- as.data.frame(ps.col.pruned@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 3) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p,
                   "taxa" = knitr::kable(taxa) %>%
                      kableExtra::kable_paper() %>%
                      kableExtra::scroll_box(width ="100%", height = "200px"))
  }
  return(l)
}

detachAllPackages <- function() {

  basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")

  package.list <- search()[ifelse(unlist(gregexpr("package:",search()))==1,TRUE,FALSE)]

  package.list <- setdiff(package.list,basic.packages)

  if (length(package.list)>0)  for (package in package.list) detach(package, character.only=TRUE, force = TRUE)

}

plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <-  seq(max(er[[metric]]), length=length(stat.test$p.adj), by=max(er[[metric]]/20))

  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    ggpubr::stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}


# standart NMDS plot tool frop phyloseq with some additional aestatics 
# have stress value on plot - may work as fuck
beta_custom_norm_NMDS_elli_w <- function(ps, seed = 7888, normtype="vst", Color="What", Group="Repeat"){
  require(phyloseq)
  require(ggplot2)
  require(ggpubr)
  library(ggforce)
  
  
  ordination.b <- ordinate(ps, "NMDS", "bray")
  mds <- as.data.frame(ordination.b$points)
  p  <-  plot_ordination(ps,
                         ordination.b,
                         type="sample",
                         color = Color,
                         title="NMDS - Bray-Curtis",
                         # title=NULL,
                         axes = c(1,2) ) + 
    theme_bw() + 
    theme(text = element_text(size = 10)) + 
    geom_point(size = 3) +
    annotate("text",
    x=min(mds$MDS1) + abs(min(mds$MDS1))/7,
    y=max(mds$MDS2),
    label=paste0("Stress -- ", round(ordination.b$stress, 3))) +
    geom_mark_ellipse(aes_string(group = Group, label = Group),
                      label.fontsize = 10,
                      label.buffer = unit(2, "mm"),
                      label.minwidth = unit(5, "mm"),
                      con.cap = unit(0.1, "mm"),
                      con.colour='gray') +
    theme(legend.position = "none") +
    ggplot2::scale_fill_viridis_c(option = "H")
  
  return(p)
}


# alpha with aov + tukie post-hock - useless, but it looks pretty good
plot_alpha_w_toc <- function(ps, group, metric) {
  
  require(phyloseq)
  require(ggplot2)
  
  ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
  
  er <- estimate_richness(ps_a)
  df_er <- cbind(ps_a@sam_data, er)
  df_er <- df_er %>% select(c(group, metric))
  stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
    rstatix::tukey_hsd()
  y <- seq(max(er[[metric]]), length=length(stat.test$p.adj.signif[stat.test$p.adj.signif != "ns"]), by=max(er[[metric]]/20))
  
  plot_richness(ps_a, x=group, measures=metric) + 
    geom_boxplot() +
    geom_point(size=1.2, alpha=0.3) +
    stat_pvalue_manual(
      stat.test, 
      label = "p.adj.signif", 
      y.position = y,
      hide.ns=TRUE) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) +
    labs(y=paste(metric, "index")) 
}

lsf.str()

## beta_custom_norm_NMDS_elli_w : function (ps, seed = 7888, normtype = "vst", Color = "What", Group = "Repeat")  
## color_filt : function (ps, df)  
## color_filt_broken : function (ps, df, ps.pruned)  
## detachAllPackages : function ()  
## phyloseq_to_ampvis2 : function (physeq)  
## plot_alpha_w_toc : function (ps, group, metric)  
## ps_vst : function (ps, factor)

0.3.1 modify metadata

Add Group parameter to metadata -
- early - D01, D03, D05
- middle - D07, D08, D10
- late - D13, D14, D15

sample.data <- ps_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", 
                                 if_else(Day %in% c("D12", "D13", "D14", "D15"), "late", "bulk")))) %>% 
  dplyr::rename('Bag' = 'Day') %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late", "bulk"))) %>% 
  mutate(Day = Bag %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps_bulk) <- sample.data

 ps_bulk@sam_data %>% 
  DT::datatable(caption = "Sample data of phyloseq - 16S")

0.3.2 Select from the dataset only experemental part

ps.f <- prune_samples(!sample_data(ps_bulk)$Day %in% "bulk", ps_bulk)
ps.f  <- prune_taxa(taxa_sums(ps.f) > 0, ps.f)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

ps.f@sam_data  %>% 
  DT::datatable(caption = "Sample data of phyloseq - 16S")

0.4 Alpha diversity

With bulk soil

p1 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "Observed")

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(group)
## 
##   # Now:
##   data %>% select(all_of(group))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(metric)
## 
##   # Now:
##   data %>% select(all_of(metric))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.

p2 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_bulk, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

Without bulk soil

p1 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps.f, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

p.observed <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

p.shannon <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'

p.simpson <- plot_alpha_w_toc(ps = ps.f, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

0.4.0.1 mpd

mpd - index of alpha diversity “tree shagginess” - counted separately because it is in a separate package with built-in validity (based on permutations)
here is a link about the meanin of values in the columns:
https://www.rdocumentation.org/packages/picante/versions/1.8.2/topics/ses.mpd
in general the early stage is significantly less diverse than the other stages

physeq_merged <- merge_samples(ps.f, "Group", fun=sum)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

# ps.f@sam_data

# picante::mpd(l_vst$blue$ps@otu_table@.Data, cophenetic(l_vst$blue$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)
# 
# picante::mpd(l_vst$salmon$ps@otu_table@.Data, cophenetic(l_vst$salmon$ps@phy_tree)) %>% 
#   mean(na.rm = TRUE)

mpd.res <- picante::ses.mpd(physeq_merged@otu_table@.Data, cophenetic(physeq_merged@phy_tree)) 

as.data.frame(mpd.res)

##        ntaxa  mpd.obs mpd.rand.mean mpd.rand.sd mpd.obs.rank  mpd.obs.z
## early    344 1.707396      1.841757  0.02880358            1 -4.6647217
## middle   445 1.832365      1.842819  0.02280044          335 -0.4585186
## late     687 1.852563      1.841550  0.01436611          786  0.7666258
##        mpd.obs.p runs
## early      0.001  999
## middle     0.335  999
## late       0.786  999

0.5 beta statistic

0.5.1 permanova

permanova - group significantlly different - dispersion between is more, than inside groups

dist <- phyloseq::distance(ps.f, "bray")
metadata <- as(sample_data(ps.f@sam_data), "data.frame")
vegan::adonis2(dist ~ Group, data = metadata)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = dist ~ Group, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Group     2   3.2510 0.33952 8.2247  0.001 ***
## Residual 32   6.3243 0.66048                  
## Total    34   9.5753 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.5.2 dynamic of beta diversity

Bray Curtis distance by steps between each day. (For example, all D15 versus D14.) Can we also put soil respiration on this picture as well?

ps.f.r <- rarefy_even_depth(ps.f, rngseed = 777)

avg.r <- ps.f.r@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

avg <- ps.f@otu_table %>% 
  as.data.frame() %>%
  vegan::avgdist(10)

# avg %>%
#   as.matrix() %>%
#   as_tibble(rownames= "sample") %>%
#   pivot_longer(-sample) %>%
#   filter(sample < name) %>%
#   mutate(repeat_a = str_replace(sample, ".*-", ""),
#          repeat_b = str_replace(name, ".*-", ""), 
#          day_a = as.numeric(str_replace(sapply(strsplit(sample, "-"), `[`, 3), "D", "")),
#           day_b = as.numeric(str_replace(sapply(strsplit(name, "-"), `[`, 3), "D", "")),
#          diff = abs(day_a - day_b),
#          early = day_a < 10) %>% 
#   filter(repeat_a == repeat_b & diff < 10) %>%
#   group_by(diff, repeat_a, early) %>%
#   summarize(median = median(value)) %>%
#   ungroup() %>%
#   ggplot(aes(x=diff, y=median, color=early, group=paste0(repeat_a, early))) +
#   geom_line(size=0.25) +
#   geom_smooth(aes(group=early), se=FALSE, size=4) +
#   labs(x="Distance between time points",
#        y="Median Bray-Curtis distance") +
#   scale_x_continuous(breaks=1:9) +
#   scale_color_manual(name=NULL,
#                      breaks=c(TRUE, FALSE),
#                      values=c("blue", "red"),
#                      labels=c("Early", "Late")) +
#   guides(color = guide_legend(override.aes = list(size=1))) +
#   theme_classic()

avg.r %>%
  as.matrix() %>%
  as_tibble(rownames= "sample") %>%
  pivot_longer(-sample) %>%
  filter(sample < name) %>%
  mutate(repeat_a = str_replace(sample, ".*-", ""),
         repeat_b = str_replace(name, ".*-", ""), 
         day_a = as.numeric(str_replace(sapply(strsplit(sample, "\\."), `[`, 3), "D", "")),
         day_b = as.numeric(str_replace(sapply(strsplit(name, "\\."), `[`, 3), "D", ""))) %>% 
  mutate(day_a = as.numeric(as.factor(day_a) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "13", "8" = "14", "9" = "14", "10" = "15")),
         day_b = as.numeric(as.factor(day_b) %>% forcats::fct_recode("2" = "3", "3" = "5", "4" = "7", "5" = "8", "6" = "10", "7" = "12", "8" = "13", "9" = "14", "10" = "15")),
         diff = abs(day_a - day_b)) %>% 
  filter(diff == 1) %>% 
  mutate(day_b = as.factor(day_b) %>% forcats::fct_recode("14" = "2", "28" = "3","49" = "4",  "63" = "5", "91" = "6", "161" = "9", "140" = "8", "182" = "10", "119" = "7")) %>% 
  ggplot(aes(x=day_b, y=value)) +
    geom_boxplot() +
  theme_bw() +
  labs(x="Days",
       y="Bray-Curtis distance")

0.5.3 basic NMDS plot

From the picture it looks like there is a gradual slowing down of changes -
but from the previous picture it follows that this is not the case - there is a dip between 7-8-10,
but on average the points differ quite consistently.

beta_custom_norm_NMDS_elli_w(ps.f, C="Group", G="Day")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1063746 
## Run 1 stress 0.1063746 
## ... New best solution
## ... Procrustes: rmse 0.00002091149  max resid 0.00007447643 
## ... Similar to previous best
## Run 2 stress 0.1427868 
## Run 3 stress 0.1099872 
## Run 4 stress 0.1396556 
## Run 5 stress 0.1063746 
## ... Procrustes: rmse 0.00000856638  max resid 0.00004570378 
## ... Similar to previous best
## Run 6 stress 0.1063746 
## ... Procrustes: rmse 0.00001753402  max resid 0.00006418496 
## ... Similar to previous best
## Run 7 stress 0.1411972 
## Run 8 stress 0.1063746 
## ... Procrustes: rmse 0.00004854709  max resid 0.0001716062 
## ... Similar to previous best
## Run 9 stress 0.1099872 
## Run 10 stress 0.1063499 
## ... New best solution
## ... Procrustes: rmse 0.00155448  max resid 0.006797445 
## ... Similar to previous best
## Run 11 stress 0.1448551 
## Run 12 stress 0.1099721 
## Run 13 stress 0.1482809 
## Run 14 stress 0.1438721 
## Run 15 stress 0.1099872 
## Run 16 stress 0.1409441 
## Run 17 stress 0.1393472 
## Run 18 stress 0.1099873 
## Run 19 stress 0.143872 
## Run 20 stress 0.1099872 
## *** Best solution repeated 1 times

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`

With bulk soil

beta_custom_norm_NMDS_elli_w(ps_bulk, C="Group", G="Day")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1106692 
## Run 1 stress 0.1099946 
## ... New best solution
## ... Procrustes: rmse 0.01525233  max resid 0.06644051 
## Run 2 stress 0.1137241 
## Run 3 stress 0.08974122 
## ... New best solution
## ... Procrustes: rmse 0.04386934  max resid 0.2071995 
## Run 4 stress 0.08965403 
## ... New best solution
## ... Procrustes: rmse 0.008190238  max resid 0.04762122 
## Run 5 stress 0.08965588 
## ... Procrustes: rmse 0.001782023  max resid 0.009088074 
## ... Similar to previous best
## Run 6 stress 0.1102175 
## Run 7 stress 0.08965558 
## ... Procrustes: rmse 0.001606968  max resid 0.008004172 
## ... Similar to previous best
## Run 8 stress 0.08974122 
## ... Procrustes: rmse 0.008190385  max resid 0.04759668 
## Run 9 stress 0.08975613 
## ... Procrustes: rmse 0.002920705  max resid 0.01334062 
## Run 10 stress 0.1138449 
## Run 11 stress 0.08974122 
## ... Procrustes: rmse 0.008190597  max resid 0.04758139 
## Run 12 stress 0.08965393 
## ... New best solution
## ... Procrustes: rmse 0.00008023188  max resid 0.0002988637 
## ... Similar to previous best
## Run 13 stress 0.0896541 
## ... Procrustes: rmse 0.0001167688  max resid 0.000450411 
## ... Similar to previous best
## Run 14 stress 0.08974128 
## ... Procrustes: rmse 0.008191234  max resid 0.04767439 
## Run 15 stress 0.1102175 
## Run 16 stress 0.08974364 
## ... Procrustes: rmse 0.008271512  max resid 0.04765294 
## Run 17 stress 0.08974366 
## ... Procrustes: rmse 0.008271187  max resid 0.04752359 
## Run 18 stress 0.08974381 
## ... Procrustes: rmse 0.008272414  max resid 0.04767895 
## Run 19 stress 0.08974381 
## ... Procrustes: rmse 0.008273279  max resid 0.0477807 
## Run 20 stress 0.08975604 
## ... Procrustes: rmse 0.002890954  max resid 0.01309561 
## *** Best solution repeated 2 times

0.5.3.1 AncomBC

map_d_trial <- ps_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Bulk = ifelse(Day == "bulk", "bulk", "trial") %>% as.factor())

ps_bulk@sam_data <-  sample_data(map_d_trial)

set.seed(123)

ancom <-  ANCOMBC::ancombc2(data = ps_bulk, 
  tax_level = NULL,
  fix_formula = "Bulk",
  rand_formula = NULL,
  p_adj_method = "fdr",
  pseudo = 0, 
  pseudo_sens = TRUE,
  prv_cut = 0.10,
  lib_cut = 1000,
  s0_perc = 0.05,
  group = "Bulk",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  n_cl = 15, 
  verbose = TRUE,
  global = TRUE, 
  pairwise = TRUE, 
  dunnet = TRUE, 
  trend = FALSE,
  iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
    nrow = 2,
    byrow = TRUE),
    matrix(c(-1, 0, 1, -1),
    nrow = 2,
    byrow = TRUE)),
   node = list(2, 2),
   solver = "ECOS",
  B = 100)
  )

ancom_family <-  ANCOMBC::ancombc2(data = ps_bulk, 
  fix_formula = "Bulk",
  rand_formula = NULL,
  p_adj_method = "fdr",
  pseudo = 0, 
  pseudo_sens = TRUE,
  prv_cut = 0.10,
  lib_cut = 1000,
  s0_perc = 0.05,
  group = "Bulk",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  n_cl = 15, 
  tax_level = "Family",
  verbose = TRUE,
  global = TRUE, 
  pairwise = TRUE, 
  dunnet = TRUE, 
  trend = FALSE,
  iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
    nrow = 2,
    byrow = TRUE),
    matrix(c(-1, 0, 1, -1),
    nrow = 2,
    byrow = TRUE)),
   node = list(2, 2),
   solver = "ECOS",
  B = 100)
  )

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Obtaining initial estimates ...

## ML iteration = 1, epsilon = 1.8

## ML iteration = 2, epsilon = 2.4e-14

## Estimating sample-specific biases ...

## ANCOM-BC2 primary results ...

## The sensitivity analysis for the pseudo-count addition ...

ancom_family$res %>% 
  group_by(diff_Bulktrial) %>% 
  summarise(n = n())

## # A tibble: 2 × 2
##   diff_Bulktrial     n
##   <lgl>          <int>
## 1 FALSE             31
## 2 TRUE              47

taxa_family <- ps_bulk@tax_table %>% 
  as.data.frame() %>% 
  select(-c("Genus", "Species")) %>% 
  rownames_to_column("ID") %>% 
  select(-ID) %>% 
  distinct() 
  
ancom_family$res %>% 
  filter(diff_Bulktrial == TRUE) %>% 
  select(c("taxon", "lfc_Bulktrial")) %>% 
  rename("taxon" = "Family") %>% 
  left_join(taxa_family, by="Family") %>% 
  mutate(`more in` = ifelse(lfc_Bulktrial > 0, "trial", "bulk") %>% as.factor(),
         area = 1) %>% 
  ggplot(aes(area = area, fill = `more in`, subgroup = Phylum, label = Family)) +
  treemapify::geom_treemap() +
  treemapify::geom_treemap_subgroup_border(color = "white") +
  treemapify::geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.5, colour =
                             "black", fontface = "italic", min.size = 0) +
  treemapify::geom_treemap_text(colour = "white", place = "topleft", reflow = T) +
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position="bottom")

amp_bulk_venn <- phyloseq_to_ampvis2(ps_bulk)
p.venn.cc <- amp_venn(amp_bulk_venn,
         group_by = "Bulk",
         normalise = TRUE,
         cut_a = 0.000001,
         cut_f=0.000001,
            )

pw <- p.venn.cc
pw

# ggarrange(pw, pw, pw, pw,pw, pw, pw, pw, ncol = 1, labels = paste0("venn", seq(1, 8)))

0.6 Splitting the dataset

0.6.1 Basic information about dataset

With the bulk soil Phylum level

amp.b <- phyloseq_to_ampvis2(ps_bulk)
amp_heatmap(amp.b,
            tax_show = 22,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_class = "Proteobacteriota",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

ASV Level

amp_heatmap(amp.b,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

amp.b

## ampvis2 object with 5 elements. 
## Summary of OTU table:
## [1]       41     2062   624236     4412    36986    13850 15225.27
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   2062(100%)   2062(100%)   2062(100%)   2062(100%) 2047(99.27%) 1309(63.48%) 
##      Species 
##   114(5.53%) 
## 
## Metadata variables: 6 
##  SampleID, Bag, Description, Group, Day, Bulk

Without bulk

amp <- phyloseq_to_ampvis2(ps.f)
amp

## ampvis2 object with 5 elements. 
## Summary of OTU table:
## [1]       35     1063   510548     4412    32228    12752 14587.09
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##   1063(100%)   1063(100%)   1063(100%)   1063(100%) 1054(99.15%)  736(69.24%) 
##      Species 
##    90(8.47%) 
## 
## Metadata variables: 5 
##  SampleID, Bag, Description, Group, Day

amp_heatmap(amp,
            tax_show = 22,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_class = "Proteobacteriota",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

amp_heatmap(amp,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

Let’s divide the dataset into two groups.

1st - more than 10% of samples should contain at least 10 reads - this group will be analyzed further

ps.inall <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) > (0.1*length(x)), TRUE)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

The second part - the remaining, but more than 100 reeds in all samples(hereinafter - outliers majors(OM))
The remaining phylotypes are dropped from the analysis.

ps.exl  <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) < (0.1*length(x)), TRUE)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

ps.exl  <- prune_taxa(taxa_sums(ps.exl) > 100, ps.exl)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'

amp.exl <- phyloseq_to_ampvis2(ps.exl)
amp_heatmap(amp.exl,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

Same, but with relative abundance.

ps.per  <-  phyloseq::transform_sample_counts(ps.f, function(x) x / sum(x) * 100)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'

amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, 
      showRemainingTaxa = TRUE)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, )

OM aggregated by genus. Relative abundance.

amp_heatmap(amp.exl.r,
            tax_show = 30, 
            group_by = "Day", 
            tax_aggregate = "Genus",
            tax_add = "Phylum",
            tax_class = "Proteobacteria",
            round =  2,
            normalise=FALSE, 
            showRemainingTaxa = TRUE)

If we look at how OMs are distributed by day - it seems that the distribution of OMs is related not only to the specificity of individual bags, but also to purely technical features - the outlier values are mostly not with biological repeats, but with technical ones (red line - selected visually)

p_box <- phyloseq::sample_sums(ps.per.exl) %>% 
  as.data.frame(col.names = "values") %>% 
  setNames(., nm = "values") %>% 
  rownames_to_column("samples") %>% 
  mutate(Day = sapply(strsplit(samples, "\\."), `[`, 3)) %>% 
  ggplot(aes(x=Day, y=values, color=Day, fill = Day)) +
  geom_boxplot(aes(color=Day, fill = Day)) +
  geom_point(color = "black", position = position_dodge(width=0.2)) +
  geom_hline(yintercept = 10, colour = "red") +
  theme_bw() +
  theme(legend.position = "none")
     
p_box <- p_box + viridis::scale_color_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.5)

p_box + viridis::scale_fill_viridis(option = "H", discrete = TRUE, direction=1, begin=0.1, end = 0.9, alpha = 0.3)

0.7 WGCNA

0.7.1 clr

after clr normalization / looks disgusting - let’s try vst normalization from DESeq2

otu.inall <- as.data.frame(ps.inall@otu_table@.Data) 
ps.inall.clr <- ps.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")

## pickSoftThreshold: will use block size 321.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 321 of 321

## Warning: executing %dopar% sequentially: no parallel backend registered

##    Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1    0.133  19.60         0.8930 160.0000 160.00000 164.000
## 2      2    0.309 -18.70         0.8910  83.1000  82.90000  90.300
## 3      3    0.465 -11.70         0.8350  44.9000  44.50000  52.900
## 4      4    0.362  -5.99         0.7610  25.2000  24.60000  32.600
## 5      5    0.252  -3.34         0.7890  14.6000  14.00000  21.000
## 6      6    0.218  -2.19         0.7400   8.7500   8.31000  14.100
## 7      7    0.257  -1.82         0.7160   5.4300   5.09000   9.800
## 8      8    0.328  -1.71         0.6800   3.4900   3.21000   7.030
## 9      9    0.497  -1.92         0.7830   2.3100   2.09000   5.380
## 10    10    0.629  -2.03         0.7780   1.5800   1.39000   4.270
## 11    12    0.231  -4.27         0.0333   0.7990   0.66100   2.900
## 12    13    0.861  -2.22         0.9010   0.5900   0.47100   2.450
## 13    14    0.850  -2.16         0.8560   0.4450   0.34100   2.110
## 14    15    0.262  -3.70         0.0795   0.3430   0.24700   1.840
## 15    16    0.265  -3.55         0.0859   0.2690   0.18400   1.620
## 16    17    0.267  -3.42         0.0912   0.2140   0.13700   1.430
## 17    18    0.902  -1.97         0.9060   0.1730   0.10500   1.280
## 18    19    0.912  -1.92         0.9100   0.1420   0.08000   1.150
## 19    20    0.924  -1.87         0.9200   0.1170   0.06170   1.040
## 20    21    0.292  -4.09         0.1340   0.0981   0.04810   0.944
## 21    22    0.304  -3.06         0.1950   0.0829   0.03820   0.861
## 22    23    0.300  -2.94         0.1830   0.0706   0.03050   0.788
## 23    24    0.294  -2.79         0.1680   0.0607   0.02460   0.736
## 24    25    0.928  -1.62         0.9080   0.0525   0.01950   0.693
## 25    26    0.935  -1.60         0.9170   0.0458   0.01530   0.656
## 26    27    0.933  -1.57         0.9140   0.0402   0.01210   0.623
## 27    28    0.264  -2.43         0.0682   0.0354   0.00967   0.593
## 28    29    0.266  -2.40         0.0708   0.0314   0.00769   0.565
## 29    30    0.196  -2.67        -0.0255   0.0280   0.00608   0.541

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

0.7.2 vst

after vst normalisation

ps.varstab <- ps_vst(ps.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")

## pickSoftThreshold: will use block size 321.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 321 of 321
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1    1.0    0.339 -0.985         0.7120  46.800  43.10000  90.50
## 2    1.5    0.569 -1.150         0.8650  28.800  25.60000  65.50
## 3    2.0    0.711 -1.200         0.9340  18.900  16.90000  49.10
## 4    2.5    0.754 -1.210         0.9080  13.000  11.20000  38.10
## 5    3.0    0.807 -1.280         0.9260   9.260   7.36000  30.40
## 6    3.5    0.864 -1.330         0.9610   6.820   5.08000  24.80
## 7    4.0    0.863 -1.350         0.9310   5.160   3.59000  20.60
## 8    4.5    0.845 -1.380         0.8890   3.990   2.56000  17.30
## 9    5.0    0.780 -1.450         0.8240   3.140   1.84000  14.70
## 10   5.5    0.814 -1.440         0.8690   2.520   1.35000  12.70
## 11   6.0    0.819 -1.440         0.8580   2.050   1.03000  11.00
## 12   6.5    0.889 -1.350         0.9420   1.690   0.78200   9.66
## 13   7.0    0.868 -1.340         0.9070   1.410   0.60600   8.53
## 14   7.5    0.873 -1.330         0.9020   1.190   0.46700   7.68
## 15   8.0    0.818 -1.360         0.8180   1.020   0.38800   6.97
## 16   8.5    0.833 -1.360         0.8250   0.877   0.32000   6.35
## 17   9.0    0.854 -1.360         0.8550   0.762   0.25600   5.83
## 18   9.5    0.892 -1.370         0.9150   0.666   0.21400   5.37
## 19  10.0    0.886 -1.350         0.8940   0.587   0.17900   4.96
## 20  11.0    0.958 -1.290         0.9790   0.465   0.11800   4.29
## 21  12.0    0.902 -1.290         0.9060   0.376   0.08010   3.75
## 22  13.0    0.922 -1.270         0.9270   0.310   0.05370   3.31
## 23  14.0    0.913 -1.240         0.9080   0.260   0.03730   2.94
## 24  15.0    0.936 -1.190         0.9260   0.221   0.02670   2.63
## 25  16.0    0.924 -1.180         0.9110   0.190   0.02050   2.36
## 26  17.0    0.181 -1.840        -0.0518   0.165   0.01440   2.16
## 27  18.0    0.910 -1.210         0.8860   0.145   0.00980   2.05
## 28  19.0    0.901 -1.210         0.8770   0.129   0.00664   1.95
## 29  20.0    0.909 -1.240         0.8970   0.115   0.00468   1.87

plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)


net3 <- WGCNA::blockwiseModules(data2,
                          power=5.5,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)


mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

unique(ps.inall@sam_data$Bag)

##  [1] D01 D03 D05 D07 D08 D10 D12 D13 D14 D15
## Levels: D01 D03 D05 D07 D08 D10 D12 D13 D14 D15

0.7.2.1 import libraries 2

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)

modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.

p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "day",
       y = "normalized abundance")

p + scale_color_manual(values = levels(submod_df$module)) +
  scale_x_discrete(labels=(c("D01"="3",
                                "D03"="14",
                                "D05"="28",
                                "D07"="49" ,
                                "D08"="63",
                                "D10"="91",
                                "D12"="119",
                                "D13"="140",
                                "D14"="161",
                                "D15"="182")
         ))

#### ordination

UNIFRAC - the sausage narrows down

ps.inall.col <- ps.inall

df <- module_df %>% 
  rename("id" = "asv")
df <- df %>% 
  dplyr::select(-"id") %>% 
  mutate(colors = as.factor(colors))
taxa <- as.data.frame(ps.inall@tax_table@.Data)
tx <- cbind(taxa, df)
tx$colors <- factor(tx$colors, levels = c("cyan", "darkgreen", "red", "salmon", "blue", "plum"))
tax_table(ps.inall.col) <- tax_table(as.matrix(tx))
ord <- ordinate(ps.inall.col, "NMDS", "unifrac", "species")

## Run 0 stress 0.07519118 
## Run 1 stress 0.07358942 
## ... New best solution
## ... Procrustes: rmse 0.01813614  max resid 0.05794054 
## Run 2 stress 0.07083612 
## ... New best solution
## ... Procrustes: rmse 0.05788181  max resid 0.2184256 
## Run 3 stress 0.07358915 
## Run 4 stress 0.07444672 
## Run 5 stress 0.07926786 
## Run 6 stress 0.07632554 
## Run 7 stress 0.05777387 
## ... New best solution
## ... Procrustes: rmse 0.04546038  max resid 0.2215956 
## Run 8 stress 0.07083624 
## Run 9 stress 0.07444694 
## Run 10 stress 0.07049655 
## Run 11 stress 0.07553988 
## Run 12 stress 0.0704966 
## Run 13 stress 0.0704966 
## Run 14 stress 0.08071969 
## Run 15 stress 0.07557142 
## Run 16 stress 0.05788492 
## ... Procrustes: rmse 0.005914098  max resid 0.02748847 
## Run 17 stress 0.08001937 
## Run 18 stress 0.05777385 
## ... New best solution
## ... Procrustes: rmse 0.00001062268  max resid 0.00003468096 
## ... Similar to previous best
## Run 19 stress 0.07078899 
## Run 20 stress 0.07858074 
## *** Best solution repeated 1 times

plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("cyan", "darkgreen", "red", "salmon", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

Bray - the sausage even
Late stages on the left - with early clusters overlapping, late clusters seems separated What affects on the axis2? There is clearly some sort of pattern.

ord <- ordinate(ps.inall.col, "NMDS", "bray", "species")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09266802 
## Run 1 stress 0.09266803 
## ... Procrustes: rmse 0.00002041571  max resid 0.00007667111 
## ... Similar to previous best
## Run 2 stress 0.1309815 
## Run 3 stress 0.09266805 
## ... Procrustes: rmse 0.00005002405  max resid 0.000204567 
## ... Similar to previous best
## Run 4 stress 0.09266803 
## ... Procrustes: rmse 0.00002339873  max resid 0.00008107723 
## ... Similar to previous best
## Run 5 stress 0.1296072 
## Run 6 stress 0.1311172 
## Run 7 stress 0.1383667 
## Run 8 stress 0.09266802 
## ... Procrustes: rmse 0.000008511634  max resid 0.00004247589 
## ... Similar to previous best
## Run 9 stress 0.1456796 
## Run 10 stress 0.09266802 
## ... Procrustes: rmse 0.000003038434  max resid 0.00001420329 
## ... Similar to previous best
## Run 11 stress 0.09266802 
## ... Procrustes: rmse 0.00001295347  max resid 0.00005351635 
## ... Similar to previous best
## Run 12 stress 0.09266802 
## ... Procrustes: rmse 0.000005447383  max resid 0.0000249077 
## ... Similar to previous best
## Run 13 stress 0.09266803 
## ... Procrustes: rmse 0.00002505859  max resid 0.00008787721 
## ... Similar to previous best
## Run 14 stress 0.09266802 
## ... Procrustes: rmse 0.00000349461  max resid 0.00001394197 
## ... Similar to previous best
## Run 15 stress 0.1296263 
## Run 16 stress 0.1297207 
## Run 17 stress 0.09266804 
## ... Procrustes: rmse 0.00004076479  max resid 0.0001694088 
## ... Similar to previous best
## Run 18 stress 0.09266802 
## ... Procrustes: rmse 0.000006043647  max resid 0.00001496754 
## ... Similar to previous best
## Run 19 stress 0.09266802 
## ... Procrustes: rmse 0.000008608011  max resid 0.0000221727 
## ... Similar to previous best
## Run 20 stress 0.09266802 
## ... Procrustes: rmse 0.000004973947  max resid 0.00001244682 
## ... Similar to previous best
## *** Best solution repeated 13 times

plot_ordination(ps.inall.col, ord, type = "species", color = "colors") + 
  scale_color_manual(values =  c("cyan", "darkgreen", "red", "salmon", "blue", "plum")) +
  theme_bw() +
  theme(legend.position = "none")

0.7.2.2 visualisation

Next are the same pictures for all the groups.

l_vst <- color_filt(ps.inall, submod_df)
l_vst

$cyan $cyan$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 82 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 82 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 82 tips and 81 internal nodes ] refseq() DNAStringSet: [ 82 reference sequences ]

$cyan$amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 82 70590 183 5339 1720 2016.86

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 82(100%) 82(100%) 82(100%) 82(100%) 82(100%) 71(86.59%) 12(14.63%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

$cyan$heat

$cyan$heat_rel

$cyan$tree

$cyan$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq143	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Burkholderiaceae	Burkholderia	NA
Seq65	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Alcaligenaceae	NA	NA
Seq149	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Comamonadaceae	Variovorax	NA
Seq503	Bacteria	Cyanobacteriota	Vampirivibrionia	Obscuribacterales	Obscuribacteraceae	NA	NA
Seq622	Bacteria	Pseudomonadota	Alphaproteobacteria	Ferrovibrionales	Ferrovibrionaceae	Ferrovibrio	soli
Seq341	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	NA	NA
Seq239	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Beijerinckiaceae	Methylobacterium-Methylorubrum	NA
Seq232	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Beijerinckiaceae	Bosea	thiooxidans
Seq538	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Beijerinckiaceae	Bosea	NA
Seq105	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Tardiphaga	robiniae
Seq31	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Starkeya	NA
Seq212	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Hyphomicrobiaceae	Hyphomicrobium	NA
Seq605	Bacteria	Pseudomonadota	Alphaproteobacteria	Micropepsales	Micropepsaceae	NA	NA
Seq386	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingopyxis	NA
Seq610	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Altererythrobacter	NA
Seq595	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Altererythrobacter	NA
Seq132	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Mesorhizobium	NA
Seq462	Bacteria	Pseudomonadota	Alphaproteobacteria	Caulobacterales	Caulobacteraceae	NA	NA
Seq77	Bacteria	Actinobacteriota	Thermoleophilia	Solirubrobacterales	Solirubrobacteraceae	Conexibacter	NA
Seq549	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Pajaroellobacter	NA
Seq345	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Pajaroellobacter	NA
Seq103	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Sorangium	NA
Seq486	Bacteria	Actinobacteriota	Acidimicrobiia	Microtrichales	Ilumatobacteraceae	NA	NA
Seq113	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Promicromonosporaceae	Promicromonospora	NA
Seq598	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	Leifsonia	NA
Seq171	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	NA	aoyamense
Seq674	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	Agromyces	NA
Seq139	Bacteria	Actinobacteriota	Actinobacteria	Corynebacteriales	Mycobacteriaceae	Mycobacterium	NA
Seq159	Bacteria	Actinobacteriota	Actinobacteria	Streptosporangiales	Streptosporangiaceae	Herbidospora	mongoliensis
Seq654	Bacteria	Actinobacteriota	Actinobacteria	Streptosporangiales	Streptosporangiaceae	Nonomuraea	NA
Seq86	Bacteria	Actinobacteriota	Actinobacteria	Streptosporangiales	Thermomonosporaceae	Actinocorallia	NA
Seq267	Bacteria	Actinobacteriota	Actinobacteria	Streptomycetales	Streptomycetaceae	Streptomyces	NA
Seq745	Bacteria	Actinobacteriota	Actinobacteria	Propionibacteriales	Propionibacteriaceae	Jiangella	NA
Seq245	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	Sphingobacteriaceae	Pedobacter	panaciterrae
Seq189	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	Sphingobacteriaceae	Pedobacter	NA
Seq492	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	NS11-12 marine group	NA	NA
Seq716	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	NS11-12 marine group	NA	NA
Seq309	Bacteria	Bacillota	Bacilli	Bacillales	Bacillaceae	Bacillus	NA
Seq101	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq52	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq18	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq265	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq415	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq439	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Domibacillus	NA
Seq121	Bacteria	Bacillota	Bacilli	Bacillales	Bacillaceae	Bacillus	NA
Seq54	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Lysinibacillus	NA
Seq144	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Lysinibacillus	NA
Seq84	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Terrimicrobiaceae	Terrimicrobium	NA
Seq349	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Terrimicrobiaceae	Terrimicrobium	NA
Seq228	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Terrimicrobiaceae	Terrimicrobium	NA
Seq631	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Chthoniobacteraceae	Chthoniobacter	NA
Seq451	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Chthoniobacteraceae	Chthoniobacter	NA
Seq431	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Verrucomicrobiales	Verrucomicrobiaceae	Roseimicrobium	gellanilyticum
Seq315	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Verrucomicrobiales	Verrucomicrobiaceae	Verrucomicrobium	spinosum
Seq246	Bacteria	Planctomycetota	Planctomycetes	Gemmatales	Gemmataceae	Gemmata	NA
Seq375	Bacteria	Planctomycetota	Planctomycetes	Isosphaerales	Isosphaeraceae	Singulisphaera	NA
Seq395	Bacteria	Acidobacteriota	Acidobacteriae	Acidobacteriales	Acidobacteriaceae (Subgroup 1)	Edaphobacter	NA
Seq73	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq237	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq420	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Pseudoflavitalea	NA
Seq32	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Pseudoflavitalea	NA
Seq765	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	NA
Seq40	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	hibisci
Seq234	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	NA
Seq372	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	NA
Seq126	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Flavitalea	NA
Seq155	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	ginsengihumi
Seq125	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	arvensicola
Seq496	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	soli
Seq76	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq410	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq50	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Taibaiella	NA
Seq481	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Taibaiella	NA
Seq147	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Taibaiella	NA
Seq1006	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Moraxellaceae	NA	NA
Seq727	Bacteria	Pseudomonadota	Gammaproteobacteria	Steroidobacterales	Steroidobacteraceae	Steroidobacter	NA
Seq409	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq471	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq581	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq335	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Rhodanobacteraceae	Tahibacter	NA
Seq186	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Xanthomonas	NA
Seq606	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	NA	NA

$darkgreen $darkgreen$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 29 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 29 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 29 tips and 28 internal nodes ] refseq() DNAStringSet: [ 29 reference sequences ]

$darkgreen$amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 29 173281 336 15108 4808 4950.89

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 29(100%) 29(100%) 29(100%) 29(100%) 29(100%) 28(96.55%) 2(6.9%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

$darkgreen$heat

$darkgreen$heat_rel

$darkgreen$tree

$darkgreen$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq120	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Burkholderiaceae	Burkholderia	telluris
Seq69	Bacteria	Pseudomonadota	Alphaproteobacteria	Azospirillales	Inquilinaceae	Inquilinus	ginsengisoli
Seq4	Bacteria	Pseudomonadota	Alphaproteobacteria	Azospirillales	Inquilinaceae	Inquilinus	NA
Seq370	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Pseudorhodoplanes	NA
Seq9	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Bradyrhizobium	NA
Seq82	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Bradyrhizobium	NA
Seq22	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Starkeya	NA
Seq44	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Allorhizobium	NA
Seq10	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Allorhizobium	NA
Seq29	Bacteria	Myxococcota	Polyangia	Nannocystales	Nannocystaceae	Nannocystis	NA
Seq364	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Labilithrix	NA
Seq51	Bacteria	Actinobacteriota	Actinobacteria	Corynebacteriales	Mycobacteriaceae	Mycobacterium	NA
Seq34	Bacteria	Actinobacteriota	Actinobacteria	Streptosporangiales	Streptosporangiaceae	Herbidospora	NA
Seq57	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq25	Bacteria	Bacillota	Bacilli	Bacillales	Bacillaceae	Terribacillus	NA
Seq3	Bacteria	Bacillota	Bacilli	Bacillales	Bacillaceae	Bacillus	NA
Seq5	Bacteria	Bacillota	Bacilli	Bacillales	Bacillaceae	Bacillus	NA
Seq13	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	NA	NA
Seq6	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Solibacillus	NA
Seq19	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Solibacillus	NA
Seq17	Bacteria	Planctomycetota	Planctomycetes	Isosphaerales	Isosphaeraceae	Singulisphaera	NA
Seq527	Bacteria	Acidobacteriota	Acidobacteriae	Acidobacteriales	Acidobacteriaceae (Subgroup 1)	Edaphobacter	NA
Seq16	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq7	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq1	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq134	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq81	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Spirosomaceae	Dyadobacter	NA
Seq15	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Rhodanobacteraceae	Luteibacter	NA
Seq26	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Luteimonas	NA

$red $red$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 139 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 139 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 139 tips and 138 internal nodes ] refseq() DNAStringSet: [ 139 reference sequences ]

$red$amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 139 109849 0 14357 1966 3138.54

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 139(100%) 139(100%) 139(100%) 139(100%) 139(100%) 86(61.87%) 11(7.91%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

$red$heat

$red$heat_rel

$red$tree

$red$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq117	Archaea	Thermoproteota	Nitrososphaeria	Nitrososphaerales	Nitrososphaeraceae	NA	NA
Seq271	Archaea	Thermoproteota	Nitrososphaeria	Nitrososphaerales	Nitrososphaeraceae	Candidatus Nitrocosmicus	NA
Seq61	Archaea	Thermoproteota	Nitrososphaeria	Nitrososphaerales	Nitrososphaeraceae	NA	NA
Seq62	Archaea	Thermoproteota	Nitrososphaeria	Nitrososphaerales	Nitrososphaeraceae	NA	NA
Seq214	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	Aquicella	NA
Seq758	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	Aquicella	NA
Seq289	Bacteria	Cyanobacteriota	Vampirivibrionia	Obscuribacterales	Obscuribacteraceae	NA	NA
Seq23	Bacteria	Cyanobacteriota	Vampirivibrionia	Obscuribacterales	Obscuribacteraceae	NA	NA
Seq318	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingomonas	naasensis
Seq71	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingomonas	NA
Seq480	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingomonas	jaspsi
Seq310	Bacteria	Pseudomonadota	Alphaproteobacteria	Dongiales	Dongiaceae	Dongia	NA
Seq262	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	soli
Seq221	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	NA
Seq83	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	NA
Seq321	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	NA
Seq226	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	massiliensis
Seq537	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhodospirillales	Magnetospiraceae	NA	NA
Seq712	Bacteria	Pseudomonadota	Alphaproteobacteria	Acetobacterales	Acetobacteraceae	NA	NA
Seq433	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Kaistiaceae	Kaistia	NA
Seq270	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Pseudorhodoplanes	NA
Seq107	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Pseudolabrys	NA
Seq166	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Pseudolabrys	NA
Seq183	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	Pseudolabrys	NA
Seq426	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Xanthobacteraceae	NA	NA
Seq175	Bacteria	Pseudomonadota	Alphaproteobacteria	Micropepsales	Micropepsaceae	NA	NA
Seq129	Bacteria	Pseudomonadota	Alphaproteobacteria	Micropepsales	Micropepsaceae	NA	NA
Seq157	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Altererythrobacter	NA
Seq137	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingobium	NA
Seq28	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Devosiaceae	Devosia	NA
Seq592	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Mesorhizobium	NA
Seq711	Bacteria	Pseudomonadota	Alphaproteobacteria	Caulobacterales	Caulobacteraceae	Phenylobacterium	NA
Seq33	Bacteria	Pseudomonadota	Alphaproteobacteria	Caulobacterales	Caulobacteraceae	Caulobacter	NA
Seq225	Bacteria	Pseudomonadota	Alphaproteobacteria	Caulobacterales	Hyphomonadaceae	Hirschia	NA
Seq39	Bacteria	Gemmatimonadota	Gemmatimonadetes	Gemmatimonadales	Gemmatimonadaceae	NA	NA
Seq179	Bacteria	Nitrospirota	Nitrospiria	Nitrospirales	Nitrospiraceae	Nitrospira	japonica
Seq30	Bacteria	Actinobacteriota	Thermoleophilia	Solirubrobacterales	Solirubrobacteraceae	Conexibacter	NA
Seq156	Bacteria	Actinobacteriota	Thermoleophilia	Solirubrobacterales	Solirubrobacteraceae	Solirubrobacter	NA
Seq192	Bacteria	Actinobacteriota	Thermoleophilia	Solirubrobacterales	67-14	NA	NA
Seq383	Bacteria	Actinobacteriota	Thermoleophilia	Solirubrobacterales	67-14	NA	NA
Seq532	Bacteria	Myxococcota	Polyangia	Haliangiales	Haliangiaceae	Haliangium	NA
Seq504	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq172	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq46	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq493	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq100	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq35	Bacteria	Myxococcota	Polyangia	Polyangiales	BIrii41	NA	NA
Seq399	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Minicystis	NA
Seq128	Bacteria	Myxococcota	Polyangia	Polyangiales	Sandaracinaceae	NA	NA
Seq274	Bacteria	Bdellovibrionota	Bdellovibrionia	Bdellovibrionales	Bdellovibrionaceae	Bdellovibrio	NA
Seq119	Bacteria	Bdellovibrionota	Bdellovibrionia	Bdellovibrionales	Bdellovibrionaceae	Bdellovibrio	NA
Seq253	Bacteria	Dependentiae	Babeliae	Babeliales	UBA12409	NA	NA
Seq79	Bacteria	Actinobacteriota	Actinobacteria	Propionibacteriales	Propionibacteriaceae	Microlunatus	NA
Seq416	Bacteria	Actinobacteriota	Actinobacteria	Propionibacteriales	Nocardioidaceae	Kribbella	NA
Seq381	Bacteria	Actinobacteriota	Acidimicrobiia	Microtrichales	Ilumatobacteraceae	NA	NA
Seq173	Bacteria	Actinobacteriota	Acidimicrobiia	Microtrichales	Iamiaceae	Iamia	NA
Seq235	Bacteria	Actinobacteriota	Acidimicrobiia	Microtrichales	Iamiaceae	Iamia	NA
Seq43	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	Galbitalea	NA
Seq290	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	Galbitalea	NA
Seq693	Bacteria	Actinobacteriota	Actinobacteria	Micromonosporales	Micromonosporaceae	Luedemannella	NA
Seq148	Bacteria	Actinobacteriota	Actinobacteria	Micromonosporales	Micromonosporaceae	Dactylosporangium	NA
Seq357	Bacteria	Actinobacteriota	Actinobacteria	Streptomycetales	Streptomycetaceae	NA	NA
Seq317	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	Sphingobacteriaceae	Mucilaginibacter	NA
Seq230	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	Sphingobacteriaceae	Mucilaginibacter	calamicampi
Seq90	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	NS11-12 marine group	NA	NA
Seq520	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	NS11-12 marine group	NA	NA
Seq145	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	env.OPS 17	NA	NA
Seq284	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	lautus
Seq258	Bacteria	Bacillota	Bacilli	Paenibacillales	Paenibacillaceae	Paenibacillus	NA
Seq244	Bacteria	Bacillota	Bacilli	Bacillales	Planococcaceae	Paenisporosarcina	NA
Seq85	Bacteria	Spirochaetota	Spirochaetia	Spirochaetales	Spirochaetaceae	Salinispira	NA
Seq185	Bacteria	Spirochaetota	Spirochaetia	Spirochaetales	Spirochaetaceae	Spirochaeta 2	NA
Seq236	Bacteria	Spirochaetota	Spirochaetia	Spirochaetales	Spirochaetaceae	Spirochaeta 2	NA
Seq590	Bacteria	Patescibacteria	Saccharimonadia	Saccharimonadales	LWQ8	NA	NA
Seq238	Bacteria	Fibrobacterota	Fibrobacteria	Fibrobacterales	Fibrobacteraceae	NA	NA
Seq490	Bacteria	Chloroflexota	Anaerolineae	Anaerolineales	Anaerolineaceae	NA	NA
Seq612	Bacteria	Chloroflexota	Chloroflexia	Chloroflexales	Roseiflexaceae	NA	NA
Seq222	Bacteria	Chloroflexota	Chloroflexia	Chloroflexales	Roseiflexaceae	NA	NA
Seq421	Bacteria	Chloroflexota	Chloroflexia	Chloroflexales	Roseiflexaceae	NA	NA
Seq613	Bacteria	Chloroflexota	Chloroflexia	Thermomicrobiales	JG30-KF-CM45	NA	NA
Seq530	Bacteria	Chloroflexota	Chloroflexia	Thermomicrobiales	JG30-KF-CM45	NA	NA
Seq388	Bacteria	Armatimonadota	Fimbriimonadia	Fimbriimonadales	Fimbriimonadaceae	NA	NA
Seq576	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Pedosphaerales	Pedosphaeraceae	NA	NA
Seq417	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Opitutales	Opitutaceae	Lacunisphaera	NA
Seq152	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Opitutales	Opitutaceae	Lacunisphaera	limnophila
Seq517	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Opitutales	Opitutaceae	Opitutus	NA
Seq153	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Terrimicrobiaceae	Terrimicrobium	NA
Seq281	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Terrimicrobiaceae	Terrimicrobium	NA
Seq528	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Chthoniobacterales	Chthoniobacteraceae	LD29	NA
Seq659	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Verrucomicrobiales	Rubritaleaceae	Luteolibacter	NA
Seq713	Bacteria	Planctomycetota	Phycisphaerae	Phycisphaerales	Phycisphaeraceae	NA	NA
Seq601	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	NA	NA
Seq839	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	NA	NA
Seq521	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	NA	NA
Seq585	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	NA	NA
Seq195	Bacteria	Planctomycetota	Planctomycetes	Gemmatales	Gemmataceae	Gemmata	NA
Seq404	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	Pir4 lineage	NA
Seq440	Bacteria	Planctomycetota	Planctomycetes	Pirellulales	Pirellulaceae	Pir4 lineage	NA
Seq300	Bacteria	Planctomycetota	Planctomycetes	Planctomycetales	Rubinisphaeraceae	SH-PL14	NA
Seq292	Bacteria	Planctomycetota	Planctomycetes	Planctomycetales	Schlesneriaceae	Schlesneria	NA
Seq200	Bacteria	Acidobacteriota	Blastocatellia	Blastocatellales	Blastocatellaceae	NA	NA
Seq342	Bacteria	Acidobacteriota	Vicinamibacteria	Vicinamibacterales	Vicinamibacteraceae	NA	NA
Seq434	Bacteria	Acidobacteriota	Vicinamibacteria	Vicinamibacterales	Vicinamibacteraceae	NA	NA
Seq607	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq12	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq196	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq529	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq11	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	NA	NA
Seq570	Bacteria	Bacteroidota	Bacteroidia	Sphingobacteriales	env.OPS 17	NA	NA
Seq178	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Terrimonas	NA
Seq92	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Terrimonas	NA
Seq326	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Terrimonas	NA
Seq768	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Pseudoflavitalea	NA
Seq191	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Flavitalea	NA
Seq557	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	NA	NA
Seq671	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Edaphobaculum	NA
Seq459	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Edaphobaculum	NA
Seq550	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Edaphobaculum	NA
Seq675	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Edaphobaculum	NA
Seq547	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Taibaiella	NA
Seq834	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Amoebophilaceae	Candidatus Amoebophilus	NA
Seq760	Bacteria	Pseudomonadota	Gammaproteobacteria	Coxiellales	Coxiellaceae	Coxiella	NA
Seq353	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	211ds20	NA	NA
Seq169	Bacteria	Pseudomonadota	Gammaproteobacteria	Steroidobacterales	Steroidobacteraceae	Steroidobacter	flavus
Seq352	Bacteria	Pseudomonadota	Gammaproteobacteria	Steroidobacterales	Steroidobacteraceae	Steroidobacter	NA
Seq506	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq58	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq264	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq231	Bacteria	Pseudomonadota	Gammaproteobacteria	Gammaproteobacteria Incertae Sedis	Unknown Family	Acidibacter	NA
Seq544	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Luteimonas	vadosa
Seq114	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Rhodanobacteraceae	Dokdonella	ginsengisoli
Seq207	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Rhodanobacteraceae	Dokdonella	NA
Seq627	Bacteria	Pseudomonadota	Gammaproteobacteria	Salinisphaerales	Solimonadaceae	NA	NA
Seq562	Bacteria	Pseudomonadota	Gammaproteobacteria	Salinisphaerales	Solimonadaceae	Alkanibacter	NA
Seq786	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	NA	NA
Seq782	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	NA	NA
Seq339	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	NA	NA
Seq138	Bacteria	Pseudomonadota	Gammaproteobacteria	Diplorickettsiales	Diplorickettsiaceae	NA	NA
Seq358	Bacteria	Pseudomonadota	Gammaproteobacteria	Legionellales	Legionellaceae	Legionella	NA

$salmon $salmon$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 71 taxa and 35 samples ] sample_data() Sample Data: [ 35 samples by 4 sample variables ] tax_table() Taxonomy Table: [ 71 taxa by 7 taxonomic ranks ] phy_tree() Phylogenetic Tree: [ 71 tips and 70 internal nodes ] refseq() DNAStringSet: [ 71 reference sequences ]

$salmon$amp ampvis2 object with 5 elements. Summary of OTU table: [1] 35 71 115593 29 20719 779 3302.66

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 71(100%) 71(100%) 71(100%) 71(100%) 71(100%) 67(94.37%) 11(15.49%)

Metadata variables: 5 SampleID, Bag, Description, Group, Day

$salmon$heat

$salmon$heat_rel

$salmon$tree

$salmon$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq87	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	Massilia	armeniaca
Seq151	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	NA	NA
Seq241	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	Pseudoduganella	NA
Seq170	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	Pseudoduganella	eburnea
Seq141	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	Massilia	NA
Seq324	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Oxalobacteraceae	Massilia	NA
Seq208	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Burkholderiaceae	Cupriavidus	NA
Seq8	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Burkholderiaceae	Cupriavidus	NA
Seq14	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Alcaligenaceae	Achromobacter	NA
Seq55	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Comamonadaceae	Xylophilus	paradoxus
Seq115	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Comamonadaceae	NA	NA
Seq552	Bacteria	Pseudomonadota	Gammaproteobacteria	Burkholderiales	Comamonadaceae	Rhizobacter	NA
Seq412	Bacteria	Pseudomonadota	Alphaproteobacteria	Sphingomonadales	Sphingomonadaceae	Sphingomonas	mucosissima
Seq540	Bacteria	Pseudomonadota	Alphaproteobacteria	Reyranellales	Reyranellaceae	Reyranella	NA
Seq123	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Beijerinckiaceae	Microvirga	NA
Seq359	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Allorhizobium	NA
Seq371	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Devosiaceae	Devosia	neptuniae
Seq257	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Shinella	NA
Seq202	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	NA	NA
Seq42	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Mycoplana	NA
Seq422	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Ensifer	NA
Seq21	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Allorhizobium	NA
Seq133	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Neorhizobium	NA
Seq348	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	Allorhizobium	azooxidifex
Seq177	Bacteria	Pseudomonadota	Alphaproteobacteria	Rhizobiales	Rhizobiaceae	NA	NA
Seq423	Bacteria	Pseudomonadota	Alphaproteobacteria	Caulobacterales	Caulobacteraceae	Phenylobacterium	mobile
Seq611	Bacteria	Myxococcota	Polyangia	Haliangiales	Haliangiaceae	Haliangium	NA
Seq401	Bacteria	Myxococcota	Polyangia	Polyangiales	Polyangiaceae	Pajaroellobacter	NA
Seq325	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Promicromonosporaceae	Cellulosimicrobium	NA
Seq95	Bacteria	Actinobacteriota	Actinobacteria	Micrococcales	Microbacteriaceae	Microbacterium	NA
Seq74	Bacteria	Actinobacteriota	Actinobacteria	Corynebacteriales	Mycobacteriaceae	Mycobacterium	NA
Seq443	Bacteria	Actinobacteriota	Actinobacteria	Glycomycetales	Glycomycetaceae	Glycomyces	NA
Seq78	Bacteria	Actinobacteriota	Actinobacteria	Streptomycetales	Streptomycetaceae	Streptomyces	NA
Seq94	Bacteria	Actinobacteriota	Actinobacteria	Streptomycetales	Streptomycetaceae	Streptomyces	NA
Seq414	Bacteria	Actinobacteriota	Actinobacteria	Streptomycetales	Streptomycetaceae	Streptomyces	NA
Seq181	Bacteria	Verrucomicrobiota	Verrucomicrobiae	Verrucomicrobiales	Verrucomicrobiaceae	Roseimicrobium	NA
Seq218	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq390	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq142	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq255	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Microscillaceae	Ohtaekwangia	NA
Seq53	Bacteria	Bacteroidota	Bacteroidia	Flavobacteriales	Flavobacteriaceae	Flavobacterium	NA
Seq261	Bacteria	Bacteroidota	Bacteroidia	Flavobacteriales	Weeksellaceae	Chryseobacterium	ginsenosidimutans
Seq233	Bacteria	Bacteroidota	Bacteroidia	Flavobacteriales	Weeksellaceae	Chryseobacterium	NA
Seq446	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Flavitalea	NA
Seq97	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Pseudoflavitalea	NA
Seq131	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	NA
Seq136	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Niastella	NA
Seq167	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Pseudoflavitalea	NA
Seq37	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq215	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq248	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	humicola
Seq112	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq89	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	pinensis
Seq2	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq323	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq20	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq445	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	Chitinophaga	NA
Seq38	Bacteria	Bacteroidota	Bacteroidia	Cytophagales	Spirosomaceae	Dyadobacter	NA
Seq362	Bacteria	Pseudomonadota	Gammaproteobacteria	Steroidobacterales	Steroidobacteraceae	Steroidobacter	agariperforans
Seq355	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Pseudoxanthomonas	NA
Seq60	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Stenotrophomonas	NA
Seq130	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Stenotrophomonas	NA
Seq305	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Lysobacter	NA
Seq41	Bacteria	Pseudomonadota	Gammaproteobacteria	Xanthomonadales	Xanthomonadaceae	Lysobacter	NA
Seq667	Bacteria	Pseudomonadota	Gammaproteobacteria	Legionellales	Legionellaceae	Legionella	NA
Seq176	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	Pseudomonas	NA
Seq24	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	Pseudomonas	NA
Seq27	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	Pseudomonas	NA
Seq99	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	Pseudomonas	NA
Seq56	Bacteria	Pseudomonadota	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	Pseudomonas	NA
Seq279	Bacteria	Pseudomonadota	Gammaproteobacteria	Enterobacterales	Enterobacteriaceae	Klebsiella	NA

0.7.3 mdp and other indices for clusters

I can’t do stat mdp support for clusters, but the early clusters(salmon, cyan) have lower mdp than the later ones.

external_empty_dataframe <- data.frame(cluster=factor(),
                                       mdp=numeric(),
                                       richness=numeric(),
                                       stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  ps.in <- l_vst[[i]][["ps"]]
  m <- ps.in@otu_table@.Data %>% 
    colSums() %>% 
    as.data.frame() %>% 
    as.matrix() %>% 
    t()
  
  mdp_index <- picante::mpd(m, cophenetic(ps.in@phy_tree)) 
  ricness_index <- ps.in@tax_table %>% 
    rownames() %>% 
    length()
  d <- data.frame(cluster = i,
                  mdp = mdp_index,
                  richness = ricness_index)
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

external_empty_dataframe %>% 
  dplyr::arrange(mdp)

##     cluster      mdp richness
## 1 darkgreen 1.541596       29
## 2    salmon 1.629239       71
## 3      cyan 1.754819       82
## 4       red 1.928198      139

colSums(l_vst$salmon$ps@otu_table@.Data)

##  Seq87 Seq151 Seq241 Seq170 Seq141 Seq324 Seq208   Seq8  Seq14  Seq55 Seq115 
##   1570    975    549    765    703    390    669   9841   6482   2589   1262 
## Seq552 Seq412 Seq540 Seq123 Seq359 Seq371 Seq257 Seq202  Seq42 Seq422  Seq21 
##    195    304    200    669    337    341    516    688   3429    261   5170 
## Seq133 Seq348 Seq177 Seq423 Seq611 Seq401 Seq325  Seq95  Seq74 Seq443  Seq78 
##   1064    363    798    158    160    310    390   1469   1941    249   1822 
##  Seq94 Seq414 Seq181 Seq218 Seq390 Seq142 Seq255  Seq53 Seq261 Seq233 Seq446 
##   1179    301    791    638    317   1038    523   2663    512    572    266 
##  Seq97 Seq131 Seq136 Seq167  Seq37 Seq215 Seq248 Seq112  Seq89   Seq2 Seq323 
##   1457   1047   1054    875   3566    653    532   1311   1524  16855    392 
##  Seq20 Seq445  Seq38 Seq362 Seq355  Seq60 Seq130 Seq305  Seq41 Seq667 Seq176 
##   5184    266   3539    350    357   2372   1105    189   3465    137    799 
##  Seq24  Seq27  Seq99  Seq56 Seq279 
##   4452   4271   1373   2582    457

0.8 Add a metadata

list.files("meta/")

## [1] "cell_realtime_stat.xlsx" "cell_resp_ch_stat.xlsx" 
## [3] "period_legend.xlsx"

realtime.data <- readxl::read_excel("meta/cell_realtime_stat.xlsx")
period.data <- readxl::read_excel("meta/period_legend.xlsx")
resp.data <- readxl::read_excel("meta/cell_resp_ch_stat.xlsx")
realtime.data

## # A tibble: 36 × 29
##    id        day contr…¹ CELL_…² CELL_…³ CELL_…⁴ CELL_…⁵ CELL_…⁶ CELL_…⁷ CELL_…⁸
##    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 G01-1-1     3    15.6    33.2    31.1    33.6    36.6    33.8    38.0    33.0
##  2 G01-1-2     3    15.5    33.3    31.3    32.9    36.2    33.8    41      32.7
##  3 G01-1-3     3    15.5    33.6    31.2    32.9    35.8    33.3    38.2    32.9
##  4 G01-2-1     3    17.1    36.0    31.1    33.1    36.9    36.4    38.8    33.8
##  5 G01-2-2     3    17.3    35.6    31.0    32.8    37.8    35.7    40.1    33.6
##  6 G01-2-3     3    17.1    36      31.1    32.8    38.1    35.2    37.6    33.6
##  7 G01-3-1     3    16.4    35.4    31.6    34.9    39.1    35.3    40.9    33.2
##  8 G01-3-2     3    16.5    35.6    31.3    34.4    37.9    34.6    38.9    33.1
##  9 G01-3-3     3    16.4    35.8    31.5    34.4    41.6    35.0    36.5    33.4
## 10 G05-1-1    28    18.2    26.3    29.1    29.4    28.2    28.4    37.4    34.5
## # … with 26 more rows, 19 more variables: CELL_193122 <dbl>, CELL_73229 <dbl>,
## #   CELL_47814 <dbl>, CELL_163125 <dbl>, CELL_73266 <dbl>, CELL_88582 <dbl>,
## #   CELL_63583 <dbl>, CELL_14199 <dbl>, CELL_95616 <dbl>, CELL_63504 <dbl>,
## #   CELL_08643 <chr>, CELL_199599 <dbl>, CELL_01426 <dbl>, CELL_71601 <dbl>,
## #   CELL_45099 <dbl>, CELL_191900 <dbl>, CELL_99463 <dbl>, CELL_74579 <dbl>,
## #   CELL_183403 <dbl>, and abbreviated variable names ¹contr_16S, ²CELL_172283,
## #   ³CELL_203163, ⁴CELL_83325, ⁵CELL_188413, ⁶CELL_109631, ⁷CELL_188119, …

0.8.1 Realtime data

impute.mean <- function(x) replace(x, is.na(x), mean(x, na.rm = TRUE))

realtime_data <- realtime.data %>% 
  mutate(CELL_08643 = as.numeric(CELL_08643)) %>% 
  group_by(day) %>%
  mutate(nice_cell = impute.mean(CELL_08643)) %>% 
  mutate(day = as.factor(day))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `CELL_08643 = as.numeric(CELL_08643)`.
## Caused by warning:
## ! NAs introduced by coercion

realtime_zjena <- readxl::read_excel("cellulases_gene_expression(1).xlsx")
geom_mean <-function(x){exp(mean(log(x)))}

realtime_zjena_geom <- realtime_zjena %>% 
  mutate(repeats = paste0(realtime_zjena$week, "-", rep(1:3, 3, each=3))) %>% 
  relocate(repeats, 1) %>% 
  group_by(repeats) %>% 
  summarise_if(is.numeric, geom_mean) %>% 
  mutate(week = as.factor(week)) %>% 
  arrange(week)

UPGMA cellulase(distance - bray).
I don’t really know what that means. Depending on the distance, you get very different clusters.

realtime_matrix <- realtime_zjena_geom %>% 
  column_to_rownames("repeats") %>% 
  select_if(is.numeric) %>% 
  as.matrix()

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="bray"), "average")
plot(hcl)

And here is the Euclidean distance.
Nothing to compare with the previous picture

hcl <- hclust(vegan::vegdist(t(realtime_matrix), method="euclidian"), "average")
plot(hcl)

Cellulases corplot.
Correlations only

m = cor(realtime_matrix)
corrplot::corrplot(m)

Only significant correlations . In general, there are clusters, but the correlations are not significant (with some exceptions)

cor_test_mat <- psych::corr.test(realtime_matrix)$p
corrplot::corrplot(m, p.mat = cor_test_mat, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

The same, but the matrix is already logarithmased

cor_test_mat <- psych::corr.test(log(realtime_matrix))
corrplot::corrplot(cor_test_mat$r, p.mat = cor_test_mat$p, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1

# text(p1$x, p1$y, round(p1$corr, 2))

add phylogenic tree (mafft - iqtree(ML))

realtime_tree <- ape::read.tree("al_chz.fasta.contree")
plot(realtime_tree)

library(tidyverse)

realtime_data %>% 
  select(c("day", "id", "contr_16S", "CELL_172283")) %>% 
  mutate(bio_repl = gsub("-[1-3]$", "", id)) %>% 
  group_by(bio_repl, day) %>% 
  summarise(contr_tmean = mean(contr_16S),
            data_tmean = mean(CELL_172283)) %>% 
  mutate(dCt = data_tmean - contr_tmean)

## `summarise()` has grouped output by 'bio_repl'. You can override using the
## `.groups` argument.

## # A tibble: 12 × 5
## # Groups:   bio_repl [12]
##    bio_repl day   contr_tmean data_tmean   dCt
##    <chr>    <fct>       <dbl>      <dbl> <dbl>
##  1 G01-1    3            15.5       33.4 17.8 
##  2 G01-2    3            17.2       35.9 18.7 
##  3 G01-3    3            16.4       35.6 19.2 
##  4 G05-1    28           18.4       26.2  7.81
##  5 G05-2    28           16.5       26.2  9.73
##  6 G05-3    28           18.5       32.0 13.6 
##  7 G10-1    91           17.4       26.2  8.72
##  8 G10-2    91           16.3       24.1  7.79
##  9 G10-3    91           17.5       26.3  8.81
## 10 G14-1    161          18.0       28.1 10.1 
## 11 G14-2    161          16.5       27.0 10.6 
## 12 G14-3    161          16.9       25.4  8.41

resp_data <- resp.data %>% 
  group_by(day) %>%
  mutate(control = impute.mean(control)) %>% 
  mutate(straw = impute.mean(straw)) 

resp_data

## # A tibble: 164 × 3
## # Groups:   day [27]
##      day control straw
##    <dbl>   <dbl> <dbl>
##  1     0    250   250 
##  2     3    285.  723.
##  3     3    263.  832.
##  4     3    131.  657.
##  5     3    307.  525.
##  6     3    241.  744.
##  7     3    245.  701.
##  8     3    245.  657.
##  9     7    274.  690.
## 10     7    296.  690.
## # … with 154 more rows

0.8.2 Knee plot

Soil resperation - median(straw)/median(control)

You can use a third-party package (KneeArrower) to understand where the knee_plot breaks. For the elimination of repetitions let’s take the median. I don’t know how this package works (looks for a derivative, but I don’t know how it smoothes it out, it’s math), but it says that the breaking is more likely at 60-80 days.

(I corrected the errors - got closer to your data)

https://github.com/agentlans/KneeArrower - here’s where you can read about math

# resp_data %>% 
#   filter(!day == 0) %>% 
#   group_by(day) %>% 
#   summarise(median_control = median(control),
#             median_straw = median(straw)) %>% 
#   mutate(rel = median_straw/median_control) %>% 
#   ggplot() +
#   geom_point(aes(x = day, y = rel))

resp_median <- resp_data %>% 
  filter(!day == 0) %>% 
  group_by(day) %>% 
  summarise(median_control = median(control),
            median_straw = median(straw)) %>% 
  mutate(rel = median_straw/median_control)


thresholds <- c(0.25, 0.5, 0.75, 1)

# Find cutoff points at each threshold
cutoff.points <- lapply(thresholds, function(i) {
  KneeArrower::findCutoff(resp_median$day, resp_median$rel, method="first", i)
})

x.coord <- sapply(cutoff.points, function(p) p$x)
y.coord <- sapply(cutoff.points, function(p) p$y)

# Plot the cutoff points on the scatterplot
plot(resp_median$day, resp_median$rel, pch=20, col="gray")
points(x.coord, y.coord, col="red", pch=20)
text(x.coord, y.coord, labels=thresholds, pos=4, col="red")

period_data <- period.data %>%
  mutate(bag_id = as.factor(bag_id))

period_data

## # A tibble: 15 × 2
##    bag_id   day
##    <fct>  <dbl>
##  1 1          3
##  2 2          7
##  3 3         14
##  4 4         21
##  5 5         28
##  6 6         35
##  7 7         49
##  8 8         63
##  9 9         77
## 10 10        91
## 11 11       105
## 12 12       119
## 13 13       140
## 14 14       161
## 15 15       182

0.8.3 alpha/respiration.

Well, I don’t really understand what to do next - attach the clusters to this picture?

resp_median_bags <- resp_median %>% 
  left_join(period.data, by="day") %>% 
  mutate(bag_id = as.factor(bag_id))

ps.f.r <- rarefy_even_depth(ps.f)

estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed = scale(Observed),
    Shannon = scale(Shannon),
    InvSimpson = scale(InvSimpson),
    Respiration = scale(rel)
  ) %>% 
  select(c(bag_id, Observed, Shannon, InvSimpson, Respiration)) %>% 
  pivot_longer(c("Observed", "Shannon", "InvSimpson", "Respiration")) %>% 
  mutate(metric = factor(name, levels = c("Observed", "Shannon", "InvSimpson", "Respiration"))) %>% 
  ggplot(aes(y = value, x = bag_id, group = name)) +
  geom_line(aes(color = name),
            alpha = 0.8) +
  theme_bw() +
    scale_x_discrete(labels=(c("1"="3",
                                "3"="14",
                                "5"="28",
                                "7"="49" ,
                                "8"="63",
                                "10"="91",
                                "12"="119",
                                "13"="140",
                                "14"="161",
                                "15"="182")
         )) +
  labs(
    x = "Days",
    y = "z-scaled mean values",
    colour = ""
   )

Correlation is negative, weak, not particularly reliable, and only Pearson (which works for a normal distribution (we have a time series, we need a spearman for good measure))

alpha_resp <- phyloseq::estimate_richness(ps.f.r) %>% 
  rownames_to_column("ID") %>% 
  mutate(bag_id = as.factor(
    as.numeric(
      gsub("\\..+","",
           gsub("straw\\.16s\\.D","", ID)
           )
      )
    )
    ) %>% 
  group_by(bag_id) %>% 
  summarise(Observed = mean(Observed),
            Shannon = mean(Shannon),
            InvSimpson = mean(InvSimpson)) %>% 
  left_join(period_data, by="bag_id") %>% 
  left_join(resp_median_bags, by="bag_id") %>% 
  mutate(
    Observed_scaled = scale(Observed),
    Shannon_scaled = scale(Shannon),
    InvSimpson_scaled = scale(InvSimpson),
    Respiration_scaled = scale(rel)
  ) %>% 
  select(c(bag_id, Observed_scaled, Shannon_scaled, InvSimpson_scaled, Respiration_scaled))

cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## S = 240, p-value = 0.1909
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4545455

cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## S = 242, p-value = 0.1782
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4666667

cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## S = 244, p-value = 0.1661
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4787879

cor.test(alpha_resp$Observed_scaled, alpha_resp$Respiration_scaled, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Observed_scaled and alpha_resp$Respiration_scaled
## t = -2.6675, df = 8, p-value = 0.02847
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.91881253 -0.09942732
## sample estimates:
##        cor 
## -0.6861022

cor.test(alpha_resp$Shannon_scaled, alpha_resp$Respiration_scaled, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$Shannon_scaled and alpha_resp$Respiration_scaled
## t = -2.8858, df = 8, p-value = 0.02033
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9269747 -0.1536306
## sample estimates:
##        cor 
## -0.7141749

cor.test(alpha_resp$InvSimpson_scaled, alpha_resp$Respiration_scaled, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  alpha_resp$InvSimpson_scaled and alpha_resp$Respiration_scaled
## t = -2.3261, df = 8, p-value = 0.04845
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.903485261 -0.009279276
## sample estimates:
##        cor 
## -0.6351945

0.9 Export all phyloseq object in one table

In the table there is a column cluster - “exl” in it means that this is the part of the dataset that did not go to WGCNA - majors in single samples(OMs).

ps.m <- phyloseq::psmelt(ps.f)

ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-c("Group", "Bag")) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_vst)) {
  a <- taxa_names(l_vst[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.data.out.tsv", sep = "\t", row.names = FALSE)
ps.data.out.exl

0.9.1 Several pictures on the dynamics of representation of individual phyla

Bdellovibrionota - predators, an indicator of a developed community

ps.m %>% 
  filter(Phylum == "Bdellovibrionota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()

## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Myxococcota - sort of the same, but as we know can be cellulotic(facultative predators)

ps.m %>% 
  filter(Phylum == "Myxococcota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()

## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Археи появляются тоже на поздних стадиях - вообще я бы хотел бы опять развить тему важности азотного метаболизма на поздних стадиях разложения.
Оч хочется метагеном, но не этот.
Кроме того хочется отметить, что эти минорные группы возникают на D12

ps.m %>% 
  filter(Phylum == "Crenarchaeota") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()

## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

Gammaproteobacteria - they are a third of the cluster blue

ps.m %>% 
  filter(Class == "Gammaproteobacteria") %>% 
  group_by(Description, Day) %>% 
  summarise(Bs = sum(Abundance)) %>% 
  ggplot() +
  geom_boxplot(aes(x = Day, y = Bs)) +
  theme_bw()

## `summarise()` has grouped output by 'Description'. You can override using the
## `.groups` argument.

0.9.2 Boxplots by phyla

All phyla - the representation is logarithmic on the basis of 2
Separate points are sums of absolute values of reads by days
The sums are logarithmic, but not the individual phylotypes

#select only major phylums
top_phylum <- ps.m %>% 
  count(Phylum) %>% 
  arrange(desc(n)) %>% 
  top_n(10) %>% 
  pull(Phylum)

## Selecting by n

ps.m %>% 
  filter(Phylum %in% top_phylum) %>% 
  mutate(
    Phylum = as.character(Phylum),
    Class = as.character(Class),
    phylum = ifelse(Phylum == "Proteobacteria", Class, Phylum)
  ) %>% 
  group_by(Description, Day, phylum) %>% 
  filter(!is.na(phylum)) %>% 
  summarise(Bs = log2(sum(Abundance))) %>% 
  ggplot(aes(x = Day, y = Bs)) +
  geom_boxplot(fill="#4DBBD5B2", alpha=0.4) +
  theme_bw() +
  facet_wrap(~ phylum)

## `summarise()` has grouped output by 'Description', 'Day'. You can override
## using the `.groups` argument.

## Warning: Removed 54 rows containing non-finite values (`stat_boxplot()`).

0.9.3 resperation and phylotypes correlations

little_res <- resp_median_bags %>% 
  filter(day %in% ps.varstab@sam_data$Day)

ps.varstab.merged <- phyloseq::merge_samples(ps.varstab, "Day")

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'

## Also defined by 'tidytree'

otus_cor <- ps.varstab.merged@otu_table %>% 
  as.data.frame()

s1 <- sapply(colnames(otus_cor), function(x) {
  cor <- cor.test(otus_cor[[x]], little_res$rel, method="spearman", exact = FALSE)
  c1 <- cor$p.value
  c2 <- cor$estimate %>% unname()
  return(list("p.value"=c1, "rho"=c2))})

s1 <- s1 %>% 
  t() %>% 
  as.data.frame() %>% 
  unnest(cols = c(p.value, rho), .id = "id") %>% 
  column_to_rownames("id") %>% 
  mutate(p.adj = p.adjust(p.value, method = "BH", n = length(p.value)))


taxa <- as.data.frame(ps.varstab@tax_table@.Data) %>% 
  rownames_to_column("ID")

s1 %>%
  rownames_to_column("ID") %>% 
  left_join(taxa) %>% 
  arrange(rho) %>%
  filter(Phylum %in% c("Proteobacteria", "Acidobacteriota", "Actinobacteriota","Chloroflexi", "Planctomycetota", "Bacteroidota", "Verrucomicrobiota", "Firmicutes")) %>% 
  mutate(Rank = ifelse(Phylum == "Proteobacteria", Class, Phylum)) %>%
  ggplot(aes(x=rho, color=Rank, fill=Rank)) +
  geom_histogram(alpha=0.3, bins = 30) +
  theme_minimal() +
  facet_wrap(~ Rank) +
  theme(legend.position="none")

## Joining with `by = join_by(ID)`

s1.otus <- s1 %>% 
  rownames_to_column("OTU")

ps.data.out.exl %>% 
  select_if(Negate(is.integer)) %>% 
  distinct() %>% 
  left_join(s1.otus, by="OTU") %>%
  filter(rho >= 0) %>%
  arrange(p.adj) %>% 
  head() %>% 
  DT::datatable()

0.10 Additions

Part added after rounds of the peer-review. Thank you so much to reviewer two, you are great!

realtime.stat <- readxl::read_excel("cell_realtime_stat_ph.xlsx")
realtime.names <- readxl::read_excel("cell_realtime_stat_gh_names.xlsx")

gh_taxa <- readr::read_csv("kraken_gh_full_contig_207_corrected.csv") %>% 
  mutate_if(., is.character, as.factor)

## Rows: 23 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Cellulase, Taxa_Genus, GH_Family, Taxa_Family, Contig_name, Family_...
## dbl (1): Contig_identifier
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

gh_taxa %>% 
  DT::datatable(caption = "Kraken2 based taxonomy for GH containing contigs")

reatl.m <- realtime.stat %>% 
  mutate(Rows = paste0(phase, "_", rep(seq(1:9), 3))) %>% 
  relocate(Rows) %>% 
  select(-phase)  %>% 
  column_to_rownames("Rows") %>% 
  select_if(is.numeric) %>% 
  as.matrix()

names.d <- gh_taxa %>% 
  rename("label" = "Cellulase") %>% 
  mutate(name = paste0(Genus_new, " ", GH_Family)) %>% 
  select(c("label", "Family_new", "name"))

# reatl.m[reatl.m < 0] <- 0
hcl <- hclust(vegan::vegdist(t(reatl.m), method="euclidean"), "mcquitty")

dhc <- as.dendrogram(hcl)
ddata <- ggdendro::dendro_data(dhc, type = "rectangle")
ddata$labels <- left_join(ddata$labels, names.d, by = "label")

ggplot(ggdendro::segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  geom_text(data = ddata$labels, 
              aes(x = x, y = y, label = name, color = Family_new), size = 4, hjust = -0.1) +
  coord_flip() + 
  scale_y_reverse(expand = c(1.1, 0)) +
  scale_color_brewer(palette="Dark2") +
  ggdendro::theme_dendro() +
    guides(order = 1,
         fill = guide_legend(override.aes = list(shape = 55, size = 9), 
                             shape = guide_legend(order = 44))) +
  theme(legend.position="bottom",
        legend.title = element_blank(),
        legend.key.size=unit(0.56, 'lines'),
        plot.title = element_text(hjust = 1)) +
  ggtitle("WPGMA based on euclidean distance of GH's RealTime")

Add contigs name

names.d <- gh_taxa %>% 
  rename("label" = "Cellulase") %>% 
  mutate(name = paste0(Genus_new, " ", GH_Family, " ", Contig_identifier)) %>% 
  select(c("label", "Family_new", "name"))

# reatl.m[reatl.m < 0] <- 0
hcl <- hclust(vegan::vegdist(t(reatl.m), method="euclidean"), "mcquitty")

dhc <- as.dendrogram(hcl)
ddata <- ggdendro::dendro_data(dhc, type = "rectangle")
ddata$labels <- left_join(ddata$labels, names.d, by = "label")

ggplot(ggdendro::segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  geom_text(data = ddata$labels, 
              aes(x = x, y = y, label = name, color = Family_new), size = 4, hjust = -0.1) +
  coord_flip() + 
  scale_y_reverse(expand = c(1.1, 0)) +
  scale_color_brewer(palette="Dark2") +
  ggdendro::theme_dendro() +
    guides(order = 1,
         fill = guide_legend(override.aes = list(shape = 55, size = 9), 
                             shape = guide_legend(order = 44))) +
  theme(legend.position="bottom",
        legend.title = element_blank(),
        legend.key.size=unit(0.56, 'lines'),
        plot.title = element_text(hjust = 1)) +
  ggtitle("WPGMA based on euclidian distance of GH's RealTime")

cor_test_mat <- psych::corr.test(realtime_matrix)$p
corrplot::corrplot(m, p.mat = cor_test_mat, method = 'circle', type = 'lower', insig='blank',
         order = 'AOE', diag = FALSE)$corrPos -> p1
# text(p1$x, p1$y, round(p1$corr, 2))

ddata

library(ggpubr)
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

real_long <- pivot_longer(realtime.stat, colnames(realtime.stat)[2:length(colnames(realtime.stat))]) %>%
  mutate(phase = factor(phase, levels = c("early", "middle", "late")))

stat.test <- real_long %>%
  group_by(name) %>%
  tukey_hsd(value ~ phase) %>%
  add_xy_position(x = "name",
                  dodge = 0.8,
                  step.increase = 0.05)

real_long %>% 
  ggplot(aes(x=name, y=value, col=phase)) +
    geom_boxplot() +
    geom_point(size=0.4, alpha=0.9, position=position_dodge(width=0.75),aes(group=phase)) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) + 
  labs( title = "Realtime of selected GH") +
  ylab("log2foldchange") +
  stat_pvalue_manual(
    stat.test,
    label = "p.adj.signif", 
    tip.length = 0.005,
    hide.ns = TRUE,
    bracket.nudge.y = -0.5
  )

realtime.big <- readxl::read_excel("straw_rt_stat_gr.xlsx")

## New names:
## • `` -> `...1`

realtime.big <- realtime.big %>% 
  group_by(day) %>% 
  mutate_at(vars(FUN), 
            ~replace_na(., 
                        mean(., na.rm = TRUE)))

realtime.big.long <- realtime.big %>% 
  filter(phase != "no") %>% 
  pivot_longer(colnames(realtime.big)[4:length(colnames(realtime.big))]) %>% 
  mutate(phase = factor(phase, levels = c("early", "middle", "late"))) %>% 
  mutate(value = log2(value)) 

stat.test <- realtime.big.long %>%
  group_by(name) %>%
  tukey_hsd(value ~ phase) %>%
  add_xy_position(x = "name", dodge = 0.5)

realtime.big.long %>% 
  ggplot(aes(x=name, y=value, col=phase)) +
    geom_boxplot() +
    geom_point(size=0.4, alpha=0.9, position=position_dodge(width=0.75),aes(group=phase)) +
    theme_light() + 
    scale_color_brewer(palette="Dark2") +
    theme(axis.text.x = element_text(angle = 90),
          axis.title.x=element_blank()) + 
  labs(
  title = "Realtime"
) +
ylab("operons per gram, log2") +
  stat_pvalue_manual(
  stat.test,  label = "{p.adj} {p.adj.signif}", 
  tip.length = 0.005, hide.ns = TRUE
  )

Permanova based on GH specific RealTime data

real_matrix <- realtime.stat %>% 
  select(-phase) %>% 
  as.matrix() %>% 
  t()

gh_meta <- gh_taxa %>% 
  column_to_rownames("Cellulase") %>% 
  select(GH_Family, Genus_new, Family_new) 

meta <- gh_taxa %>% 
  select(GH_Family, Genus_new, Family_new) %>% 
  t() %>% 
  as.data.frame() %>% 
  setNames(gh_taxa$Cellulase)
  
d <- dist(real_matrix, method = "euclidian")

table1 <- phyloseq::distance(ps.f, "bray")
table2 <- as(sample_data(ps.f@sam_data), "data.frame")

list(
  vegan::adonis2(d ~ GH_Family + Genus_new + Family_new, data = gh_meta),
  vegan::adonis2(d ~ Genus_new  + GH_Family, data = gh_meta),
  vegan::adonis2(d ~ GH_Family + Genus_new, data = gh_meta),
  vegan::adonis2(d ~ GH_Family*Genus_new, data = gh_meta)
)

## [[1]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family + Genus_new + Family_new, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)   
## GH_Family  7   3479.3 0.41290 3.1557  0.009 **
## Genus_new  6   3529.7 0.41888 3.7349  0.005 **
## Residual   9   1417.6 0.16823                 
## Total     22   8426.6 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[2]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ Genus_new + GH_Family, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)    
## Genus_new  6   5268.9 0.62527 5.5753  0.001 ***
## GH_Family  7   1740.1 0.20650 1.5783  0.162    
## Residual   9   1417.6 0.16823                  
## Total     22   8426.6 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[3]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family + Genus_new, data = gh_meta)
##           Df SumOfSqs      R2      F Pr(>F)   
## GH_Family  7   3479.3 0.41290 3.1557  0.013 * 
## Genus_new  6   3529.7 0.41888 3.7349  0.007 **
## Residual   9   1417.6 0.16823                 
## Total     22   8426.6 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## [[4]]
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = d ~ GH_Family * Genus_new, data = gh_meta)
##                     Df SumOfSqs      R2      F Pr(>F)  
## GH_Family            7   3479.3 0.41290 3.0006  0.036 *
## Genus_new            6   3529.7 0.41888 3.5514  0.017 *
## GH_Family:Genus_new  4    589.3 0.06994 0.8894  0.561  
## Residual             5    828.2 0.09829                
## Total               22   8426.6 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

0.11 ITS

Same analysis, but much quicker and dirtier.

library(DESeq2)

ps_its_bulk <- readRDS("../../its/d2/ps_its")

sample.data <- ps_its_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Group = if_else(Day %in% c("D01", "D03", "D05"), "early", 
                         if_else(Day %in% c("D07", "D08","D10"), "middle", "late"))) %>% 
  mutate(Group = factor(Group, levels=c("early", "middle","late"))) %>% 
  mutate(Day = Day %>% 
           forcats::fct_recode( "3" = "D01",
                                "14" = "D03",
                                "28" = "D05",
                                "49" = "D07" ,
                                "63" = "D08",
                                "91" = "D10",
                                "119" = "D12",
                                "140" = "D13",
                                "161" = "D14",
                                "182" = "D15")
         ) %>% 
  phyloseq::sample_data()

sample_data(ps_its_bulk) <- sample.data

ps_its <- prune_samples(!sample_data(ps_its_bulk)$Day %in% "bulk", ps_its_bulk)
ps_its  <- prune_taxa(taxa_sums(ps_its) > 0, ps_its)

amp_its <- phyloseq_to_ampvis2(ps_its)

amp_its_bulk <- phyloseq_to_ampvis2(ps_its_bulk)

ps.its.inall <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
ps.its.inall <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)
Biostrings::writeXStringSet(ps.its.inall@refseq, file="ref_inall.fasta")
Biostrings::writeXStringSet(ps_its@refseq, file="ref.fasta")

tree <- ape::read.tree("../../its/d2/al.fasta.contree")

ps.its.inall@phy_tree <- tree
ps.its.inall  <- prune_taxa(taxa_sums(ps.its.inall) > 0, ps.its.inall)


ps.its.exl  <- phyloseq::filter_taxa(ps_its, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.its.exl  <- prune_taxa(taxa_sums(ps.its.exl) > 100, ps.its.exl)
ps.its.exl.taxa <- taxa_names(ps.its.exl)

amp_its_inall <- phyloseq_to_ampvis2(ps.its.inall)

ps_its@sam_data %>% 
  DT::datatable()

Base statistics for its

With bulk soil

amp_its_bulk

## ampvis2 object with 4 elements. 
## Summary of OTU table:
## [1]       42     3178   460040     1355    39372   8278.5 10953.33
## 
## Assigned taxonomy:
##      Kingdom       Phylum        Class        Order       Family        Genus 
##    1379(43%) 1213(38.17%) 1127(35.46%) 1086(34.17%) 1046(32.91%)   998(31.4%) 
##      Species 
##  482(15.17%) 
## 
## Metadata variables: 4 
##  SampleID, Day, Description, Group

amp_its

## ampvis2 object with 4 elements. 
## Summary of OTU table:
## [1]     36   1264 275148   1355  14380   7211   7643
## 
## Assigned taxonomy:
##     Kingdom      Phylum       Class       Order      Family       Genus 
##    416(33%) 325(25.71%) 279(22.07%) 263(20.81%) 247(19.54%) 233(18.43%) 
##     Species 
## 162(12.82%) 
## 
## Metadata variables: 4 
##  SampleID, Day, Description, Group

amp_heatmap(amp_its_bulk,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

amp_heatmap(amp_its_bulk,
            tax_show = 7,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_add = "Kingdom",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

Major ITS phylotypes

amp_heatmap(amp_its,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

All by the phylums

amp_heatmap(amp_its,
            tax_show = 7,
            group_by = "Day",
            tax_aggregate = "Phylum",
            tax_add = "Kingdom",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

There are going to WGCNA pipeline(116 phylotypes)

amp_heatmap(amp_its_inall,
            tax_show = 40 ,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=TRUE,
            showRemainingTaxa = TRUE)

## Warning: Transformation introduced infinite values in discrete y-axis

beta_custom_norm_NMDS_elli_w(ps_its_bulk, Group="Day", Color="Day")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1414574 
## Run 1 stress 0.1516495 
## Run 2 stress 0.1420709 
## Run 3 stress 0.1414477 
## ... New best solution
## ... Procrustes: rmse 0.003887442  max resid 0.01937902 
## Run 4 stress 0.1536413 
## Run 5 stress 0.141642 
## ... Procrustes: rmse 0.01429757  max resid 0.06532445 
## Run 6 stress 0.1421837 
## Run 7 stress 0.1414477 
## ... Procrustes: rmse 0.00001939437  max resid 0.00006024144 
## ... Similar to previous best
## Run 8 stress 0.1537918 
## Run 9 stress 0.1414448 
## ... New best solution
## ... Procrustes: rmse 0.002782189  max resid 0.01293661 
## Run 10 stress 0.1414467 
## ... Procrustes: rmse 0.006632629  max resid 0.03482667 
## Run 11 stress 0.1444901 
## Run 12 stress 0.1532385 
## Run 13 stress 0.1443772 
## Run 14 stress 0.1414574 
## ... Procrustes: rmse 0.004649221  max resid 0.01894334 
## Run 15 stress 0.1519091 
## Run 16 stress 0.1415836 
## ... Procrustes: rmse 0.01176699  max resid 0.06797239 
## Run 17 stress 0.1441972 
## Run 18 stress 0.1531946 
## Run 19 stress 0.1523984 
## Run 20 stress 0.1524871 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     19: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin

beta_custom_norm_NMDS_elli_w(ps_its, Group="Day", Color="Day")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1873573 
## Run 1 stress 0.1890853 
## Run 2 stress 0.1877849 
## ... Procrustes: rmse 0.06797956  max resid 0.3258443 
## Run 3 stress 0.1873573 
## ... Procrustes: rmse 0.000002643625  max resid 0.000008942489 
## ... Similar to previous best
## Run 4 stress 0.2246903 
## Run 5 stress 0.1910365 
## Run 6 stress 0.1886435 
## Run 7 stress 0.220317 
## Run 8 stress 0.1873573 
## ... Procrustes: rmse 0.000002489462  max resid 0.00001005272 
## ... Similar to previous best
## Run 9 stress 0.1873573 
## ... New best solution
## ... Procrustes: rmse 0.000002900432  max resid 0.00001212819 
## ... Similar to previous best
## Run 10 stress 0.1881727 
## Run 11 stress 0.1873573 
## ... New best solution
## ... Procrustes: rmse 0.000003703707  max resid 0.00001672818 
## ... Similar to previous best
## Run 12 stress 0.2203009 
## Run 13 stress 0.1881727 
## Run 14 stress 0.2098111 
## Run 15 stress 0.1910365 
## Run 16 stress 0.1877849 
## ... Procrustes: rmse 0.06797676  max resid 0.3258368 
## Run 17 stress 0.1873573 
## ... Procrustes: rmse 0.00003059685  max resid 0.0001532429 
## ... Similar to previous best
## Run 18 stress 0.1881727 
## Run 19 stress 0.1881727 
## Run 20 stress 0.1891497 
## *** Best solution repeated 2 times

With bulk soil

p1 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_its_bulk, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

Without bulk soil

p1 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)

The first plot is clr normalization (compositional), the second is vst(DESeq2). In contrast to bacteria - for vst WGCNA did not show good results (no plateau yield).
We still applied vst stabilization, but we have to keep in mind that the resulting output is not great.

otu.inall <- as.data.frame(ps.its.inall@otu_table@.Data) 
ps.inall.clr <- ps.its.inall
otu_table(ps.inall.clr) <- phyloseq::otu_table(compositions::clr(otu.inall), taxa_are_rows = FALSE)
data <- ps.inall.clr@otu_table@.Data %>% 
  as.data.frame()
rownames(data) <- as.character(ps.inall.clr@sam_data$Description)
powers <-  c(c(1:10), seq(from = 12, to=30, by=1))
sft <-  pickSoftThreshold(data, powerVector = powers, verbose = 5, networkType = "signed")

## pickSoftThreshold: will use block size 68.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 68 of 68
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.05110 -6.85         0.6370 33.1000 33.200000 34.400
## 2      2  0.00907  1.51         0.6780 17.1000 17.100000 18.600
## 3      3  0.24300 -3.64         0.7670  9.2400  9.200000 11.000
## 4      4  0.58200 -4.21         0.6270  5.2100  5.110000  7.230
## 5      5  0.16800 -8.96        -0.0691  3.0800  2.910000  5.190
## 6      6  0.18400 -6.75        -0.0474  1.9100  1.700000  4.000
## 7      7  0.25400 -6.07         0.0954  1.2500  1.020000  3.260
## 8      8  0.26300 -5.21         0.1110  0.8510  0.625000  2.760
## 9      9  0.25800 -4.44         0.1060  0.6080  0.404000  2.410
## 10    10  0.25700 -3.88         0.1160  0.4520  0.270000  2.140
## 11    12  0.26300 -4.44         0.0797  0.2770  0.131000  1.750
## 12    13  0.24600 -3.92         0.0718  0.2260  0.092900  1.600
## 13    14  0.24600 -3.64         0.0780  0.1870  0.065400  1.480
## 14    15  0.24900 -3.50         0.0837  0.1580  0.046500  1.370
## 15    16  0.24700 -3.35         0.0873  0.1350  0.033400  1.270
## 16    17  0.24600 -3.23         0.0902  0.1170  0.024300  1.180
## 17    18  0.24900 -3.16         0.0964  0.1020  0.017800  1.100
## 18    19  0.26000 -3.64         0.0650  0.0899  0.013200  1.030
## 19    20  0.24300 -3.46         0.0444  0.0796  0.009800  0.962
## 20    21  0.24800 -3.45         0.0510  0.0708  0.007320  0.902
## 21    22  0.25300 -3.36         0.0584  0.0633  0.005490  0.847
## 22    23  0.25200 -3.26         0.0591  0.0569  0.004140  0.797
## 23    24  0.25200 -3.16         0.0596  0.0512  0.003130  0.751
## 24    25  0.25900 -3.21         0.0656  0.0463  0.002370  0.708
## 25    26  0.26300 -3.14         0.0712  0.0420  0.001800  0.668
## 26    27  0.25700 -3.39         0.0457  0.0383  0.001370  0.632
## 27    28  0.26300 -3.46         0.0544  0.0349  0.001050  0.598
## 28    29  0.25900 -3.46         0.0499  0.0319  0.000799  0.566
## 29    30  0.26400 -3.41         0.0559  0.0293  0.000612  0.536

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

ps.varstab <- ps_vst(ps.its.inall, "Day")

data2 <- ps.varstab@otu_table@.Data %>% 
  as.data.frame()

rownames(data2) <- as.character(ps.varstab@sam_data$Description)
powers <-  c(seq(from = 1, to=10, by=0.5), seq(from = 11, to=20, by=1))
sft2 <-  pickSoftThreshold(data2, powerVector = powers, verbose = 5, networkType = "signed hybrid")

## pickSoftThreshold: will use block size 68.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 68 of 68
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1    1.0   0.3680  0.881         0.7030   8.490   8.35000  12.80
## 2    1.5   0.0705  0.305         0.6220   5.500   5.26000   9.62
## 3    2.0   0.0623 -0.354         0.1520   3.880   3.65000   7.92
## 4    2.5   0.2430 -0.521         0.2930   2.920   2.63000   6.87
## 5    3.0   0.4830 -0.687         0.4270   2.310   1.91000   6.17
## 6    3.5   0.5750 -0.838         0.4820   1.910   1.43000   5.72
## 7    4.0   0.5600 -0.897         0.4340   1.620   1.08000   5.41
## 8    4.5   0.0757 -1.690        -0.1430   1.410   0.81600   5.18
## 9    5.0   0.0816 -1.660        -0.1350   1.260   0.64800   5.00
## 10   5.5   0.1680 -3.190        -0.0669   1.130   0.51500   4.85
## 11   6.0   0.1570 -3.480        -0.0604   1.040   0.41400   4.73
## 12   6.5   0.1570 -3.350        -0.0651   0.963   0.34000   4.63
## 13   7.0   0.1630 -3.310        -0.0593   0.900   0.27100   4.54
## 14   7.5   0.1670 -3.340        -0.0540   0.847   0.21400   4.46
## 15   8.0   0.1640 -3.260        -0.0578   0.802   0.17300   4.39
## 16   8.5   0.1020 -2.670        -0.0433   0.764   0.14200   4.32
## 17   9.0   0.1330 -2.920        -0.0212   0.730   0.11600   4.26
## 18   9.5   0.0611 -1.740         0.1050   0.701   0.09720   4.20
## 19  10.0   0.0597 -1.780         0.1240   0.675   0.08180   4.14
## 20  11.0   0.0674 -1.800         0.1270   0.630   0.05830   4.03
## 21  12.0   0.0358 -1.140         0.1310   0.593   0.04190   3.93
## 22  13.0   0.0003 -0.122         0.3070   0.562   0.02940   3.84
## 23  14.0   0.0693 -1.780         0.0436   0.536   0.02040   3.76
## 24  15.0   0.0414 -1.440         0.2380   0.512   0.01430   3.68
## 25  16.0   0.0458 -1.450         0.2260   0.491   0.01000   3.60
## 26  17.0   0.0509 -1.470         0.2150   0.473   0.00715   3.52
## 27  18.0   0.0398 -1.160         0.0947   0.456   0.00516   3.45
## 28  19.0   0.0443 -1.170         0.0743   0.440   0.00374   3.38
## 29  20.0   0.0475 -1.180         0.0675   0.426   0.00271   3.32

plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"))
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2], labels=powers,cex=0.9,col="red")
abline(h=0.9,col="salmon")

# WGCNA, as old and not supported
detachAllPackages()
library(WGCNA)

net3 <- WGCNA::blockwiseModules(data2,
                          power=4,
                          TOMType="signed",
                          networkType="signed hybrid",
                          nThreads=0)

##      mergeCloseModules: less than two proper modules.
##       ..color levels are grey, turquoise
##       ..there is nothing to merge.

mergedColors2 <-  WGCNA::labels2colors(net3$colors, colorSeq = c("salmon", "darkgreen", "cyan", "red", "blue", "plum"))

plotDendroAndColors(
  net3$dendrograms[[1]],
  mergedColors2[net3$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05)

library(phyloseq)
library(tidyverse)
library(ggpubr)
library(ampvis2)
library(heatmaply)
library(WGCNA)
library(phyloseq)
library(ggtree)
library(tidyverse)
library(KneeArrower)

The green cluster is the background, which in the case of the red cluster for bacteria that here, I don’t think it can be called a cluster at all.
WGCNA as an analysis helps clustering based on correlations by finding a soft threshold(power, see plot where the red numbers still can’t reach the plateau),
the darkgreen and red cluster are simply unconnected phylotypes.

modules_of_interest = mergedColors2 %>% 
  unique()

module_df <- data.frame(
  asv = names(net3$colors),
  colors = mergedColors2
)

# module_df[module_df == "yellow"] <- "salmon"

submod <-  module_df %>%
  subset(colors %in% modules_of_interest)

row.names(module_df) = module_df$asv

subexpr = as.data.frame(t(data2))[submod$asv,]

submod_df <- data.frame(subexpr) %>%
  mutate(
    asv = row.names(.)
  ) %>%
  pivot_longer(-asv) %>%
  mutate(
    module = module_df[asv,]$colors
  )

submod_df <- submod_df %>% 
  mutate(name = gsub("\\_.*","",submod_df$name)) %>% 
  group_by(name, asv) %>% 
  summarise(value = mean(value), asv = asv, module = module) %>% 
  relocate(c(asv, name, value, module)) %>% 
  ungroup() %>% 
  mutate(module = as.factor(module))

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.

p <- submod_df %>% 
  ggplot(., aes(x=name, y=value, group=asv)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none") +
  facet_grid(rows = vars(module)) +
  labs(x = "day",
       y = "normalized abundance")

p + scale_color_manual(values = levels(submod_df$module)) +
  scale_x_discrete(labels=(c("D01"="3",
                                "D03"="14",
                                "D05"="28",
                                "D07"="49" ,
                                "D08"="63",
                                "D10"="91",
                                "D12"="119",
                                "D13"="140",
                                "D14"="161",
                                "D15"="182")
         ))

l_its <- color_filt_broken(ps_its, submod_df, ps.its.inall)
l_its

$darkgreen $darkgreen$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 25 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 3 sample variables ] tax_table() Taxonomy Table: [ 25 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 25 reference sequences ]

$darkgreen$amp ampvis2 object with 4 elements. Summary of OTU table: [1] 36 25 84467 0 6856 1935.5 2346.31

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 16(64%) 12(48%) 9(36%) 7(28%) 7(28%) 7(28%) 6(24%)

Metadata variables: 4 SampleID, Day, Description, Group

$darkgreen$heat

$darkgreen$heat_rel

$darkgreen$tree

$darkgreen$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq1	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Chaetosphaeriales	f__Chaetosphaeriaceae	g__Chloridium	s__aseptatum
Seq12	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Coniochaetales	f__Coniochaetaceae	g__Lecythophora	s__canina
Seq16	k__Fungi	p__Ascomycota	c__Sordariomycetes	NA	NA	NA	NA
Seq13	k__Fungi	NA	NA	NA	NA	NA	NA
Seq69	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Clavicipitaceae	g__Marquandomyces	s__marquandii
Seq43	NA	NA	NA	NA	NA	NA	NA
Seq63	NA	NA	NA	NA	NA	NA	NA
Seq100	NA	NA	NA	NA	NA	NA	NA
Seq42	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Stachybotryaceae	g__Stachybotrys	s__terrestris
Seq45	k__Fungi	p__Ascomycota	NA	NA	NA	NA	NA
Seq21	NA	NA	NA	NA	NA	NA	NA
Seq195	k__Fungi	p__Ascomycota	NA	NA	NA	NA	NA
Seq138	k__Fungi	p__Ascomycota	NA	NA	NA	NA	NA
Seq29	NA	NA	NA	NA	NA	NA	NA
Seq8	k__Fungi	NA	NA	NA	NA	NA	NA
Seq54	k__Fungi	p__Ascomycota	c__Leotiomycetes	o__Helotiales	f__Helotiaceae	g__Scytalidium	NA
Seq79	NA	NA	NA	NA	NA	NA	NA
Seq125	NA	NA	NA	NA	NA	NA	NA
Seq41	k__Fungi	NA	NA	NA	NA	NA	NA
Seq76	NA	NA	NA	NA	NA	NA	NA
Seq124	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Chaetosphaeriales	f__Chaetosphaeriaceae	g__Chloridium	s__aseptatum
Seq20	k__Fungi	p__Ascomycota	c__Sordariomycetes	NA	NA	NA	NA
Seq32	k__Fungi	NA	NA	NA	NA	NA	NA
Seq132	NA	NA	NA	NA	NA	NA	NA
Seq40	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Clavicipitaceae	g__Marquandomyces	s__marquandii

$salmon $salmon$ps phyloseq-class experiment-level object otu_table() OTU Table: [ 43 taxa and 36 samples ] sample_data() Sample Data: [ 36 samples by 3 sample variables ] tax_table() Taxonomy Table: [ 43 taxa by 7 taxonomic ranks ] refseq() DNAStringSet: [ 43 reference sequences ]

$salmon$amp ampvis2 object with 4 elements. Summary of OTU table: [1] 36 43 116231 124 10633 2510.5 3228.64

Assigned taxonomy: Kingdom Phylum Class Order Family Genus Species 40(93%) 40(93.02%) 37(86.05%) 37(86.05%) 36(83.72%) 33(76.74%) 29(67.44%)

Metadata variables: 4 SampleID, Day, Description, Group

$salmon$heat

$salmon$heat_rel

$salmon$tree

$salmon$taxa

	Kingdom	Phylum	Class	Order	Family	Genus	Species
Seq2	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	s__equiseti
Seq49	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq6	k__Fungi	p__Mucoromycota	c__Mucoromycetes	o__Mucorales	f__Mucoraceae	g__Actinomucor	s__elegans
Seq10	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Lasiosphaeriaceae	g__Schizothecium	s__inaequale
Seq3	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq5	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq7	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Psathyrellaceae	g__Coprinellus	s__flocculosus
Seq19	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Psathyrellaceae	g__Coprinellus	s__flocculosus
Seq53	k__Viridiplantae	p__Anthophyta	NA	NA	NA	NA	NA
Seq99	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Chaetomiaceae	g__Parachaetomium	s__iranianum
Seq30	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq50	k__Fungi	p__Basidiomycota	c__Cystobasidiomycetes	o__Cystobasidiales	f__Cystobasidiaceae	g__Occultifur	s__kilbournensis
Seq78	k__Metazoa	p__Nematoda	c__Chromadorea	o__Rhabditida	f__Cephalobidae	g__Acrobeloides	NA
Seq101	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Lasiosphaeriaceae	g__Schizothecium	s__inaequale
Seq4	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	s__oxysporum
Seq175	NA	NA	NA	NA	NA	NA	NA
Seq62	k__Fungi	p__Basidiomycota	c__Cystobasidiomycetes	o__Cystobasidiales	f__Cystobasidiaceae	g__Occultifur	NA
Seq48	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Stachybotryaceae	g__Albifimbria	s__verrucaria
Seq36	k__Fungi	p__Mucoromycota	c__Mucoromycetes	o__Mucorales	f__Mucoraceae	g__Actinomucor	s__elegans
Seq9	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Stachybotryaceae	g__Albifimbria	s__verrucaria
Seq11	k__Fungi	p__Mucoromycota	c__Mucoromycetes	o__Mucorales	f__Mucoraceae	g__Actinomucor	s__elegans
Seq74	k__Fungi	p__Mucoromycota	c__Mucoromycetes	o__Mucorales	f__Mucoraceae	g__Actinomucor	s__elegans
Seq89	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq33	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	s__oxysporum
Seq22	k__Fungi	p__Basidiomycota	c__Cystobasidiomycetes	o__Cystobasidiales	f__Cystobasidiaceae	g__Occultifur	s__kilbournensis
Seq31	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	NA
Seq28	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Corticiales	f__Corticiaceae	g__Waitea	s__circinata
Seq46	NA	NA	NA	NA	NA	NA	NA
Seq85	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa
Seq121	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Coniochaetales	f__Coniochaetaceae	NA	NA
Seq24	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Chaetomiaceae	NA	NA
Seq27	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Lasiosphaeriaceae	g__Schizothecium	s__inaequale
Seq66	k__Fungi	p__Mucoromycota	c__Mucoromycetes	o__Mucorales	f__Mucoraceae	g__Actinomucor	s__elegans
Seq114	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Psathyrellaceae	g__Coprinellus	s__flocculosus
Seq161	k__Fungi	p__Ascomycota	c__Dothideomycetes	o__Pleosporales	NA	NA	NA
Seq96	NA	NA	NA	NA	NA	NA	NA
Seq15	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	s__equiseti
Seq94	k__Viridiplantae	p__Anthophyta	NA	NA	NA	NA	NA
Seq155	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Sordariales	f__Chaetomiaceae	NA	NA
Seq152	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	s__oxysporum
Seq39	k__Fungi	p__Ascomycota	c__Sordariomycetes	o__Hypocreales	f__Nectriaceae	g__Fusarium	NA
Seq180	k__Viridiplantae	p__Anthophyta	NA	NA	NA	NA	NA
Seq52	k__Fungi	p__Basidiomycota	c__Agaricomycetes	o__Agaricales	f__Bolbitiaceae	g__Conocybe	s__crispa

p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Group", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

p.observed <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Observed")) + 
  theme(axis.title.y = element_blank())

p.shannon <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("Shannon")) + 
  theme(axis.title.y = element_blank())

p.simpson <- plot_alpha_w_toc(ps = ps_its, group = "Day", metric = c("InvSimpson")) + 
  theme(axis.title.y = element_blank())

ggpubr::ggarrange(p.observed, p.shannon, p.simpson, ncol = 3)

ps.m <- phyloseq::psmelt(ps_its)
ps.m <- ps.m %>% 
  mutate_if(is.character, as.factor) 

ps.data.out <- ps.m %>%
  select(-Group) %>% 
  pivot_wider(names_from = c(Day, Description, Sample), values_from = Abundance, values_fill = 0) 

#create empty dataframe with columnnames
external_empty_dataframe <- data.frame(OTU=factor(), cluster=factor(), stringsAsFactors = FALSE)

for (i in names(l_its)) {
  a <- taxa_names(l_its[[i]][["ps"]])
  b <- rep(i, length(a))
  d <- data.frame(OTU = as.factor(a),
                  cluster = as.factor(b))
  external_empty_dataframe <- rbind(external_empty_dataframe, d)
}

clusters.otu.df <- external_empty_dataframe
# add exl taxa -- taxa "exl"
d <- data.frame(OTU = as.factor(ps.its.exl.taxa),
                cluster = as.factor(rep("exl", length(ps.its.exl.taxa))))

clusters.otu.df <- rbind(clusters.otu.df, d)
ps.data.out.exl <- left_join(clusters.otu.df, ps.data.out, by="OTU")
# write.table(ps.data.out.exl, file = "ps.its.data.out.tsv", sep = "\t")

map_d_trial <- ps_its_bulk@sam_data %>% 
  data.frame() %>% 
  mutate(Bulk = ifelse(Day == "bulk", "bulk", "trial") %>% as.factor())

ps_its_bulk@sam_data <-  sample_data(map_d_trial)

amp_its_bulk_venn <- phyloseq_to_ampvis2(ps_its_bulk)
p.venn.cc <- amp_venn(amp_its_bulk_venn,
         group_by = "Bulk",
         normalise = TRUE,
         cut_a = 0.000001,
         cut_f=0.000001,
            )

p.venn.cc

# pw <- p.venn.cc

# ggarrange(pw, pw, pw, pw,pw, pw, pw, pw, ncol = 1, labels = paste0("venn", seq(1, 8)))

0.12 Packages info

sessionInfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3         reshape2_1.4.4        KneeArrower_1.0.0    
##  [4] ggtree_3.6.2          heatmaply_1.4.2       viridis_0.6.2        
##  [7] viridisLite_0.4.1     plotly_4.10.1         ampvis2_2.7.28       
## [10] ggpubr_0.6.0          lubridate_1.9.2       forcats_1.0.0        
## [13] stringr_1.5.0         dplyr_1.1.0           purrr_1.0.1          
## [16] readr_2.1.4           tidyr_1.3.0           tibble_3.1.8         
## [19] ggplot2_3.4.1         tidyverse_2.0.0       phyloseq_1.42.0      
## [22] WGCNA_1.72-1          fastcluster_1.2.3     dynamicTreeCut_1.63-1
## 
## loaded via a namespace (and not attached):
##   [1] rsvd_1.0.5                     Hmisc_4.8-0                   
##   [3] svglite_2.1.1                  class_7.3-21                  
##   [5] foreach_1.5.2                  crayon_1.5.2                  
##   [7] rbibutils_2.2.13               MASS_7.3-58.2                 
##   [9] rhdf5filters_1.10.0            nlme_3.1-162                  
##  [11] backports_1.4.1                impute_1.72.3                 
##  [13] rlang_1.0.6                    compositions_2.0-5            
##  [15] XVector_0.38.0                 readxl_1.4.2                  
##  [17] irlba_2.3.5.1                  nloptr_2.0.3                  
##  [19] ca_0.71.1                      scater_1.26.1                 
##  [21] BiocParallel_1.32.5            bit64_4.0.5                   
##  [23] glue_1.6.2                     rngtools_1.5.2                
##  [25] parallel_4.2.0                 vipor_0.4.5                   
##  [27] AnnotationDbi_1.60.0           BiocGenerics_0.44.0           
##  [29] tidyselect_1.2.0               SummarizedExperiment_1.28.0   
##  [31] XML_3.99-0.13                  zoo_1.8-11                    
##  [33] xtable_1.8-4                   magrittr_2.0.3                
##  [35] evaluate_0.20                  Rdpack_2.4                    
##  [37] scuttle_1.8.4                  cli_3.6.0                     
##  [39] zlibbioc_1.44.0                rstudioapi_0.14               
##  [41] doRNG_1.8.6                    MultiAssayExperiment_1.24.0   
##  [43] bslib_0.4.2                    rpart_4.1.19                  
##  [45] treeio_1.22.0                  BiocSingular_1.14.0           
##  [47] xfun_0.37                      multtest_2.54.0               
##  [49] cluster_2.1.4                  TSP_1.2-2                     
##  [51] biomformat_1.26.0              ggfittext_0.9.1               
##  [53] KEGGREST_1.38.0                expm_0.999-7                  
##  [55] ggrepel_0.9.3                  ape_5.7                       
##  [57] dendextend_1.16.0              Biostrings_2.66.0             
##  [59] png_0.1-8                      permute_0.9-7                 
##  [61] withr_2.5.0                    bitops_1.0-7                  
##  [63] ggforce_0.4.1                  plyr_1.8.8                    
##  [65] cellranger_1.1.0               e1071_1.7-13                  
##  [67] coda_0.19-4                    pillar_1.8.1                  
##  [69] cachem_1.0.7                   Rmpfr_0.9-1                   
##  [71] multcomp_1.4-22                DelayedMatrixStats_1.20.0     
##  [73] vctrs_0.5.2                    ellipsis_0.3.2                
##  [75] generics_0.1.3                 tools_4.2.0                   
##  [77] foreign_0.8-84                 beeswarm_0.4.0                
##  [79] munsell_0.5.0                  tweenr_2.0.2                  
##  [81] emmeans_1.8.4-1                proxy_0.4-27                  
##  [83] DelayedArray_0.24.0            fastmap_1.1.1                 
##  [85] compiler_4.2.0                 abind_1.4-5                   
##  [87] DescTools_0.99.48              decontam_1.18.0               
##  [89] GenomeInfoDbData_1.2.9         lattice_0.20-45               
##  [91] deldir_1.0-6                   utf8_1.2.3                    
##  [93] jsonlite_1.8.4                 scales_1.2.1                  
##  [95] gld_2.6.6                      ScaledMatrix_1.6.0            
##  [97] tidytree_0.4.2                 carData_3.0-5                 
##  [99] sparseMatrixStats_1.10.0       estimability_1.4.1            
## [101] lazyeval_0.2.2                 car_3.1-1                     
## [103] doParallel_1.0.17              latticeExtra_0.6-30           
## [105] checkmate_2.1.0                rmarkdown_2.20                
## [107] sandwich_3.0-2                 cowplot_1.1.1                 
## [109] webshot_0.5.4                  treemapify_2.5.5              
## [111] Biobase_2.58.0                 igraph_1.4.1                  
## [113] survival_3.5-3                 numDeriv_2016.8-1.1           
## [115] yaml_2.3.7                     systemfonts_1.0.4             
## [117] ANCOMBC_2.0.2                  htmltools_0.5.4               
## [119] memoise_2.0.1                  locfit_1.5-9.7                
## [121] seriation_1.4.1                IRanges_2.32.0                
## [123] gmp_0.7-1                      digest_0.6.31                 
## [125] assertthat_0.2.1               registry_0.5-1                
## [127] RSQLite_2.3.0                  yulab.utils_0.0.6             
## [129] Exact_3.2                      data.table_1.14.8             
## [131] blob_1.2.3                     vegan_2.6-4                   
## [133] S4Vectors_0.36.2               preprocessCore_1.60.2         
## [135] splines_4.2.0                  Formula_1.2-5                 
## [137] labeling_0.4.2                 DECIPHER_2.26.0               
## [139] Rhdf5lib_1.20.0                RCurl_1.98-1.10               
## [141] broom_1.0.3                    hms_1.1.2                     
## [143] rhdf5_2.42.0                   colorspace_2.1-0              
## [145] base64enc_0.1-3                mnormt_2.1.1                  
## [147] ggbeeswarm_0.7.1               GenomicRanges_1.50.2          
## [149] aplot_0.1.9                    nnet_7.3-18                   
## [151] TreeSummarizedExperiment_2.6.0 sass_0.4.5                    
## [153] mia_1.6.0                      Rcpp_1.0.10                   
## [155] mvtnorm_1.1-3                  fansi_1.0.4                   
## [157] tzdb_0.3.0                     R6_2.5.1                      
## [159] grid_4.2.0                     lifecycle_1.0.3               
## [161] signal_0.7-7                   rootSolve_1.8.2.3             
## [163] ggsignif_0.6.4                 minqa_1.2.5                   
## [165] jquerylib_0.1.4                robustbase_0.95-0             
## [167] Matrix_1.5-3                   TH.data_1.1-1                 
## [169] RColorBrewer_1.1-3             iterators_1.0.14              
## [171] htmlwidgets_1.6.1              beachmat_2.14.0               
## [173] polyclip_1.10-4                crosstalk_1.2.0               
## [175] timechange_0.2.0               gridGraphics_0.5-1            
## [177] rvest_1.0.3                    mgcv_1.8-42                   
## [179] CVXR_1.0-11                    lmom_2.9                      
## [181] htmlTable_2.4.1                patchwork_1.1.2               
## [183] tensorA_0.36.2                 codetools_0.2-19              
## [185] matrixStats_0.63.0             GO.db_3.16.0                  
## [187] psych_2.2.9                    SingleCellExperiment_1.20.0   
## [189] GenomeInfoDb_1.34.9            gtable_0.3.1                  
## [191] DBI_1.1.3                      bayesm_3.1-5                  
## [193] stats4_4.2.0                   ggfun_0.0.9                   
## [195] httr_1.4.5                     highr_0.10                    
## [197] stringi_1.7.12                 vroom_1.6.1                   
## [199] farver_2.1.1                   annotate_1.76.0               
## [201] DT_0.27                        xml2_1.3.3                    
## [203] ggdendro_0.1.23                boot_1.3-28.1                 
## [205] BiocNeighbors_1.16.0           kableExtra_1.3.4.9000         
## [207] lme4_1.1-31                    interp_1.1-3                  
## [209] ade4_1.7-22                    geneplotter_1.76.0            
## [211] ggplotify_0.1.0                picante_1.8.2                 
## [213] energy_1.7-11                  DESeq2_1.38.3                 
## [215] DEoptimR_1.0-11                bit_4.0.5                     
## [217] jpeg_0.1-10                    MatrixGenerics_1.10.0         
## [219] pkgconfig_2.0.3                lmerTest_3.1-3                
## [221] gsl_2.1-8                      DirichletMultinomial_1.40.0   
## [223] rstatix_0.7.2                  corrplot_0.92                 
## [225] knitr_1.42

Straw – WGCNA

Grigory Gladkov

8/30/2022

0.1 Preprocessing:

0.2 Import

0.3 functions

0.3.1 modify metadata

0.3.2 Select from the dataset only experemental part

0.4 Alpha diversity

0.4.0.1 mpd

0.5 beta statistic

0.5.1 permanova

0.5.2 dynamic of beta diversity

0.5.3 basic NMDS plot

0.5.3.1 AncomBC

0.6 Splitting the dataset

0.6.1 Basic information about dataset

0.7 WGCNA

0.7.1 clr

0.7.2 vst

0.7.2.1 import libraries 2

0.7.2.2 visualisation

0.7.3 mdp and other indices for clusters

0.8 Add a metadata

0.8.1 Realtime data

0.8.2 Knee plot

0.8.3 alpha/respiration.

0.9 Export all phyloseq object in one table

0.9.1 Several pictures on the dynamics of representation of individual phyla

0.9.2 Boxplots by phyla

0.9.3 resperation and phylotypes correlations

0.10 Additions

0.11 ITS

0.12 Packages info