title: "krim_chrono"
author: "GrGladkov"
date: "14 7 20"
output: html_document
setwd("~/storage/krim_chrono/shit_data/")
library(phyloseq)
ps <- readRDS(file = "~/storage/krim_chrono/shit_data/ps.krim_chrono.rds")
metadata <- ps@sam_data
metadata$Horizon <- as.character(metadata$Horizon)
metadata$Horizon[metadata$Horizon == "A"] <- "AY"
metadata$Horizon <- as.factor(metadata$Horizon)
metadata
## Site Horizon Description P2O5 K2O pH Organic Carbonates N
## kimeklis.K.10 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.11 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.12 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.13 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.14 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.15 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.16 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.17 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.18 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.19 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.1 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.20 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.21 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.22 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.23 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.24 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.25 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.26 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.27 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.28 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.29 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.2 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.30 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.31 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.32 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.33 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.34 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.35 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.3 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.4 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.56 K6 AY K6-A 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.57 K6 AY K6-A 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.58 K6 AY K6-A 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.59 K6 AY K6-A 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.5 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.60 K6 AY K6-A 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.6 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.7 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.8 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.9 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
metadata$Description <- as.character(metadata$Description)
metadata$Description[metadata$Description == "K6-A"] <- "K6-AY"
metadata
## Site Horizon Description P2O5 K2O pH Organic Carbonates N
## kimeklis.K.10 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.11 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.12 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.13 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.14 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.15 K1 AC K1-AC 6 45 8.2 0.23 33.12 0.03
## kimeklis.K.16 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.17 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.18 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.19 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.1 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.20 K2 AY K2-AY 8 595 7.9 6.84 34.13 0.10
## kimeklis.K.21 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.22 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.23 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.24 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.25 K2 C K2-C 2 14 8.2 0.47 45.60 0.43
## kimeklis.K.26 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.27 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.28 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.29 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.2 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.30 K3 AY K3-AY 11 820 7.8 8.88 28.57 0.48
## kimeklis.K.31 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.32 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.33 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.34 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.35 K3 C K3-C 5 56 8.1 0.67 4.80 0.05
## kimeklis.K.3 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.4 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.56 K6 AY K6-AY 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.57 K6 AY K6-AY 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.58 K6 AY K6-AY 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.59 K6 AY K6-AY 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.5 K1 O K1-O 123 515 7.6 22.95 20.24 1.47
## kimeklis.K.60 K6 AY K6-AY 285 1110 7.7 11.70 23.81 0.58
## kimeklis.K.6 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.7 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.8 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
## kimeklis.K.9 K1 AY K1-AY 12 212 8.0 7.32 34.50 0.78
metadata$Description <- factor(metadata$Description, levels = c("K1-O", "K1-AY", "K1-AC", "K2-AY", "K2-C", "K3-AY", "K3-C", "K6-AY"))
metadata <- metadata[order(metadata$Description, decreasing = FALSE),]
#colnames(metadata) <-
colnames(metadata) <- c("Site", "Horizon", "Description" ,"P2O5", "K2O","pH","TOC","C", "N")
sample_data(ps) <- metadata
library(phyloseq)
library(ggplot2)
plot_rich_reads_samlenames_lm <- function(physeq){
rish <- estimate_richness(physeq, measures = "Observed")
reads.sum <- as.data.frame(sample_sums(physeq))
reads.summary <- cbind(rish, reads.sum)
colnames(reads.summary) <- c("otus","reads")
reads.summary["Description"] <-unlist(purrr::map(stringr::str_split(rownames(physeq@sam_data), "K.", 2), function(x) x[[2]]))
reads.summary["Repeats"] <- physeq@sam_data$Description
reads.summary["Plant"] <- physeq@sam_data$Plant
library(ggrepel)
require(ggforce)
p1 <- ggplot(data=reads.summary) + geom_point(aes(y=otus, x=log2(reads), color=Repeats),size=3) + geom_text_repel(aes(y=otus, x=log2(reads), label=paste0(Repeats, "_", Description))) + theme_bw()+
geom_smooth(aes(y=otus, x=log2(reads), fill=Repeats, color=Repeats),method=lm, se=FALSE, ymin = 1) + scale_x_continuous(sec.axis = sec_axis(sec.axis ~ 2**.))
# geom_mark_ellipse(aes(y = otus, x=reads, group = Repeats, label = Repeats, color = Repeats), label.fontsize = 10, label.buffer = unit(2, "mm"), label.minwidth = unit(5, "mm"),con.cap = unit(0.1, "mm"))
return(p1)
}
plot_rich_reads_samlenames_lm(ps)
ps.f <- subset_samples(ps, sample_names(ps) != 'kimeklis.K.20')
ps.f <- subset_samples(ps.f, sample_names(ps.f) != 'kimeklis.K.22')
ps.f <- subset_samples(ps.f, sample_names(ps.f) != 'kimeklis.K.23')
ps.f <- prune_taxa(taxa_sums(ps.f) > 0, ps.f)
plot_rich_reads_samlenames_lm(ps.f)
### Alpha diversity violins.
library(ggpubr)
alpha.custom <- function(ps.f, arrga = "PD"){
require(picante)
pd <- pd(ps.f@otu_table@.Data, ps.f@phy_tree)
pd1 <- estimate_richness(ps.f, measures=c("Observed", "InvSimpson", "Shannon"))
pd <- cbind(pd,ps.f@sam_data, pd1 )
bmi <- levels(as.factor(pd$Description))
bmi.pairs <- combn(seq_along(bmi), 2, simplify = FALSE, FUN = function(i)bmi[i])
p1 <- ggviolin(pd, x = "Description", y = arrga,
add = "boxplot", fill = "Description") + stat_compare_means(comparisons = bmi.pairs, method = "wilcox.test") +
scale_x_discrete(limits = c("K1-O", "K1-AY", "K1-AC", "K2-AY", "K2-C", "K3-AY", "K3-C", "K6-AY")) + theme(axis.title.x = element_blank(), legend.title = element_blank())
return(p1)
}
p.alpha.Cdt.oo <- alpha.custom(ps.f, arrga = "Observed")
p.alpha.Cdt.sh <- alpha.custom(ps.f, arrga = "PD")
p.alpha.Cdt.is <- alpha.custom(ps.f, arrga = "Shannon")
p.alpha.Cdt.pd <- alpha.custom(ps.f, arrga = "InvSimpson")
p1 <- ggarrange(p.alpha.Cdt.oo, p.alpha.Cdt.sh,p.alpha.Cdt.is, p.alpha.Cdt.pd , ncol = 4 ,label.x = 0.105, nrow = 1, common.legend = TRUE)
p1
phyloseq_to_amp <- function(ps){
require(ampvis2)
require(tibble)
require(phyloseq)
colnames(ps@tax_table@.Data) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
OTU1 = as(otu_table(ps), "matrix")
OTU1 <- t(OTU1)
OTUdf = as.data.frame(OTU1)
taxa.ps <- as(tax_table(ps), "matrix")
taxa.df = as.data.frame(taxa.ps)
my_otu_table <- merge(OTUdf, taxa.df, by=0)
my_otu_table <- column_to_rownames(my_otu_table, var="Row.names")
my_metadata <- as_tibble(sample_data(ps), rownames=NA)
my_metadata <- rownames_to_column(my_metadata,var = "SampleID")
my_tree <- phy_tree(ps)
amp.ps <- amp_load(otutable = my_otu_table, metadata = my_metadata, tree = my_tree)
return(amp.ps)
}
amp <- phyloseq_to_amp(ps.f)
## Warning in class(x) <- c(setdiff(subclass, tibble_class), tibble_class): Setting class(x) to multiple strings ("tbl_df", "tbl", ...); result will no longer be an S4 object
amp_heatmap(amp, group_by = "Description",tax_add = "Phylum", tax_show = 40, tax_aggregate = "Family", order_x_by = c("K1-O", "K1-AY", "K1-AC", "K2-AY", "K2-C", "K3-AY", "K3-C", "K6-AY")) + theme_bw() + theme(text = element_text(size=15), legend.position = "none")
amp_heatmap(amp, group_by = "Description",tax_show = 20, tax_aggregate = "Phylum") + theme_bw() + theme(text = element_text(size=15), legend.position = "none")
## Warning: Transformation introduced infinite values in discrete y-axis
beta_custom_norm_NMDS_elli <- function(ps, seed = 7888, normtype="vst", color="Description"){
require(phyloseq)
require(ggplot2)
require(ggpubr)
require(DESeq2)
library(ggforce)
# beta_NMDS <- function(){
#normalisation. unifrac - rarefaction; wunifrac,bray - varstab
diagdds = phyloseq_to_deseq2(ps, ~ Description)
diagdds = estimateSizeFactors(diagdds, type="poscounts")
diagdds = estimateDispersions(diagdds, fitType = "local")
if (normtype =="vst")
pst <- varianceStabilizingTransformation(diagdds)
if (normtype =="log")
pst <- rlogTransformation(diagdds)
pst.dimmed <- t(as.matrix(assay(pst)))
pst.dimmed[pst.dimmed < 0.0] <- 0.0
ps.varstab <- ps
otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE)
ps.rand <- rarefy_even_depth(ps, rngseed = seed)
#beta and ordination
ordination.b <- ordinate(ps.varstab, "NMDS", "bray")
ordination.u <- ordinate(ps.rand, "NMDS", "unifrac")
ordination.w <- ordinate(ps.varstab, "NMDS", "wunifrac")
#plotting
p1 = plot_ordination(ps, ordination.b, type="sample", color="Description", title="NMDS - Bray-Curtis",
axes = c(1,2) ) + theme_bw() + theme(text = element_text(size = 10)) + geom_point(size = 3) +
geom_mark_ellipse(aes(group = Description, label = Description), label.fontsize = 10, label.buffer = unit(2, "mm"), label.minwidth = unit(5, "mm"),con.cap = unit(0.1, "mm"))
p2 = plot_ordination(ps, ordination.u, type="sample", color="Description", title="NMDS - UniFrac",
axes = c(1,2) ) + theme_bw() + theme(text = element_text(size = 10)) + geom_point(size = 3) +
geom_mark_ellipse(aes(group = Description, label = Description), label.fontsize = 10, label.buffer = unit(2, "mm"), label.minwidth = unit(5, "mm"),con.cap = unit(0.1, "mm"))
p3 = plot_ordination(ps, ordination.w, type="sample", color="Description", title="NMDS - Weighted UniFrac",
axes = c(1,2) ) + theme_bw() + theme(text = element_text(size = 10)) + geom_point(size = 3) +
geom_mark_ellipse(aes(group = Description, label = Description), label.fontsize = 10, label.buffer = unit(2, "mm"), label.minwidth = unit(5, "mm"),con.cap = unit(0.1, "mm"))
#merge by ggpubr
p.all <- ggarrange(p1, p2, p3, ncol = 3 , nrow = 1, common.legend = TRUE, legend = "none", font.label = list(size = 12, face = "bold", color ="black"))
return(p.all)
}
p.beta <- beta_custom_norm_NMDS_elli(ps.f)
p.beta
permanova.forloop.noNestedPlant <- function(factor){
require(vegan)
require(phyloseq)
dist <- phyloseq::distance(physeq, "bray")
metadata <- as(sample_data(physeq@sam_data), "data.frame")
ad <- adonis2(as.formula(paste( "dist ~ ", factor)), data = metadata)
ad <- cbind2(ad[1,][3], ad[1,][5])
return(ad)
}
physeq <- ps.f
col.physeq <- colnames(physeq@sam_data)
col.physeq <- col.physeq[ col.physeq != "Description"]
permanova.forloop.pos.noPlant <- purrr::possibly(permanova.forloop.noNestedPlant, otherwise = NA)
out <- purrr::map(col.physeq, permanova.forloop.pos.noPlant)
out.d.noPlant <- do.call("rbind", out)
out.d.noPlant[order(out.d.noPlant$R2, decreasing = TRUE),]
## R2 Pr(>F)
## Site 0.49421618 0.001
## Horizon 0.43021747 0.001
## N 0.18573955 0.001
## TOC 0.17705733 0.001
## pH 0.15795176 0.001
## K2O 0.14989736 0.001
## P2O5 0.11987081 0.001
## C 0.04737008 0.110
permanova.forloop.noNestedPlant <- function(factor){
require(vegan)
require(phyloseq)
dist <- phyloseq::distance(physeq, "bray")
metadata <- as(sample_data(physeq@sam_data), "data.frame")
ad <- adonis2(as.formula(paste( "dist ~ Horizon/", factor)), data = metadata)
ad <- cbind2(ad[2,][3], ad[2,][5])
return(ad)
}
physeq <- ps.f
col.physeq <- colnames(physeq@sam_data)
col.physeq <- col.physeq[ col.physeq != c("Description")]
permanova.forloop.pos.noPlant <- purrr::possibly(permanova.forloop.noNestedPlant, otherwise = NA)
out <- purrr::map(col.physeq, permanova.forloop.pos.noPlant)
out.d.noPlant <- do.call("rbind", out)
out.d.noPlant[order(out.d.noPlant$R2, decreasing = TRUE),]
## R2 Pr(>F)
## Residual 0.5697825 NA
## Horizon:Site 0.3321851 0.001
## Horizon:N 0.1637269 0.001
## Horizon:K2O 0.1478728 0.001
## Horizon:pH 0.1377684 0.001
## Horizon:P2O5 0.1126650 0.001
## Horizon:C 0.1097756 0.001
## Horizon:TOC 0.1051342 0.001
library(phyloseq)
library(vegan)
ps.merged <- merge_samples(ps.f, "Description")
physeq <- ps.merged
veganifyOTU <- function(physeq){
require(phyloseq)
if(taxa_are_rows(physeq)){physeq <- t(physeq)}
return(as(otu_table(physeq), "matrix"))
}
otus.ps.vegan <- veganifyOTU(physeq)
metadata <- as(sample_data(physeq), "data.frame")
vare.cca <- vegan::cca(otus.ps.vegan ~ N + TOC + C + pH + K2O + P2O5, data=metadata)
kate.ggcca.sites <- function(vare.cca){
require(tidyverse)
biplot <- as.data.frame(vare.cca$CCA$biplot)
wa <- as.data.frame(vare.cca$CCA$wa)
biplot <- rownames_to_column(biplot, "Label") %>%
add_column(Score = rep("biplot", length(rownames(biplot))))
wa <- rownames_to_column(wa, "Label") %>%
add_column(Score = rep("sites", length(rownames(wa))))
fdat_amazing <- rbind(biplot, wa)
p <- ggplot(fdat_amazing %>% filter(Score %in% c("sites","biplot"))) +
geom_point(data = fdat_amazing %>% dplyr::filter(Score == "sites"), mapping = aes(x=CCA1, y=CCA2, colour = factor(Score))) +
geom_segment(data = fdat_amazing %>% dplyr::filter(Score == "biplot"), aes(x = 0, xend = CCA1, y = 0, yend = CCA2), alpha=0.8, color = "red",arrow = arrow(angle = 3)) +
geom_text_repel(aes(x=CCA1, y=CCA2, label= Label),size=4) +
theme(legend.position = "none", panel.background = element_rect(fill = "white", colour = "grey50"))
return(p)
}
kate.ggcca.sites(vare.cca)
anova(vare.cca)
## Permutation test for cca under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = otus.ps.vegan ~ N + TOC + C + pH + K2O + P2O5, data = metadata)
## Df ChiSquare F Pr(>F)
## Model 6 1.46588 1.5235 0.133
## Residual 1 0.16036
anova(vare.cca, by="terms")
## Permutation test for cca under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = otus.ps.vegan ~ N + TOC + C + pH + K2O + P2O5, data = metadata)
## Df ChiSquare F Pr(>F)
## N 1 0.35556 2.2172 0.055 .
## TOC 1 0.21456 1.3379 0.247
## C 1 0.14223 0.8869 0.560
## pH 1 0.26525 1.6541 0.154
## K2O 1 0.30221 1.8846 0.090 .
## P2O5 1 0.18607 1.1603 0.353
## Residual 1 0.16036
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif.cca(vare.cca)
## N TOC C pH K2O P2O5
## 12.586457 309.966091 10.443870 688.361650 171.632543 7.990314
anova(vare.cca, by="mar")
## Permutation test for cca under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = otus.ps.vegan ~ N + TOC + C + pH + K2O + P2O5, data = metadata)
## Df ChiSquare F Pr(>F)
## N 1 0.10590 0.6604 0.771
## TOC 1 0.25883 1.6141 0.168
## C 1 0.18453 1.1507 0.337
## pH 1 0.25116 1.5662 0.174
## K2O 1 0.21595 1.3466 0.247
## P2O5 1 0.18607 1.1603 0.341
## Residual 1 0.16036
ps.f <- subset_taxa(ps.f, !is.na(phylum))
ps.ay <- prune_samples(sample_data(ps.f)$Horizon %in% c("AY"),ps.f)
ps.ay <- prune_taxa(taxa_sums(ps.ay) > 0, ps.ay)
ps.ay.w2 <- prune_samples(sample_data(ps.ay)$Site != c("K2"), ps.ay)
ps.ay.w2 <- prune_taxa(taxa_sums(ps.ay.w2) > 0, ps.ay.w2)
ps.c <- prune_samples(sample_data(ps.f)$Horizon %in% c("C", "AC"),ps.f)
ps.c <- prune_taxa(taxa_sums(ps.c) > 0, ps.c)
ps.c.w2 <- prune_samples(sample_data(ps.c)$Site != c("K2"), ps.c)
ps.c.w2 <- prune_taxa(taxa_sums(ps.c.w2) > 0, ps.c.w2)
des_w_simper <- function(ps, factor){
require(DESeq2)
require(vegan)
require(tibble)
require(phyloseq)
diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))
diagdds = estimateSizeFactors(diagdds, type="poscounts")
diagdds = estimateDispersions(diagdds, fitType = "local")
diagdds = DESeq(diagdds)
samp <-sample_data(ps)
aggdata <- t(aggregate.data.frame(as.data.frame(as.data.frame(t(diagdds@assays@data$mu))), by=list(samp[[factor]]), median))
colnames(aggdata) <- aggdata[1,]
aggdata <- aggdata[-1,]
res = results(diagdds)
res.df <- as.data.frame(res)
nice <- cbind(res.df, as.data.frame(ps@tax_table@.Data[rownames(res.df),]), as.data.frame(aggdata)[rownames(res.df),])
}
des.ay <- des_w_simper(ps.ay.w2, "Site")
des.c <- des_w_simper(ps.c.w2, "Site")
filter.des <- function(des, alpha=0.1, baseMean = 15){
des <- des[(des$padj < alpha), ]
des <- des[(des$baseMean > baseMean),]
#des <- des[(des$log2FoldChange > 0),]
des <- des[(is.na(des$padj) != TRUE),]
return(des)
}
alpha.c <- 0.05
baseMean.c <- 60
des.c.filt <- filter.des(des.c, alpha = alpha.c, baseMean = baseMean.c)
len.c <- length(rownames(des.c.filt))
paste0("length des.c after filtation with param: alpha - ", alpha.c, ", baseMean - ", baseMean.c, " ==> ", len.c)
## [1] "length des.c after filtation with param: alpha - 0.05, baseMean - 60 ==> 45"
alpha.ay <- 0.05
baseMean.ay <- 60
des.ay.filt <- filter.des(des.ay, alpha = alpha.ay, baseMean = baseMean.ay)
len.ay <- length(rownames(des.ay.filt))
paste0("length des.ay after filtation with param: alpha - ", alpha.ay, ", baseMean - ", baseMean.ay, " ==> ", len.ay)
## [1] "length des.ay after filtation with param: alpha - 0.05, baseMean - 60 ==> 26"
list.both <- c(rownames(des.c.filt), rownames(des.ay.filt))
ps.changed <- prune_taxa(list.both, ps.f)
phyloseq <- ps.changed
library(ggtree)
library(tidyverse)
tree <- phyloseq@phy_tree
taxa.pruned <- as.data.frame(phyloseq@tax_table@.Data)
taxa.pruned <- taxa.pruned %>% mutate_all(as.character)
old.tips <- tree$tip.label
matches <- regmatches(tree$tip.label, gregexpr("[[:digit:]]+", tree$tip.label))
taxa.pruned$number <- as.numeric(unlist(matches))
taxa.pruned$taxa <- ifelse(is.na(taxa.pruned$genus), taxa.pruned$family, taxa.pruned$genus)
taxa.pruned[taxa.pruned == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
taxa.pruned[taxa.pruned == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Pararhizobium"
taxa.pruned$taxa2 <- ifelse(is.na(taxa.pruned$species), with(taxa.pruned, paste0(taxa)), with(taxa.pruned, paste0(taxa, " ", species )))
taxa.pruned$taxa3 <- ifelse(taxa.pruned$phylum == "Proteobacteria", with(taxa.pruned, paste0(taxa.pruned$number, ".", taxa2, " // ", class)), with(taxa.pruned, paste0(taxa.pruned$number, ".", taxa2, " // ", phylum)))
tree$tip.label <- taxa.pruned$taxa3
p <- ggtree(tree, ladderize = F) + geom_tiplab(mapping = aes(), align=TRUE, linesize=.5) + xlim(NA, 4)
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
t.ay <- as_tibble(des.ay, rownames = "id") %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select(id, log2FoldChange) %>%
rename(ay = log2FoldChange)
t.c <- as_tibble(des.c, rownames = "id") %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select(id, log2FoldChange) %>%
rename(c = log2FoldChange)
t.all <- full_join(t.ay, t.c)
names.d <- cbind(long = tree$tip.label, short = old.tips) %>% data.frame(row.names = "short")
t.names <- as_tibble(names.d, rownames = "id")
d.all.tips <- full_join(t.names, t.all) %>% select(-c(id)) %>%
as.data.frame() %>% column_to_rownames("long")
p2 <- gheatmap(p, d.all.tips, offset=2, width=0.8) + scale_colour_gradientn(colours = c("blue", "white", "red"),na.value = "grey50", guide = "colourbar",aesthetics = "fill")
p2 <- p2 + labs(fill = "L2FC K1/K3")
otus.ay.rel <- t(apply(otu_table(ps.ay), 1, function(x) x / sum(x)))
otus.c.rel <- t(apply(otu_table(ps.c), 1, function(x) x / sum(x)))
# for ay
as_tibble(t(otus.ay.rel ), rownames = "id" ) %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select(id) -> id.ay
as_tibble(t(otus.ay.rel), rownames = "id" ) %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select_if(is.numeric) %>%
replace(is.na(.), 0) %>%
mutate(ayMean=rowMeans(.)) %>%
select(ayMean) -> res.ay
res.ayMean.rel <- cbind2(id.ay,res.ay)
# for c
as_tibble(t(otus.c.rel), rownames = "id" ) %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select(id) -> id.c
as_tibble(t(otus.c.rel), rownames = "id" ) %>%
filter(id %in% phyloseq@phy_tree$tip.label) %>%
select_if(is.numeric) %>%
replace(is.na(.), 0) %>%
mutate(cMean=rowMeans(.)) %>%
select(cMean) -> res.c
res.cMean.rel <- cbind2(id.c, res.c)
all.resMean.rel <- full_join(res.ayMean.rel, res.cMean.rel)
all.resMean.rel %>% replace(is.na(.), 0) -> all.resMean.rel
all.resMean.rel$ayMean <- 0 - all.resMean.rel$ayMean
all.resMean.rel.melted <- reshape2::melt(all.resMean.rel, id=c("id"))
library(reshape2)
library(ggstance)
all.resMean.rel.melted <- melt(all.resMean.rel, id=c("id"))
first.tree <- phyloseq@phy_tree
ids.tree <- data.frame(id = first.tree$tip.label, id.taxa = tree$tip.label)
some.join <- full_join(ids.tree, all.resMean.rel.melted)
## Warning: Column `id` joining factor and character vector, coercing into character vector
some.join <- subset(some.join, select = -c(id))
some.join["numbers"] <- ifelse(some.join$value == 0, NA, abs(some.join$value))
some.join$value <- some.join$value*100
some.join["position"] <- ifelse(some.join$value >= 0, some.join$value + 0.5, some.join$value - 0.5)
p4 <- facet_plot(p2, panel = "AY / C", data = some.join, geom = geom_barh,
mapping = aes(x = value, label=value), color = "salmon", fill = "white",
stat='identity')
## Warning: Ignoring unknown aesthetics: label
p4 <- facet_plot(p4, geom=geom_text , data = some.join, mapping=aes(x=position,label=round(numbers*100, digits = 2)), panel = "AY / C")
library(gtable)
library(grid)
gt <- ggplot_gtable(ggplot_build(p4))
gt$widths[7] <- 0.5*gt$widths[7] # in this case it was colmun 7 - reduce the width by a half
plot(gt) # plot with grid draw
The first 20 most abundant phyla
des_w_simper <- function(ps, factor){
require(DESeq2)
require(vegan)
require(tibble)
require(phyloseq)
diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))
diagdds = estimateSizeFactors(diagdds, type="poscounts")
diagdds = estimateDispersions(diagdds, fitType = "local")
diagdds = DESeq(diagdds)
samp <-sample_data(ps)
aggdata <- t(aggregate.data.frame(as.data.frame(as.data.frame(t(diagdds@assays@data$mu))), by=list(samp[[factor]]), median))
colnames(aggdata) <- aggdata[1,]
aggdata <- aggdata[-1,]
res = results(diagdds)
res.df <- as.data.frame(res)
nice <- cbind(res.df, as.data.frame(ps@tax_table@.Data[rownames(res.df),]), as.data.frame(aggdata)[rownames(res.df),])
}
ps.23 <- prune_samples(sample_data(ps.f)$Site %in% c("K2", "K3"), ps.f)
ps.23 <- prune_taxa(taxa_sums(ps.23) > 0, ps.23)
ps.23.phylum <- tax_glom(ps.23, "phylum")
des.23.phylum <- des_w_simper(ps.23.phylum, "Horizon")
des.23.little <- des.23.phylum[,c("phylum", "baseMean" , "log2FoldChange", "padj", "AY", "C")]
des.23.little[order(des.23.little$baseMean, decreasing = TRUE),]
## phylum baseMean log2FoldChange padj AY C
## Seq4 Actinobacteria 6301.5255960 -0.05491843 6.294639e-01 8039.421 5219.175
## Seq15 Proteobacteria 5209.6795710 -0.41950555 1.918988e-04 7410.866 3736.752
## Seq19 Acidobacteria 3279.0575887 0.25727757 1.406927e-02 3764.159 3034.069
## Seq7 Bacteroidetes 3053.3218936 -0.97846494 1.368254e-08 4982.329 1705.274
## Seq1 Thaumarchaeota 1826.6375638 0.94448324 7.659076e-05 1595.531 2070.772
## Seq5 Verrucomicrobia 1695.6316976 0.72871180 6.394829e-03 1623.370 1814.223
## Seq3 Firmicutes 1649.1663526 0.76682627 6.553081e-02 1554.13 1783.34
## Seq279 Planctomycetes 1375.9479832 -0.14256017 7.743462e-01 1804.466 1102.410
## Seq250 Chloroflexi 499.2900010 -0.44428638 1.182226e-02 715.1036 354.4334
## Seq13 Cyanobacteria 397.2302571 -2.98485111 1.500616e-07 845.67138 72.04122
## Seq140 Gemmatimonadetes 320.2904724 0.03327555 9.116064e-01 396.9901 273.9717
## Seq51 Entotheonellaeota 301.6643230 0.34629107 2.416710e-01 335.2299 287.4061
## Seq1004 Patescibacteria 183.1121783 0.60765956 8.372335e-04 184.7009 189.8025
## Seq104 Rokubacteria 75.1990384 2.58469929 2.170211e-28 28.14306 113.85508
## Seq750 FBP 66.8784249 -2.29237238 5.832548e-11 134.04283 18.45355
## Seq3158 Armatimonadetes 51.1700534 -0.90210189 2.222247e-06 81.49297 29.40827
## Seq161 Nitrospirae 35.1831905 -0.05203801 9.116064e-01 44.84191 29.16945
## Seq1603 Elusimicrobia 15.7648539 -0.15889300 8.019380e-01 20.96892 12.66641
## Seq3581 Dependentiae 12.9131278 3.22138652 2.536225e-12 3.302095 20.769813
## Seq6494 Chlamydiae 8.9109392 0.34416406 6.284375e-01 10.072363 8.622718
## Seq6371 BRC1 6.1118797 -0.82385602 1.258233e-01 9.729961 3.706934
## Seq4706 Fibrobacteres 4.1991357 -1.04857792 2.416710e-01 7.055137 2.300177
## Seq9229 Nanoarchaeaeota 3.1238753 1.05955962 4.987620e-01 2.630226 3.697101
## Seq4958 Hydrogenedentes 2.7344146 -1.14953387 2.627495e-01 4.615518 1.403090
## Seq8424 Latescibacteria 2.5641341 3.16455815 2.471961e-04 0.6945295 4.1997819
## Seq8109 Deinococcus-Thermus 2.0429016 -3.71925103 8.238141e-03 4.5628780 0.2336371
## Seq5934 Euryarchaeota 1.9267378 1.39428326 2.323379e-01 1.359727 2.410360
## Seq7482 Omnitrophicaeota 1.6258513 -0.80498818 4.987620e-01 2.5111852 0.9693091
## Seq5493 WPS-2 1.0430037 -0.67655574 6.284375e-01 1.6026762 0.6762252
## Seq8343 Kiritimatiellaeota 0.3087374 2.15335470 6.284375e-01 0.2005017 0.6015221
## Seq10814 WS2 0.1387764 0.14781758 9.574199e-01 0.3127043 0.2336364
## Seq8586 GAL15 0.1168035 0.78900475 8.786140e-01 0.2005009 0.2336359
ps.23 <- prune_samples(sample_data(ps.f)$Site %in% c("K2"), ps.f)
ps.23 <- prune_taxa(taxa_sums(ps.23) > 0, ps.23)
ps.23.phylum <- tax_glom(ps.23, "phylum")
des.23.phylum <- des_w_simper(ps.23.phylum, "Horizon")
des.23.little <- des.23.phylum[,c("phylum", "baseMean" , "log2FoldChange", "padj", "AY", "C")]
des.23.little[order(des.23.little$baseMean, decreasing = TRUE),]
## phylum baseMean log2FoldChange padj AY C
## Seq4 Actinobacteria 6457.5907320 -0.21845165 0.401211125447261 7958.172 5597.208
## Seq15 Proteobacteria 5033.3344239 -0.33952153 0.095802174023108 6405.788 4142.715
## Seq7 Bacteroidetes 3307.7219336 -0.75106977 0.000147368928684 4638.141 2255.112
## Seq19 Acidobacteria 3241.6096078 -0.01450181 0.914243686737176 3770.754 3054.790
## Seq1 Thaumarchaeota 2067.3495622 0.91118057 0.001357623531849 1738.177 2674.886
## Seq3 Firmicutes 1309.6522692 1.54864500 0.000000482711331 830.7717 1988.7900
## Seq279 Planctomycetes 1268.4025696 -0.74555480 0.051335235246781 1777.0267 867.3177
## Seq5 Verrucomicrobia 1189.2145011 0.17280312 0.535094208211581 1306.765 1205.410
## Seq250 Chloroflexi 473.1249912 -0.19121037 0.446948685049592 579.6650 415.4662
## Seq51 Entotheonellaeota 364.5301562 -0.26471555 0.428405026666922 454.4883 309.5666
## Seq268 Gemmatimonadetes 297.8677676 0.39154859 0.428405026666922 304.0660 326.4031
## Seq1866 Patescibacteria 175.8717753 0.50339104 0.186349332370725 173.6635 201.4480
## Seq73 Cyanobacteria 168.6885307 -1.87808327 0.000052584805316 283.54993 63.12317
## Seq750 FBP 85.0924774 -2.76767654 0.000000002597082 156.0842 18.7553
## Seq104 Rokubacteria 55.9420140 2.30656779 0.000000004833719 24.10396 97.57827
## Seq3160 Armatimonadetes 52.9111173 -0.94727095 0.008312454005609 76.70889 32.55423
## Seq161 Nitrospirae 50.6023724 0.13834596 0.818795148078112 55.84219 50.29529
## Seq1603 Elusimicrobia 21.8750909 0.70126364 0.224972148493191 20.22433 26.90879
## Seq4159 Dependentiae 9.4050226 2.41666879 0.002927743007979 3.790355 16.561067
## Seq6494 Chlamydiae 8.7204203 0.66191279 0.541080898792165 8.171276 10.579476
## Seq7248 BRC1 7.6383919 -0.65733373 0.528535264363214 10.738117 5.571467
## Seq5076 Hydrogenedentes 3.1791168 -2.78603030 0.171903450527085 5.8262325 0.6912382
## Seq8424 Latescibacteria 3.0797052 3.94283677 0.027488621286452 0.5115068 6.4369798
## Seq4706 Fibrobacteres 2.7477118 0.19112318 0.914243686737176 2.956795 2.762318
## Seq8109 Deinococcus-Thermus 2.4813577 -3.31485302 0.178838308631638 4.5905733 0.3774986
## Seq7482 Omnitrophicaeota 1.3805689 0.41276713 0.914243686737176 1.365882 1.487946
## Seq6204 WPS-2 1.3649896 -0.76924023 0.813958231489664 1.8789975 0.9021535
## Seq8665 Nanoarchaeaeota 0.8279526 -2.86503431 0.446948685049592 1.7224456 0.1934652
## Seq5934 Euryarchaeota 0.6004933 3.08432039 0.446948685049592 0.1970581 1.3676840
## Seq10814 WS2 0.1014093 -0.45861083 0.914243686737176 0.3248903 0.1934638
ps.23 <- prune_samples(sample_data(ps.f)$Site %in% c("K3"), ps.f)
ps.23 <- prune_taxa(taxa_sums(ps.23) > 0, ps.23)
ps.23.phylum <- tax_glom(ps.23, "phylum")
des.23.phylum <- des_w_simper(ps.23.phylum, "Horizon")
des.23.little <- des.23.phylum[,c("phylum", "baseMean" , "log2FoldChange", "padj", "AY", "C")]
des.23.little[order(des.23.little$baseMean, decreasing = TRUE),]
## phylum baseMean log2FoldChange padj AY C
## Seq4 Actinobacteria 6185.6502430 0.008693494 9.315222e-01 8007.530 5035.367
## Seq10 Proteobacteria 5339.0233477 -0.531675056 3.866285e-03 8196.419 3543.966
## Seq19 Acidobacteria 3309.3008901 0.380819566 6.434115e-02 3732.947 3038.124
## Seq7 Bacteroidetes 2900.8424021 -1.194714731 1.159587e-05 5243.118 1431.727
## Seq5 Verrucomicrobia 2037.8713402 0.863739659 1.801414e-03 1876.703 2134.630
## Seq3 Firmicutes 1856.5498270 0.349058044 5.439981e-01 2120.441 1688.179
## Seq1 Thaumarchaeota 1664.0786448 0.929744745 1.492534e-02 1487.666 1771.340
## Seq246 Planctomycetes 1445.7245537 0.129570160 8.718199e-01 1792.654 1225.791
## Seq13 Cyanobacteria 572.0021002 -3.463596901 4.208798e-07 1362.06081 77.17302
## Seq292 Chloroflexi 516.6793296 -0.669025215 9.921645e-03 823.5642 323.7549
## Seq140 Gemmatimonadetes 334.3865911 -0.264222062 4.497756e-01 473.9220 246.6515
## Seq108 Entotheonellaeota 258.4893842 0.948097032 8.776043e-06 229.1352 276.3204
## Seq1004 Patescibacteria 187.8784224 0.630771829 2.190855e-02 191.6062 185.4413
## Seq104 Rokubacteria 87.5172312 2.633264761 6.390600e-17 31.57221 122.43686
## Seq750 FBP 54.7096622 -1.922942579 5.890294e-05 112.18044 18.49138
## Seq3158 Armatimonadetes 50.2568895 -0.935735771 7.171999e-03 85.41579 27.91060
## Seq161 Nitrospirae 24.8631209 -0.285934851 5.819428e-01 35.66735 18.28569
## Seq3581 Dependentiae 15.2473107 3.672060689 2.247749e-07 2.898612 23.094367
## Seq1603 Elusimicrobia 11.5354525 -1.311676347 2.413162e-02 21.483124 5.409524
## Seq6310 Chlamydiae 9.1231554 0.066173719 9.315222e-01 11.704290 7.659156
## Seq4932 Fibrobacteres 5.1193518 -1.624619676 8.077675e-02 10.326597 2.093214
## Seq6371 BRC1 5.0834252 -0.996425335 2.796736e-01 8.681804 2.720017
## Seq9229 Nanoarchaeaeota 4.6824891 1.419821798 2.760801e-01 3.372482 5.639935
## Seq9757 Euryarchaeota 2.8397091 0.926778414 4.652194e-01 2.482343 2.949616
## Seq4958 Hydrogenedentes 2.3820225 -0.215147105 9.315222e-01 3.326578 1.791214
## Seq10115 Latescibacteria 2.1560358 2.485563374 6.434115e-02 0.8537863 2.9887796
## Seq7482 Omnitrophicaeota 1.8346567 -1.605887061 2.664342e-01 3.5638941 0.7318468
## Seq8109 Deinococcus-Thermus 1.6859274 -3.934702067 7.696121e-02 4.3255232 0.1768035
## Seq5493 WPS-2 0.8692379 -0.764473757 7.604374e-01 1.4776707 0.5437055
## Seq8343 Kiritimatiellaeota 0.5248134 2.803573791 4.652194e-01 0.2016694 0.8800663
## Seq8586 GAL15 0.1973655 1.065177519 8.718199e-01 0.2016687 0.2637573
## Seq10814 WS2 0.1619173 0.488108151 9.315222e-01 0.3008532 0.2637578
ps.f <- subset_taxa(ps.f, !is.na(phylum))
ps.ay <- prune_samples(sample_data(ps.f)$Horizon %in% c("AY"),ps.f)
ps.ay <- prune_taxa(taxa_sums(ps.ay) > 0, ps.ay)
ps.ay.w2 <- prune_samples(sample_data(ps.ay)$Site != c("K2"), ps.ay)
ps.ay.w2 <- prune_taxa(taxa_sums(ps.ay.w2) > 0, ps.ay.w2)
ps.c <- prune_samples(sample_data(ps.f)$Horizon %in% c("C", "AC"),ps.f)
ps.c <- prune_taxa(taxa_sums(ps.c) > 0, ps.c)
ps.c.w2 <- prune_samples(sample_data(ps.c)$Site != c("K2"), ps.c)
ps.c.w2 <- prune_taxa(taxa_sums(ps.c.w2) > 0, ps.c.w2)
des_w_simper <- function(ps, factor){
require(DESeq2)
require(vegan)
require(tibble)
require(phyloseq)
diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))
diagdds = estimateSizeFactors(diagdds, type="poscounts")
diagdds = estimateDispersions(diagdds, fitType = "local")
diagdds = DESeq(diagdds)
samp <-sample_data(ps)
aggdata <- t(aggregate.data.frame(as.data.frame(as.data.frame(t(diagdds@assays@data$mu))), by=list(samp[[factor]]), median))
colnames(aggdata) <- aggdata[1,]
aggdata <- aggdata[-1,]
res = results(diagdds)
res.df <- as.data.frame(res)
nice <- cbind(res.df, as.data.frame(ps@tax_table@.Data[rownames(res.df),]), as.data.frame(aggdata)[rownames(res.df),])
}
des.ay <- des_w_simper(ps.ay.w2, "Site")
des.c <- des_w_simper(ps.c.w2, "Site")
padj <= 0.1 sorted by baseMean
ps.23.ay <- prune_samples(sample_data(ps.f)$Description %in% c("K2-AY", "K3-AY"), ps.f)
ps.23.ay <- prune_taxa(taxa_sums(ps.23.ay) > 0, ps.23.ay)
des.23.ay<- des_w_simper(ps.23.ay, "Site")
des.23.ay.little <- des.23.ay[des.23.ay$pvalue <= 0.05,]
des.23.ay.little <- des.23.ay.little[,c("class", "family", "genus", "species" , "log2FoldChange", "padj","baseMean", "K2", "K3")]
head(des.23.ay.little[order(des.23.ay.little$baseMean, decreasing = TRUE),], n=30)
## class family genus species log2FoldChange padj baseMean K2 K3
## Seq4 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -0.6902170 0.209271408233 552.10332 723.0665 426.0813
## Seq13 Oxyphotobacteria <NA> <NA> <NA> 4.8559580 0.000019353255 493.87263 30.86026 849.72593
## Seq5 Verrucomicrobiae Xiphinematobacteraceae Candidatus_Xiphinematobacter <NA> 1.3170407 0.059831883913 216.39731 122.2671 289.6468
## Seq18 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -1.0520769 0.042319401759 180.25027 261.4547 119.8892
## Seq47 Bacteroidia Chitinophagaceae <NA> <NA> -0.7603520 0.210856388475 150.47342 201.2393 112.9573
## Seq32 Nitrososphaeria Nitrososphaeraceae Candidatus_Nitrososphaera <NA> -0.9347274 0.126653050798 120.36823 169.32997 84.22541
## Seq71 Bacteroidia Chitinophagaceae <NA> <NA> -0.8472082 0.133483564816 111.11914 152.44389 80.56849
## Seq101 Bacilli Planococcaceae <NA> <NA> 6.2827587 0.000005837684 111.05479 2.625553 194.361383
## Seq51 Entotheonellia Entotheonellaceae Candidatus_Entotheonella <NA> -1.3706198 0.028424766409 92.50186 144.99861 53.31587
## Seq136 Bacilli Planococcaceae Lysinibacillus <NA> 6.1424624 0.000002217777 91.77094 2.388674 160.440006
## Seq55 Blastocatellia_(Subgroup_4) Blastocatellaceae <NA> <NA> -0.9315486 0.204897921856 85.02177 119.61637 59.62891
## Seq106 Gammaproteobacteria Burkholderiaceae Ramlibacter <NA> -1.1464142 0.035831022687 82.29999 122.28875 52.52569
## Seq178 Bacteroidia Chitinophagaceae <NA> <NA> -1.5082661 0.034873009920 74.52068 120.36027 40.22908
## Seq145 Bacteroidia Chitinophagaceae Flavisolibacter ginsengiterrae -1.0268045 0.124036315475 70.68608 101.9850 47.5913
## Seq172 Bacteroidia Chitinophagaceae <NA> <NA> -1.5664116 0.048936849544 65.18515 106.62111 34.22919
## Seq322 Bacteroidia Chitinophagaceae <NA> <NA> 9.2041431 0.000928666722 61.01939 0.1923917 107.8945958
## Seq137 Verrucomicrobiae Chthoniobacteraceae Chthoniobacter <NA> -0.9083007 0.212757140169 57.27677 79.89432 40.47440
## Seq391 Alphaproteobacteria Mitochondria <NA> <NA> 5.4420457 0.055577708860 55.28865 2.322594 96.002522
## Seq29 Verrucomicrobiae Chthoniobacteraceae Candidatus_Udaeobacter copiosus 1.7755367 0.002514736073 52.35507 23.10500 75.21182
## Seq223 Alphaproteobacteria Azospirillaceae Skermanella <NA> -1.1382169 0.037169053170 49.66916 73.70392 31.83783
## Seq176 Bacteroidia Chitinophagaceae Flavitalea <NA> -1.3939973 0.032824040321 47.52926 74.81275 27.06643
## Seq339 Clostridia Peptostreptococcaceae Romboutsia <NA> 3.2444035 0.011860686411 45.93065 8.313558 74.910036
## Seq299 Blastocatellia_(Subgroup_4) Blastocatellaceae Stenotrophobacter <NA> -1.1114093 0.188965694319 44.42489 65.34386 28.75594
## Seq264 Thermoleophilia Solirubrobacteraceae Solirubrobacter <NA> -1.3861085 0.071655586821 43.13225 67.80083 24.66409
## Seq252 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -1.1246927 0.084442223123 42.40841 62.79985 27.38310
## Seq379 Actinobacteria Pseudonocardiaceae Pseudonocardia <NA> 1.9727596 0.037169053170 42.27788 16.65109 62.14309
## Seq122 Actinobacteria Micromonosporaceae Actinoplanes <NA> 1.7981053 0.017903446463 42.08335 18.33308 60.61909
## Seq8 Verrucomicrobiae Chthoniobacteraceae Candidatus_Udaeobacter <NA> 2.1886471 0.004059966740 41.63250 14.46773 62.71034
## Seq406 Alphaproteobacteria Beijerinckiaceae Microvirga <NA> -2.0506494 0.006337265674 40.86693 73.07056 16.76972
## Seq375 Actinobacteria Microbacteriaceae Microbacterium <NA> 1.5002450 0.177850598912 40.12390 20.58712 55.37385
padj <= 0.05 sorted by baseMean
ps.32.c <- prune_samples(sample_data(ps.f)$Description %in% c("K3-C", "K2-C"), ps.f)
ps.32.c <- prune_taxa(taxa_sums(ps.32.c) > 0, ps.32.c)
des.32.c<- des_w_simper(ps.32.c, "Site")
des.32.c.little <- des.32.c[des.32.c$pvalue <= 0.05,]
des.32.c.little <- des.32.c.little[,c("class", "family", "genus", "species" , "log2FoldChange", "padj","baseMean")]
head(des.32.c.little[order(des.32.c.little$baseMean, decreasing = TRUE),], n=30)
## class family genus species log2FoldChange padj baseMean
## Seq8 Verrucomicrobiae Chthoniobacteraceae Candidatus_Udaeobacter <NA> 2.2297154 0.00000001256169 420.66505
## Seq5 Verrucomicrobiae Xiphinematobacteraceae Candidatus_Xiphinematobacter <NA> 1.8583671 0.00186389510441 371.34034
## Seq19 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 0.9456689 0.23072471255003 286.64823
## Seq16 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -1.0099998 0.08206581914651 209.66124
## Seq29 Verrucomicrobiae Chthoniobacteraceae Candidatus_Udaeobacter copiosus 1.3397909 0.00590735303239 200.00985
## Seq7 Bacteroidia Microscillaceae <NA> <NA> -1.2879384 0.01048760974141 162.85297
## Seq23 Alphaproteobacteria Xanthobacteraceae <NA> <NA> 0.7831766 0.17323010048667 140.84314
## Seq31 Blastocatellia_(Subgroup_4) Blastocatellaceae <NA> <NA> 1.4467723 0.00093896981311 126.76307
## Seq6 Thermoleophilia 67-14 <NA> <NA> 0.7572074 0.24581532592615 120.60852
## Seq80 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 1.4225795 0.05743886785235 117.50030
## Seq28 Thermoleophilia 67-14 <NA> <NA> 1.4682131 0.00182092701751 96.62984
## Seq44 Verrucomicrobiae Xiphinematobacteraceae Candidatus_Xiphinematobacter <NA> 1.4848378 0.00645008803754 95.42536
## Seq32 Nitrososphaeria Nitrososphaeraceae Candidatus_Nitrososphaera <NA> -0.7303119 0.23523201173415 79.76755
## Seq53 Bacteroidia Chitinophagaceae <NA> <NA> -2.2464986 0.00279723903693 78.00566
## Seq242 Bacilli Planococcaceae <NA> <NA> 2.1746283 0.00186389510441 58.89644
## Seq204 Bacteroidia Chitinophagaceae <NA> <NA> -1.2988222 0.17042495835560 55.06039
## Seq330 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -1.1385370 0.18705978934018 51.26132
## Seq112 Subgroup_6 <NA> <NA> <NA> 1.0706255 0.06173710439958 49.59135
## Seq320 Acidimicrobiia <NA> <NA> <NA> 1.7732765 0.00068955300171 48.71683
## Seq193 Gammaproteobacteria SC-I-84 <NA> <NA> 2.2399658 0.00066186895223 48.34003
## Seq498 Actinobacteria Pseudonocardiaceae Actinophytocola <NA> 1.1583362 0.24241147243214 45.26491
## Seq316 Subgroup_18 <NA> <NA> <NA> 1.3198681 0.09803503262804 45.01591
## Seq246 Phycisphaerae WD2101_soil_group <NA> <NA> 1.4064123 0.09803503262804 44.61771
## Seq218 Acidimicrobiia <NA> <NA> <NA> 1.5130077 0.04357995121887 44.59771
## Seq90 Actinobacteria Propionibacteriaceae Microlunatus <NA> -1.8148877 0.00590735303239 39.11818
## Seq445 Bacteroidia Hymenobacteraceae Adhaeribacter <NA> 4.6952928 0.00000003669410 39.05398
## Seq283 Verrucomicrobiae Xiphinematobacteraceae Candidatus_Xiphinematobacter <NA> 2.3056832 0.00068955300171 37.57106
## Seq292 TK10 <NA> <NA> <NA> 1.2532720 0.07446400005776 37.48681
## Seq440 Verrucomicrobiae Chthoniobacteraceae Chthoniobacter <NA> 1.3838335 0.07740415140092 35.75134
## Seq636 TK10 <NA> <NA> <NA> 1.0773684 0.21950976902545 34.67245
padj <= 0.05 sorted by baseMean
ps.63.ay <- prune_samples(sample_data(ps.f)$Description %in% c("K6-AY", "K3-AY"), ps.f)
ps.63.ay <- prune_taxa(taxa_sums(ps.63.ay) > 0, ps.63.ay)
des.63.ay<- des_w_simper(ps.63.ay, "Site")
des.63.ay.little <- des.63.ay[des.63.ay$pvalue <= 0.05,]
des.63.ay.little <- des.63.ay.little[,c("class", "family", "genus", "species" , "log2FoldChange", "padj","baseMean")]
head(des.63.ay.little[order(des.63.ay.little$baseMean, decreasing = TRUE),],n=30)
## class family genus species log2FoldChange padj baseMean
## Seq1 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -1.2496476 3.182685e-03 488.51004
## Seq13 Oxyphotobacteria <NA> <NA> <NA> -9.9181301 4.510890e-11 424.40545
## Seq11 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 5.0511850 7.488906e-42 313.87699
## Seq20 Nitrososphaeria Nitrososphaeraceae Candidatus_Nitrocosmicus <NA> 6.9156269 2.754011e-56 282.03682
## Seq4 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -2.6254880 3.249839e-13 253.55292
## Seq5 Verrucomicrobiae Xiphinematobacteraceae Candidatus_Xiphinematobacter <NA> -3.2376422 3.432189e-17 164.70349
## Seq12 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -0.9019232 5.342809e-02 159.41671
## Seq16 Nitrososphaeria Nitrososphaeraceae <NA> <NA> 0.8744518 3.450204e-02 152.85693
## Seq10 Gammaproteobacteria Burkholderiaceae <NA> <NA> -1.2526268 6.819683e-05 150.76521
## Seq37 Bacteroidia Chitinophagaceae <NA> <NA> -3.9428088 9.913543e-10 129.36923
## Seq35 Acidobacteriia Solibacteraceae_(Subgroup_3) Bryobacter <NA> -1.0277809 2.845628e-02 123.92470
## Seq15 Deltaproteobacteria <NA> <NA> <NA> -1.5096323 4.587891e-03 115.02011
## Seq101 Bacilli Planococcaceae <NA> <NA> -3.4051953 5.496192e-04 106.18617
## Seq2 Nitrososphaeria Nitrososphaeraceae <NA> <NA> 1.1445372 8.036686e-03 105.50699
## Seq14 Alphaproteobacteria Xanthobacteraceae Bradyrhizobium <NA> 1.3468044 1.576730e-03 97.39025
## Seq18 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -0.8043396 1.145121e-02 96.29822
## Seq17 Bacilli <NA> <NA> <NA> -1.2237518 3.179254e-02 95.98438
## Seq53 Bacteroidia Chitinophagaceae <NA> <NA> 1.9361306 5.107019e-05 94.40378
## Seq136 Bacilli Planococcaceae Lysinibacillus <NA> -3.6678868 4.328289e-05 86.62313
## Seq165 Oxyphotobacteria <NA> <NA> <NA> 2.7029029 3.429441e-03 84.89485
## Seq119 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 3.1701006 3.065906e-12 81.77933
## Seq60 Blastocatellia_(Subgroup_4) Blastocatellaceae Aridibacter famidurans -5.1366962 3.750045e-22 78.87550
## Seq56 Deltaproteobacteria <NA> <NA> <NA> -0.9003433 3.409197e-02 77.36357
## Seq34 Thermoleophilia <NA> <NA> <NA> -4.3318491 1.413236e-15 76.11529
## Seq41 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 1.2446784 6.541400e-04 75.32734
## Seq31 Blastocatellia_(Subgroup_4) Blastocatellaceae <NA> <NA> -2.3942744 5.258145e-13 72.39609
## Seq30 Gammaproteobacteria Steroidobacteraceae Steroidobacter uvarum 1.9954990 3.503873e-08 71.89920
## Seq94 Subgroup_6 <NA> <NA> <NA> 1.9149295 1.092631e-07 70.41849
## Seq161 Nitrospira Nitrospiraceae Nitrospira japonica 3.7894569 1.316945e-16 68.69838
## Seq76 Rubrobacteria Rubrobacteriaceae Rubrobacter <NA> -2.2544902 3.666089e-07 67.33751
padj <= 0.05 sorted by baseMean
ps.61.ay <- prune_samples(sample_data(ps.f)$Description %in% c("K6-AY", "K1-AY"), ps.f)
ps.61.ay <- prune_taxa(taxa_sums(ps.61.ay) > 0, ps.61.ay)
des.61.ay<- des_w_simper(ps.61.ay, "Site")
des.61.ay.little <- des.61.ay[des.61.ay$pvalue <= 0.05,]
des.61.ay.little <- des.61.ay.little[,c("class", "family", "genus", "species" , "log2FoldChange", "padj","baseMean")]
head(des.61.ay.little[order(des.61.ay.little$baseMean, decreasing = TRUE),], n=30)
## class family genus species log2FoldChange padj baseMean
## Seq2 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -1.9600443 4.935722e-06 343.93501
## Seq3 Bacilli <NA> <NA> longiquaesitum 9.4107381 3.671233e-34 337.93909
## Seq11 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 4.5317267 1.414452e-48 309.69853
## Seq20 Nitrososphaeria Nitrososphaeraceae Candidatus_Nitrocosmicus <NA> 4.1698488 1.196950e-43 287.02707
## Seq6 Thermoleophilia 67-14 <NA> <NA> -5.2249513 1.018728e-45 187.53285
## Seq30 Gammaproteobacteria Steroidobacteraceae Steroidobacter uvarum -0.8942872 1.013325e-03 160.02997
## Seq7 Bacteroidia Microscillaceae <NA> <NA> 2.3783229 3.001192e-18 150.13146
## Seq1 Nitrososphaeria Nitrososphaeraceae <NA> <NA> 7.6266714 1.994774e-35 140.86738
## Seq21 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -1.5967012 1.137861e-06 123.41872
## Seq36 Thermoleophilia Solirubrobacteraceae Solirubrobacter <NA> -4.4758979 4.661652e-33 104.72293
## Seq16 Nitrososphaeria Nitrososphaeraceae <NA> <NA> 9.9985217 1.549756e-23 95.99914
## Seq24 <NA> <NA> <NA> <NA> -2.2959230 9.704926e-09 93.65713
## Seq42 Alphaproteobacteria Reyranellaceae Reyranella <NA> -2.7515738 6.308853e-22 88.06108
## Seq50 Actinobacteria Micromonosporaceae Asanoa <NA> -0.6003106 4.285014e-02 83.69577
## Seq46 Alphaproteobacteria Xanthobacteraceae <NA> <NA> -2.1148176 7.024057e-13 79.63385
## Seq40 Acidimicrobiia Ilumatobacteraceae <NA> <NA> -2.5855485 7.243064e-20 79.40673
## Seq33 Nitrososphaeria Nitrososphaeraceae <NA> <NA> -7.6638276 7.049111e-21 78.91617
## Seq48 Thermoleophilia Solirubrobacteraceae Solirubrobacter <NA> -1.0144864 3.249757e-04 78.00737
## Seq25 Actinobacteria Propionibacteriaceae Microlunatus <NA> -8.0594616 2.264272e-18 77.47479
## Seq81 Acidimicrobiia <NA> <NA> <NA> -0.8494601 4.815188e-02 74.60126
## Seq49 Gammaproteobacteria Nitrosomonadaceae MND1 <NA> -9.6831806 2.148062e-20 74.20715
## Seq53 Bacteroidia Chitinophagaceae <NA> <NA> 9.0250517 3.707050e-19 72.91090
## Seq165 Oxyphotobacteria <NA> <NA> <NA> 9.5871242 1.074578e-11 72.14437
## Seq119 Blastocatellia_(Subgroup_4) Pyrinomonadaceae RB41 <NA> 6.6721824 2.987569e-27 72.06971
## Seq57 Verrucomicrobiae Chthoniobacteraceae Candidatus_Udaeobacter <NA> -2.5828842 4.443107e-20 71.75281
## Seq94 Subgroup_6 <NA> <NA> <NA> 2.0003699 8.282151e-12 67.80215
## Seq169 Nitrososphaeria Nitrososphaeraceae <NA> <NA> 1.3963539 2.047760e-06 66.32096
## Seq161 Nitrospira Nitrospiraceae Nitrospira japonica 4.0569890 3.303835e-28 65.94252
## Seq93 Gammaproteobacteria <NA> <NA> <NA> -1.5841491 1.639893e-05 65.67189
## Seq32 Nitrososphaeria Nitrososphaeraceae Candidatus_Nitrososphaera <NA> 7.6847956 1.237219e-16 62.73745