library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(phyloseq)
library(ampvis2)
Сырые данные - выход из DADA2 пайплайна.
В той же папке - статистика dada2
Импорт уже готовых RDS объектов
setwd("~/storage/pulkovo/d2")
ps.ff <- readRDS("ps.ff")
ancom <- readRDS("ancom")
ancom_ff <- readRDS("ancom_ff")
Функции используемые для дальнейших визуализаций.
plot_rich_reads_samlenames_lm <- function(physeq, group = "Site", label = "Repeat"){
rish <- estimate_richness(physeq, measures = "Observed")
reads.sum <- as.data.frame(sample_sums(physeq))
reads.summary <- cbind(rish, reads.sum)
colnames(reads.summary) <- c("otus","reads")
reads.summary["Repeat"] <-unlist(purrr::map(stringr::str_split(rownames(physeq@sam_data), "\\.", 2), function(x) x[[2]]))
reads.summary["Site"] <- physeq@sam_data[[group]]
library(ggrepel)
require(ggforce)
p1 <- ggplot(data=reads.summary) +
geom_point(aes(y=otus, x=log2(reads), color=Site),size=3) +
geom_text_repel(aes(y=otus, x=log2(reads), label=paste0(Repeat))) +
theme_bw() +
geom_smooth(aes(y=otus, x=log2(reads), fill=Site, color=Site),method=lm, se=FALSE, ymin = 1) +
scale_x_continuous(sec.axis = sec_axis(sec.axis ~ 2**.))
return(p1)
}
beta_custom_norm_NMDS_elli_w <- function(ps, seed = 7888, normtype="vst", Color="What", Group="Repeat"){
require(phyloseq)
require(ggplot2)
require(ggpubr)
library(ggforce)
ordination.b <- ordinate(ps, "NMDS", "bray")
mds <- as.data.frame(ordination.b$points)
p <- plot_ordination(ps,
ordination.b,
type="sample",
color = Color,
title="NMDS - Bray-Curtis",
# title=NULL,
axes = c(1,2) ) +
theme_bw() +
theme(text = element_text(size = 10)) +
geom_point(size = 3) +
annotate("text",
x=min(mds$MDS1) + abs(min(mds$MDS1))/7,
y=max(mds$MDS2),
label=paste0("Stress -- ", round(ordination.b$stress, 3))) +
geom_mark_ellipse(aes_string(group = Group, label = Group),
label.fontsize = 10,
label.buffer = unit(2, "mm"),
label.minwidth = unit(5, "mm"),
con.cap = unit(0.1, "mm"),
con.colour='gray') +
theme(legend.position = "none") +
ggplot2::scale_fill_viridis_c(option = "H")
return(p)
}
plot_alpha_w_toc <- function(ps, group, metric) {
require(phyloseq)
require(ggplot2)
ps_a <- prune_taxa(taxa_sums(ps) > 0, ps)
er <- estimate_richness(ps_a)
df_er <- cbind(ps_a@sam_data, er)
df_er <- df_er %>% select(c(group, metric))
stat.test <- aov(as.formula(paste0(metric, "~", group)), data = df_er) %>%
rstatix::tukey_hsd()
y <- seq(max(er[[metric]]), length=length(stat.test$p.adj), by=max(er[[metric]]/20))
plot_richness(ps_a, x=group, measures=metric) +
geom_boxplot() +
geom_point(size=1.2, alpha=0.3) +
ggpubr::stat_pvalue_manual(
stat.test,
label = "p.adj.signif",
y.position = y) +
theme_light() +
scale_color_brewer(palette="Dark2") +
theme(axis.text.x = element_text(angle = 90),
axis.title.x=element_blank()) +
labs(y=paste(metric, "index"))
}
phyloseq_to_ampvis2 <- function(physeq) {
#check object for class
if(!any(class(physeq) %in% "phyloseq"))
stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
#ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
if(is.null(physeq@tax_table))
stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
#OTUs must be in rows, not columns
if(phyloseq::taxa_are_rows(physeq))
abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
else
abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
#tax_table is assumed to have OTUs in rows too
tax <- phyloseq::tax_table(physeq)@.Data
#merge by rownames (OTUs)
otutable <- merge(
abund,
tax,
by = 0,
all.x = TRUE,
all.y = FALSE,
sort = FALSE
)
colnames(otutable)[1] <- "OTU"
#extract sample_data (metadata)
if(!is.null(physeq@sam_data)) {
metadata <- data.frame(
phyloseq::sample_data(physeq),
row.names = phyloseq::sample_names(physeq),
stringsAsFactors = FALSE,
check.names = FALSE
)
#check if any columns match exactly with rownames
#if none matched assume row names are sample identifiers
samplesCol <- unlist(lapply(metadata, function(x) {
identical(x, rownames(metadata))}))
if(any(samplesCol)) {
#error if a column matched and it's not the first
if(!samplesCol[[1]])
stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
} else {
#assume rownames are sample identifiers, merge at the end with name "SampleID"
if(any(colnames(metadata) %in% "SampleID"))
stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
metadata$SampleID <- rownames(metadata)
#reorder columns so SampleID is the first
metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
}
} else
metadata <- NULL
#extract phylogenetic tree, assumed to be of class "phylo"
if(!is.null(physeq@phy_tree)) {
tree <- phyloseq::phy_tree(physeq)
} else
tree <- NULL
#extract OTU DNA sequences, assumed to be of class "XStringSet"
if(!is.null(physeq@refseq)) {
#convert XStringSet to DNAbin using a temporary file (easiest)
fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
} else
fastaTempFile <- NULL
#load as normally with amp_load
ampvis2::amp_load(
otutable = otutable,
metadata = metadata,
tree = tree,
fasta = fastaTempFile
)
}
filter_chrmitnas <- function(physeq){
ps_object <- physeq
ps_object <- subset_taxa(ps_object, Phylum != "NA")
# ps_object@tax_table[is.na(ps_object@tax_table)] <- TRUEps@tax_table@.Data
ps_object <- subset_taxa(ps_object,
!(Family == "Mitochondria" |
Class == "Chloroplast" |
Order == "Chloroplast"))
# ps_object@tax_table <- dplyr::na_if(ps_object@tax_table, TRUE)
ps.f <- ps_object
out_old <- capture.output(physeq)[4] %>%
str_extract(regex("([1-9]).*(?= by)"))
out_new <- capture.output(ps.f)[4] %>%
str_extract(regex("([1-9]).*(?= by)"))
message(paste0("old = ", out_old))
message(paste0("filtered = ", out_new))
return(ps.f)
}
merge_samples_mean <- function(physeq, group){
group_sums <- as.matrix(table(sample_data(physeq)[ ,group]))[,1]
merged <- merge_samples(physeq, group)
x <- as.matrix(otu_table(merged))
if(taxa_are_rows(merged)){ x<-t(x) }
out <- t(x/group_sums)
out <- otu_table(out, taxa_are_rows = TRUE)
otu_table(merged) <- out
return(merged)
}
lsf.str()
## beta_custom_norm_NMDS_elli_w : function (ps, seed = 7888, normtype = "vst", Color = "What", Group = "Repeat")
## filter_chrmitnas : function (physeq)
## merge_samples_mean : function (physeq, group)
## phyloseq_to_ampvis2 : function (physeq)
## plot_alpha_w_toc : function (ps, group, metric)
## plot_rich_reads_samlenames_lm : function (physeq, group = "Site", label = "Repeat")
Фильтрация образцов с низкой представленностью.
filter_chrmitnas - чистит датасет он NA на уровне фил, хлоропласты и
митохондрии.
ps.f <- filter_chrmitnas(ps)
plot_rich_reads_samlenames_lm(ps.f, group = "Site", label = "Sample")
## Warning: ggrepel: 37 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ps.ff <- prune_samples(!sample_names(ps.f) %in% paste0("Andronov.SEQ130.",c(34, 5, 15)), ps.f)
ps.ff <- prune_taxa(taxa_sums(ps.ff) > 0, ps.ff)
plot_rich_reads_samlenames_lm(ps.ff, group = "Site", label = "Repeat")
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
beta_custom_norm_NMDS_elli_w(ps.ff, Color = "Site", Group = "Site")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.040883
## Run 1 stress 0.04088289
## ... New best solution
## ... Procrustes: rmse 0.00005635666 max resid 0.0003171908
## ... Similar to previous best
## Run 2 stress 0.0408829
## ... Procrustes: rmse 0.0000564756 max resid 0.0003347198
## ... Similar to previous best
## Run 3 stress 0.04088298
## ... Procrustes: rmse 0.00004553785 max resid 0.0002334572
## ... Similar to previous best
## Run 4 stress 0.04088293
## ... Procrustes: rmse 0.00002476076 max resid 0.0001333129
## ... Similar to previous best
## Run 5 stress 0.04088292
## ... Procrustes: rmse 0.00001734785 max resid 0.00009972248
## ... Similar to previous best
## Run 6 stress 0.04088298
## ... Procrustes: rmse 0.00004594101 max resid 0.0002502187
## ... Similar to previous best
## Run 7 stress 0.04088289
## ... Procrustes: rmse 0.00005431418 max resid 0.0003273973
## ... Similar to previous best
## Run 8 stress 0.0408829
## ... Procrustes: rmse 0.00001471045 max resid 0.0000883607
## ... Similar to previous best
## Run 9 stress 0.04088289
## ... Procrustes: rmse 0.000005584187 max resid 0.00002531028
## ... Similar to previous best
## Run 10 stress 0.04088294
## ... Procrustes: rmse 0.00003328422 max resid 0.000185079
## ... Similar to previous best
## Run 11 stress 0.04088294
## ... Procrustes: rmse 0.00003144656 max resid 0.0001756437
## ... Similar to previous best
## Run 12 stress 0.04088312
## ... Procrustes: rmse 0.0000934482 max resid 0.0005028334
## ... Similar to previous best
## Run 13 stress 0.040883
## ... Procrustes: rmse 0.00005659016 max resid 0.000306771
## ... Similar to previous best
## Run 14 stress 0.04088308
## ... Procrustes: rmse 0.00008217108 max resid 0.0004411722
## ... Similar to previous best
## Run 15 stress 0.04088298
## ... Procrustes: rmse 0.00004980549 max resid 0.000275673
## ... Similar to previous best
## Run 16 stress 0.04088293
## ... Procrustes: rmse 0.00002461955 max resid 0.0001399969
## ... Similar to previous best
## Run 17 stress 0.04088306
## ... Procrustes: rmse 0.00007503539 max resid 0.0004052732
## ... Similar to previous best
## Run 18 stress 0.1919038
## Run 19 stress 0.0408831
## ... Procrustes: rmse 0.00008681952 max resid 0.0004698693
## ... Similar to previous best
## Run 20 stress 0.04088288
## ... New best solution
## ... Procrustes: rmse 0.00001858335 max resid 0.0001015945
## ... Similar to previous best
## *** Best solution repeated 1 times
То же, но с рарефакшеном.
ps.r <- rarefy_even_depth(ps.ff,rngseed = 1221)
beta_custom_norm_NMDS_elli_w(ps.r, Color = "Site", Group = "Site")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03281294
## Run 1 stress 0.03281294
## ... New best solution
## ... Procrustes: rmse 0.00003300971 max resid 0.0001301202
## ... Similar to previous best
## Run 2 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.000001794148 max resid 0.00000636754
## ... Similar to previous best
## Run 3 stress 0.03281305
## ... Procrustes: rmse 0.00006464719 max resid 0.0002555404
## ... Similar to previous best
## Run 4 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.000003331408 max resid 0.00001223961
## ... Similar to previous best
## Run 5 stress 0.03281296
## ... Procrustes: rmse 0.00002995194 max resid 0.0001188467
## ... Similar to previous best
## Run 6 stress 0.03281301
## ... Procrustes: rmse 0.00005133915 max resid 0.0002033569
## ... Similar to previous best
## Run 7 stress 0.032813
## ... Procrustes: rmse 0.00005186784 max resid 0.0002055802
## ... Similar to previous best
## Run 8 stress 0.03281301
## ... Procrustes: rmse 0.00005504832 max resid 0.0002181154
## ... Similar to previous best
## Run 9 stress 0.03281294
## ... Procrustes: rmse 0.00001107934 max resid 0.00004250849
## ... Similar to previous best
## Run 10 stress 0.03281294
## ... Procrustes: rmse 0.00000897434 max resid 0.00002738668
## ... Similar to previous best
## Run 11 stress 0.03281301
## ... Procrustes: rmse 0.00004782254 max resid 0.0001885485
## ... Similar to previous best
## Run 12 stress 0.03281304
## ... Procrustes: rmse 0.00006446968 max resid 0.0002554987
## ... Similar to previous best
## Run 13 stress 0.03281293
## ... New best solution
## ... Procrustes: rmse 0.00001661198 max resid 0.00006464267
## ... Similar to previous best
## Run 14 stress 0.03281305
## ... Procrustes: rmse 0.00008591437 max resid 0.0003396772
## ... Similar to previous best
## Run 15 stress 0.03281303
## ... Procrustes: rmse 0.00007177757 max resid 0.000282788
## ... Similar to previous best
## Run 16 stress 0.03281304
## ... Procrustes: rmse 0.0000820854 max resid 0.0003245479
## ... Similar to previous best
## Run 17 stress 0.03281295
## ... Procrustes: rmse 0.00003182524 max resid 0.0001257578
## ... Similar to previous best
## Run 18 stress 0.03281302
## ... Procrustes: rmse 0.00007563761 max resid 0.0002991288
## ... Similar to previous best
## Run 19 stress 0.03281297
## ... Procrustes: rmse 0.00004755598 max resid 0.0001879429
## ... Similar to previous best
## Run 20 stress 0.03281294
## ... Procrustes: rmse 0.00002613964 max resid 0.0001031611
## ... Similar to previous best
## *** Best solution repeated 8 times
без разрежения
p1 <- plot_alpha_w_toc(ps = ps.ff, group = "Site", metric = "Observed")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(group)
##
## # Now:
## data %>% select(all_of(group))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(metric)
##
## # Now:
## data %>% select(all_of(metric))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
p2 <- plot_alpha_w_toc(ps = ps.ff, group = "Site", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps.ff, group = "Site", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)
с разряжением
p1 <- plot_alpha_w_toc(ps = ps.r, group = "Site", metric = "Observed")
p2 <- plot_alpha_w_toc(ps = ps.r, group = "Site", metric = "Shannon")
p3 <- plot_alpha_w_toc(ps = ps.r, group = "Site", metric = "InvSimpson")
ggarrange(p1, p2, p3, nrow = 1)
мажорные филотипы - относительная представленность
amp <- phyloseq_to_ampvis2(ps.ff)
amp
## ampvis2 object with 4 elements.
## Summary of OTU table:
## [1] 51 10696 884451 8346 30720 16831 17342.18
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family
## 10696(100%) 10696(100%) 10696(100%) 10696(100%) 10629(99.37%)
## Genus Species
## 6124(57.26%) 291(2.72%)
##
## Metadata variables: 10
## SampleID, Na_water, K_water, Cl, pH_salt, pH_water, C_total, Electro, Repeat, Site
amp_heatmap(amp, tax_show = 60,
group_by = "Site",
tax_aggregate = "OTU",
tax_add = "Genus",
round = 2,
normalise=TRUE,
showRemainingTaxa = FALSE)
## Warning: Transformation introduced infinite values in discrete y-axis
уровень фил
названия фил не исправлены на
Oren, Aharon, and George M. Garrity. “Valid Publication of the Names of Forty-Two Phyla of Prokaryotes.” International Journal of Systematic and Evolutionary Microbiology 71, no. 10 (October 20, 2021). https://doi.org/10.1099/ijsem.0.005056.
можно их исправить например при помощи чего-то похожего на код
ниже:
taxa <- read_tsv("taxa.txt") %>%
column_to_rownames("...1") %>%
mutate(Phylum = str_replace_all(Phylum, c("Firmicutes" = "Bacillota",
"Chloroflexi" = "Chloroflexota",
"Proteobacteria" = "Pseudomonadota",
"Desulfobacterota" = "Thermodesulfobacteriota",
"Crenarchaeota" = "Thermoproteota",
"Cyanobacteria" = "Cyanobacteriota"))) %>%
as.matrix()
amp_heatmap(amp,
tax_show = 12,
group_by = "Site",
tax_aggregate = "Phylum",
tax_add = "Kingdom",
normalise=TRUE,
showRemainingTaxa = TRUE)
PERMANOVA - как химия влияет на микробиом
ps.merged <- merge_samples(ps.ff, "Repeat")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
d_meta <- ps.merged@sam_data %>%
data.frame() %>%
select(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro")) %>%
scale() %>%
as.data.frame()
d_otu <- phyloseq::distance(ps.merged, "bray")
dec <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = TRUE )
sort <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = FALSE )
f_dec <- as.formula(paste0("d_otu ~", noquote(paste(dec, collapse=" + "))))
f_sort <- as.formula(paste0("d_otu ~", noquote(paste(sort, collapse=" + "))))
list(
order_forward = vegan::adonis2(f_dec, data = d_meta),
order_backward = vegan::adonis2(f_sort, data = d_meta)
)
## $order_forward
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = f_dec, data = d_meta)
## Df SumOfSqs R2 F Pr(>F)
## pH.salt 1 0.77383 0.46702 9.8349 0.001 ***
## Na.water 1 0.34096 0.20578 4.3334 0.022 *
## K.water 1 0.13181 0.07955 1.6752 0.271
## Electro 1 0.07642 0.04612 0.9713 0.495
## Cl 1 0.11950 0.07212 1.5187 0.259
## C.total 1 0.05705 0.03443 0.7251 0.616
## Residual 2 0.15736 0.09497
## Total 8 1.65694 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $order_backward
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## vegan::adonis2(formula = f_sort, data = d_meta)
## Df SumOfSqs R2 F Pr(>F)
## C.total 1 0.69264 0.41802 8.8030 0.001 ***
## Cl 1 0.31875 0.19237 4.0511 0.020 *
## Electro 1 0.13994 0.08446 1.7785 0.202
## K.water 1 0.16173 0.09761 2.0554 0.151
## Na.water 1 0.07581 0.04575 0.9634 0.500
## pH.salt 1 0.11072 0.06682 1.4072 0.293
## Residual 2 0.15736 0.09497
## Total 8 1.65694 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
химия - электропроводность(мера засоленности)
сипользован Z-score!
ps.ff@sam_data %>%
data.frame() %>%
# rownames_to_column("shit") %>%
# column_to_rownames("Name") %>%
select(which(sapply(., is.numeric)), Site) %>%
group_by(Site) %>%
summarise(across(everything(), mean),
.groups = 'drop') %>%
column_to_rownames("Site") %>%
scale() %>%
heatmaply::ggheatmap()
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust vegan
cca по сэмплам
library(ggrepel)
ps.merged <- phyloseq::merge_samples(ps.ff, "Repeat")
physeq <- ps.ff
veganifyOTU <- function(physeq){
require(phyloseq)
if(taxa_are_rows(physeq)){physeq <- t(physeq)}
return(as(otu_table(physeq), "matrix"))
}
otus.ps.vegan <- veganifyOTU(physeq)
rownames(otus.ps.vegan) <- ps.ff@sam_data$Repeat
metadata <- physeq@sam_data %>%
data.frame() %>%
select(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"))
dec <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = TRUE )
sort <- sort(c("Na.water", "K.water", "Cl", "pH.salt", "C.total", "Electro"), decreasing = FALSE )
f_dec <- as.formula(paste0("otus.ps.vegan ~", noquote(paste(dec, collapse=" + "))))
f_sort <- as.formula(paste0("otus.ps.vegan ~", noquote(paste(sort, collapse=" + "))))
cca_dec <- vegan::cca(f_dec, data=metadata)
cca_sort <- vegan::cca(f_sort, data=metadata)
kate.ggcca.sites <- function(vare.cca){
library(tidyverse)
biplot <- as.data.frame(vare.cca$CCA$biplot)
wa <- as.data.frame(vare.cca$CCA$wa)
biplot <- rownames_to_column(biplot, "Label") %>%
add_column(Score = rep("biplot", length(rownames(biplot))))
wa <- rownames_to_column(wa, "Label") %>%
add_column(Score = rep("sites", length(rownames(wa))))
fdat_amazing <- rbind(biplot, wa)
fdat_amazing <- fdat_amazing %>%
mutate(label_size = ifelse(Score == "biplot", 4.5, 3))
p <- ggplot(fdat_amazing %>% filter(Score %in% c("sites","biplot"))) +
# geom_abline(intercept=seq(-100, 100, 25), slope=1, colour="grey", alpha=0.5) +
# geom_abline(intercept=seq(-100, 100, 25), slope=-1, colour="grey", alpha=0.5) +
geom_vline(xintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_hline(yintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_point(data = fdat_amazing %>% dplyr::filter(Score == "sites"), mapping = aes(x=CCA1, y=CCA2, colour = factor(Score))) +
geom_segment(data = fdat_amazing %>% dplyr::filter(Score == "biplot"), aes(x = 0, xend = CCA1, y = 0, yend = CCA2),
size = 0.6, alpha=0.8, color = "red",arrow = arrow(angle = 3)) +
geom_text_repel(aes(x=CCA1, y=CCA2, label= Label),size=fdat_amazing$label_size) +
xlab(paste0(round(vare.cca$CCA$eig[1], 3)*100, "% CCA1")) +
ylab(paste0(round(vare.cca$CCA$eig[2], 3)*100, "% CCA2")) +
grids(linetype = "dashed") +
theme(legend.position = "none", panel.background = element_rect(fill = "white", colour = "grey50"))
return(p)
}
cca.sites <- kate.ggcca.sites(cca_dec)
cca.sites
тест на мультиколлинеарность
vegan::vif.cca(cca_dec)
## pH.salt Na.water K.water Electro Cl C.total
## 12.164455 27.848814 1.678713 191.952968 215.325139 30.290554
ссa по филотипам - отрисованы только 100 мажорных филотипов - цвет - к какой филе относятся
library(ggrepel)
ps.fff <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 3) > (0.1*length(x)), TRUE)
# ps.varstab <- ps_vst(ps.fff, "type")
ps.varstab <- ps.ff
# ps.varstab.merged <- phyloseq::merge_samples(ps.varstab, "Repeat")
# ps.varstab.merged.family <- tax_glom(ps.fff, taxrank="Family")
physeq <- ps.fff
ps.varstab <- physeq
veganifyOTU <- function(physeq){
require(phyloseq)
if(taxa_are_rows(physeq)){physeq <- t(physeq)}
return(as(otu_table(physeq), "matrix"))
}
otus.ps.vegan <- veganifyOTU(physeq)
metadata <- as(sample_data(physeq), "data.frame")
cca_w_varstab_asv <- vegan::cca(f_dec, data=metadata)
wa <- as.data.frame(cca_w_varstab_asv$CCA$v) %>%
rownames_to_column("ID")
taxa.pruned <- as.data.frame(ps.varstab@tax_table@.Data) %>%
rownames_to_column("ID")
taxa.pruned <- taxa.pruned %>%
mutate_all(as.character)
# old.tips <- tree$tip.label
# matches <- regmatches(tree$tip.label, gregexpr("[[:digit:]]+", tree$tip.label))
# taxa.pruned$number <- as.numeric(unlist(matches))
# Shitty taxa formating part
taxa.pruned$taxa <- ifelse(is.na(taxa.pruned$Genus),
ifelse(is.na(taxa.pruned$Family),
ifelse(is.na(taxa.pruned$Order),
ifelse(is.na(taxa.pruned$Class), taxa.pruned$Phylum, taxa.pruned$Class) , taxa.pruned$Order), taxa.pruned$Family), taxa.pruned$Genus)
taxa.pruned[taxa.pruned == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
taxa.pruned[taxa.pruned == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Pararhizobium"
taxa.pruned$taxa2 <- ifelse(is.na(taxa.pruned$Species),
with(taxa.pruned, paste0(taxa)),
with(taxa.pruned, paste0(taxa, " ", Species )))
taxa.pruned$phylum <- ifelse(taxa.pruned$Phylum == "Proteobacteria", with(taxa.pruned, paste0(taxa.pruned$Class)), with(taxa.pruned, paste0(taxa.pruned$Phylum)))
taxa.pruned$Label <- paste0(taxa.pruned$ID, "_", taxa.pruned$taxa2)
wa <- full_join(wa, taxa.pruned, by="ID") %>%
mutate(Score = "sites")
#For the top 50 most abundant taxa, skip if use des res
head_asv <- ps.varstab@otu_table@.Data %>%
data.frame %>%
colSums() %>%
sort(decreasing = TRUE) %>%
head(n=100) %>%
names()
wa <- filter(wa, ID %in% head_asv)
# maybe only different by des? Picture will be nice.
# wa <- filter(wa, ID %in% row.des)
# wa <- filter(wa, ID %in% row.des.so)
biplot <- as.data.frame(cca_w_varstab_asv$CCA$biplot)
biplot <- rownames_to_column(biplot, "Label") %>%
add_column(Score = rep("biplot", length(rownames(biplot))))
fdat_amazing <- plyr::rbind.fill(biplot, wa) %>%
mutate(Label = as.factor(Label))
fdat_amazing <- fdat_amazing %>%
mutate(label_size = ifelse(Score == "biplot", 4.5, 3))
p.cca.species <- ggplot(fdat_amazing %>% filter(Score %in% c("sites","biplot"))) +
geom_point(data = fdat_amazing %>% dplyr::filter(Score == "sites"), mapping = aes(x=CCA1, y=CCA2)) +
geom_segment(data = fdat_amazing %>% dplyr::filter(Score == "biplot"), aes(x = 0, xend = CCA1, y = 0, yend = CCA2), alpha=0.8, color = "red", arrow = arrow(angle = 3)) +
geom_text_repel(aes(x=CCA1, y=CCA2, label= Label, colour = phylum), size=fdat_amazing$label_size) +
xlab(paste0(round(cca_w_varstab_asv$CCA$eig[1], 3)*100, "% CCA1")) +
ylab(paste0(round(cca_w_varstab_asv$CCA$eig[2], 3)*100, "% CCA2")) +
grids(linetype = "dashed") +
geom_vline(xintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
geom_hline(yintercept = 0, size = 0.75, color = "#737373", alpha=0.5) +
theme(legend.position = "top", panel.background = element_rect(fill = "white", colour = "grey50"))
p.cca.species
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Первоначальная версия - сильная фильтрация Только филотипы распределенные более-менее везде + фильтрация внутри анкома на количество ридов
ps.inall <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
tax_show = 40,
group_by = "Site",
tax_aggregate = "OTU",
tax_add = "Genus",
normalise=TRUE,
showRemainingTaxa = TRUE)
## Warning: Transformation introduced infinite values in discrete y-axis
# ancom_da_glob <- ANCOMBC::ancombc(phyloseq = ps.inall,
# formula = "Site",
# p_adj_method = "fdr",
# lib_cut = 1000,
# group = "Site",
# struc_zero = TRUE,
# neg_lb = TRUE,
# tol = 1e-5,
# max_iter = 100,
# conserve = TRUE,
# alpha = 0.05,
# global = TRUE)
set.seed(123)
ancom <- ANCOMBC::ancombc2(data = ps.inall,
tax_level = NULL,
fix_formula = "Site",
rand_formula = NULL,
p_adj_method = "fdr",
pseudo = 0,
pseudo_sens = TRUE,
prv_cut = 0.10,
lib_cut = 1000,
s0_perc = 0.05,
group = "Site",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
n_cl = 15,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = FALSE,
iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
nrow = 2,
byrow = TRUE),
matrix(c(-1, 0, 1, -1),
nrow = 2,
byrow = TRUE)),
node = list(2, 2),
solver = "ECOS",
B = 100)
)
Более мягкая фильтрация.
set.seed(5882)
ancom_ff <- ANCOMBC::ancombc2(data = ps.ff,
tax_level = NULL,
fix_formula = "Site",
rand_formula = NULL,
p_adj_method = "fdr",
pseudo = 0,
pseudo_sens = TRUE,
prv_cut = 0.10,
s0_perc = 0.05,
group = "Site",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
n_cl = 20,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = FALSE,
iter_control = list(tol = 1e-2, max_iter = 20, verbose = TRUE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
nrow = 2,
byrow = TRUE),
matrix(c(-1, 0, 1, -1),
nrow = 2,
byrow = TRUE)),
node = list(2, 2),
solver = "ECOS",
B = 100)
)
# saveRDS(ancom_ff, "ancom_ff")
Нормализация библиотек. Процедура описана в
http://www.bioconductor.org/packages/release/bioc/vignettes/ANCOMBC/inst/doc/ANCOMBC.html
раздел “Bias-corrected abundances”
Статья с теорией(математика там вроде в сапплементе):
Lin, H., Peddada, S.D. Analysis of compositions of microbiomes with
bias correction. Nat Commun 11, 3514 (2020). https://doi.org/10.1038/s41467-020-17041-7
ps.an - phyloseq объект со скорректированными прочтениями + log
samp_frac <- ancom_ff$samp_frac
# Replace NA with 0
samp_frac[is.na(samp_frac)] <- 0
# Add pesudo-count (1) to avoid taking the log of 0
log_obs_abn = log(ancom_ff$feature_table + 1)
# Adjust the log observed abundances
log_corr_abn = t(t(log_obs_abn) - samp_frac)
# Show the first 6 sample
ps.an <- prune_taxa(taxa_names(ps.ff) %in% rownames(log_corr_abn), ps.ff)
otu_table(ps.an) <- otu_table(t(log_corr_abn), taxa_are_rows = FALSE)
log_corr_abn %>%
DT::datatable(caption = "Bias-corrected log observed abundances")
Сравнение - относительно Ашана.
Читать lfc_Sitehigh_road так:
log-fold change между sample Ашан и high_road
sum <- log_corr_abn %>%
as.data.frame() %>%
rowSums()
ps.an.merged <- merge_samples(ps.an, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
mean.d <- ps.an.merged@otu_table %>%
t() %>%
data.frame() %>%
mutate(
lfc_Sitehigh_road = (Ashan + high_road)/2,
lfc_SiteMiddle = (Ashan + Middle)/2,
lfc_SiteMiddle_Sitehigh_road = (Middle + high_road)/2
) %>%
rownames_to_column("taxon") %>%
select(c("taxon","lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road")) %>%
pivot_longer(!taxon, names_to = "pair", values_to = "abundance")
sum.d <- data.frame(taxon = names(sum), sum = sum, row.names = NULL)
lfc <- ancom_ff$res_pair %>%
select(c("taxon", "lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road"))
data_for_plot <- ancom_ff$res_pair %>%
select(c("taxon","lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road")) %>%
pivot_longer(c("lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road"),
names_to = "pair",
values_to = "lfc") %>%
left_join(mean.d, by=c("taxon", "pair"))
data_for_plot %>%
ggplot(aes(x=abundance, y=lfc, label=taxon, color=pair)) +
geom_point(alpha = 0.4) +
# geom_smooth(aes(y=value, x=mean, fill=pair, color=pair),
# method=lm,
# method.args = list(family = "quasipoisson"),
# se=TRUE)
coord_flip() +
theme_bw() +
scale_color_brewer(palette="Dark2") +
facet_wrap(~pair) +
theme(legend.position="bottom")
размер плитки - не abundance(обязательно реализую, но не сейчас), а
количество достоверно меняющихся филотипов
должно быть больше альфапротеобактерий(?) по литературе
или у нас влияние другого фактора, а не засоленности или проблема
анализа/визуализации
taxa_family <- ps.ff@tax_table %>%
as.data.frame() %>%
# select(-c("Genus", "Species")) %>%
rownames_to_column("ID") %>%
# select(-ID) %>%
distinct()
ancom_ff$res %>%
filter(diff_Sitehigh_road == TRUE) %>%
select(c("taxon", "lfc_Sitehigh_road")) %>%
dplyr::rename("ID" = "taxon") %>%
left_join(taxa_family, by="ID") %>%
mutate(`more in` = ifelse(lfc_Sitehigh_road > 0, "Highroad", "Ashan") %>% as.factor()) %>%
group_by(Genus, `more in`, Phylum ) %>%
summarise(area = n()) %>%
mutate(Genus = str_replace_na(Genus, replacement = ' ')) %>%
# DT::datatable() %>%
ggplot(aes(area = area,
fill = `more in`,
subgroup = Phylum,
label = Genus)) +
treemapify::geom_treemap() +
treemapify::geom_treemap_subgroup_border(color = "white") +
treemapify::geom_treemap_subgroup_text(place = "centre",
grow = T,
alpha = 0.5,
colour = "black",
fontface = "italic",
min.size = 0) +
treemapify::geom_treemap_text(colour = "white",
place = "topleft",
grow = T,
reflow = T,
layout = 'squarified') +
scale_fill_brewer(palette="Dark2") +
theme(legend.position="bottom")
## `summarise()` has grouped output by 'Genus', 'more in'. You can override using
## the `.groups` argument.
сравнены только Ашан и high_road
# т.к. phyloseq не даёт брать средние от ридов пришлось использовать кастомную функцию(украдено)
ps.mean <- merge_samples_mean(ps.ff, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
fn <- function(x, y) (x/y)
otus_filtered <- ps.mean@otu_table@.Data %>%
data.frame() %>%
select(-Middle) %>%
filter_all(all_vars(. > 0))
dist_a <- otus_filtered %>%
select(c("Ashan")) %>%
proxy::dist(method = fn)
## Registered S3 methods overwritten by 'proxy':
## method from
## print.registry_field registry
## print.registry_entry registry
dist_b <- otus_filtered %>%
select(c("high_road")) %>%
proxy::dist(method = fn)
res <- as.matrix(log(dist_a / dist_b)) %>%
as.data.frame() %>%
select(where(is.numeric)) %>%
summarise(across(everything(), mean)) %>%
pivot_longer(cols = everything()) %>%
arrange(value)
ggplot(res, aes(x=value)) +
geom_density() +
labs(title = "денсити-плот значений статистики метода ЕЕ") +
theme_bw()
похоже у метода будут сильные проблемы с FDR(?)
статистики метода ЕЕ ~ lfc
res_mod <- res %>%
dplyr::rename("taxon" = "name", "lfc" = "value")
ps.merged.repeat <- merge_samples(ps.ff, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.merged.repeat@otu_table %>%
t() %>%
data.frame() %>%
select(-Middle) %>%
rownames_to_column("taxon") %>%
left_join(res_mod, by=c("taxon")) %>%
mutate(`more in` = ifelse(lfc > 0, "Ashan", "high_road") %>% as.factor()) %>%
pivot_longer(!c(taxon, `more in`, lfc), names_to = "Site", values_to = "abundance") %>%
mutate(abundance = log2(abundance)) %>%
mutate(Site = as.factor(Site)) %>%
ggplot(aes(y=lfc, x=abundance)) +
geom_point(alpha = 0.5) +
# geom_smooth(aes(y=value, x=mean, fill=pair, color=pair),
# method=lm,
# method.args = list(family = "quasipoisson"),
# se=TRUE) +
coord_flip() +
theme_bw()
## Warning: Removed 10454 rows containing missing values (`geom_point()`).
то же по данным ЕЕ
в качестве порога(вместо p-value) я взял слева и справа по 2.5
перцентилю
FDR тоже нет, я его не вводил, но p-value у нас нет
taxa_family <- ps.ff@tax_table %>%
as.data.frame() %>%
# select(-c("Genus", "Species")) %>%
rownames_to_column("ID") %>%
# select(-ID) %>%
distinct()
# ps.merged.mean@otu_table@.Data %>%
# DT::datatable()
# res %>%
# DT::datatable()
quantile(res$value, c(0.025, .975))
## 2.5% 97.5%
## -2.446014 2.818329
percentiles <- quantile(res$value, c(0.025, .975))
res %>%
filter( value > percentiles[[2]] | value < percentiles[[1]] ) %>%
dplyr::rename("ID" = "name") %>%
left_join(taxa_family, by="ID") %>%
mutate(`more in` = ifelse(value > 0, "Highroad", "Ashan") %>% as.factor()) %>%
group_by(Genus, `more in`, Phylum ) %>%
summarise(area = n()) %>%
mutate(Genus = str_replace_na(Genus, replacement = ' ')) %>%
ggplot(aes(area = area,
fill = `more in`,
subgroup = Phylum,
label = Genus)) +
treemapify::geom_treemap() +
treemapify::geom_treemap_subgroup_border(color = "white") +
treemapify::geom_treemap_subgroup_text(place = "centre",
grow = T,
alpha = 0.5,
colour = "black",
fontface = "italic",
min.size = 0) +
treemapify::geom_treemap_text(colour = "white",
place = "topleft",
grow = T,
reflow = T,
layout = 'squarified') +
scale_fill_brewer(palette="Dark2") +
theme(legend.position="bottom")
## `summarise()` has grouped output by 'Genus', 'more in'. You can override using
## the `.groups` argument.
может быть дело в том, что нет фильтрации данных?
добавлена стандартная фильтрация для композиционных анализов, выкинуто
то, что встречается только в отдельных образцах(было 10696 - стало
888)
*это ps.filt
ps.filt <- phyloseq::filter_taxa(ps.ff, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
# т.к. phyloseq не даёт брать средние от ридов пришлось использовать кастомную функцию(украдено)
ps.mean <- merge_samples_mean(ps.filt, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
fn <- function(x, y) (x/y)
otus_filtered <- ps.mean@otu_table@.Data %>%
data.frame() %>%
select(-Middle) %>%
filter_all(all_vars(. > 0))
dist_a <- otus_filtered %>%
select(c("Ashan")) %>%
proxy::dist(method = fn)
dist_b <- otus_filtered %>%
select(c("high_road")) %>%
proxy::dist(method = fn)
res <- as.matrix(log(dist_a / dist_b)) %>%
as.data.frame() %>%
select(where(is.numeric)) %>%
summarise(across(everything(), mean)) %>%
pivot_longer(cols = everything()) %>%
arrange(value)
ggplot(res, aes(x=value)) +
geom_density() +
labs(title = "денсити-плот значений статистики метода ЕЕ") +
theme_bw()
возможно стоит добавить стабилизацию дисперсии? Или я неправильно работаю с алгоритмом.
res_mod <- res %>%
dplyr::rename("taxon" = "name",
"lfc" = "value")
ps.merged.repeat <- merge_samples(ps.filt, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.merged.repeat@otu_table %>%
t() %>%
data.frame() %>%
select(-Middle) %>%
rownames_to_column("taxon") %>%
left_join(res_mod, by=c("taxon")) %>%
mutate(`more in` = ifelse(lfc > 0, "Ashan", "high_road") %>% as.factor()) %>%
pivot_longer(!c(taxon, `more in`, lfc), names_to = "Site", values_to = "abundance") %>%
mutate(abundance = log2(abundance)) %>%
mutate(Site = as.factor(Site)) %>%
ggplot(aes(y=lfc, x=abundance)) +
geom_point(alpha = 0.5) +
# geom_smooth(aes(y=value, x=mean, fill=pair, color=pair),
# method=lm,
# method.args = list(family = "quasipoisson"),
# se=TRUE) +
coord_flip() +
theme_bw()
## Warning: Removed 286 rows containing missing values (`geom_point()`).
среднее относительная представленность в сайтах по тому растет или
нет
пояснительная картинка внизу
Довольно сильно зафильтровано, т.к. иначе сильно перекошено т.к. средние
не работают на таких данных
ps.rel <- phyloseq::transform_sample_counts(ps.filt, function(x) x / sum(x) * 100)
# ps.merged.mean <- merge_samples_mean(ps.filt, "Site")
ps.merged.mean <- merge_samples_mean(ps.rel, "Site")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.merged.mean@otu_table %>%
data.frame() %>%
select(-Middle) %>%
mutate(`more in` = ifelse(Ashan > high_road, "Ashan", "high_road") %>% as.factor()) %>%
pivot_longer(!`more in`, names_to = "Site", values_to = "mean") %>%
group_by(`more in`, Site) %>%
summarise(mean_read_count = round(mean(mean), 3))
## `summarise()` has grouped output by 'more in'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 3
## # Groups: more in [2]
## `more in` Site mean_read_count
## <fct> <chr> <dbl>
## 1 Ashan Ashan 0.213
## 2 Ashan high_road 0.027
## 3 high_road Ashan 0.032
## 4 high_road high_road 0.181
ps.merged.mean@otu_table %>%
data.frame() %>%
select(-Middle) %>%
mutate(`more in` = ifelse(Ashan > high_road, "Ashan", "high_road") %>% as.factor()) %>%
pivot_longer(!`more in`, names_to = "Site", values_to = "mean") %>%
mutate(Label = paste0("increased_in_", `more in`, " -- ", "ASVs_from_", Site) %>% as.factor()) %>%
ggplot(aes(x=mean)) +
geom_density() +
theme_bw() +
facet_wrap(~Label)
То же, но с данными ancom-bc, изменяющиеся филотипы разделены на достоверно/недостоверно меняющиеся.
otu_corr <- log_corr_abn %>%
t() %>%
data.frame() %>%
mutate(Site = ps.ff@sam_data$Site) %>%
group_by(Site) %>%
summarise(across(everything(), mean), .groups = 'drop') %>%
pivot_longer(!Site, names_to = "taxon", values_to = "abundance")
# rename(site = Site)
otu_corr %>%
left_join(ancom_ff$res_pair , by=c("taxon")) %>%
filter(Site %in% c("Ashan", "high_road")) %>%
# filter(taxon %in% c("Seq1", "Seq2")) %>%
select(c("Site", "taxon", "lfc_Sitehigh_road", "diff_Sitehigh_road", "abundance")) %>%
mutate(increasing = ifelse(lfc_Sitehigh_road > 0, TRUE, FALSE)) %>%
ggplot(aes(x=increasing, y=abundance)) +
# geom_text(aes(label = taxon)) +
geom_boxplot(outlier.size = 0, outlier.alpha = 0) +
ggbeeswarm::geom_quasirandom(aes(color=diff_Sitehigh_road),size=0.75, alpha=0.7,dodge.width = 0.75) +
theme_light() +
scale_color_brewer(palette="Dark2") +
labs(title = "Mean", x = "lfc positive", y = "corrected log abundance", color = "Достоверность\n изменения филотипа") +
theme(axis.text.x = element_text(angle = 90)) +
# ggpubr::stat_pvalue_manual(
# stat_test, label = "{p}",
# tip.length = 0.005, hide.ns = TRUE
# ) +
facet_wrap(~Site)
То же, но в виде регрессии
ps.merged.mean@otu_table %>%
data.frame() %>%
select(-Middle) %>%
mutate(lfc = log2(Ashan/high_road)) %>%
mutate(`more in` = ifelse(Ashan > high_road, "Ashan", "high_road") %>% as.factor()) %>%
pivot_longer(!c(`more in`, lfc) , names_to = "Site", values_to = "merged_mean") %>%
mutate(merged_mean = log2(merged_mean)) %>%
mutate(Site = as.factor(Site)) %>%
ggplot(aes(x=merged_mean,
y=lfc,
label=Site,
color=Site)) +
geom_point() +
geom_smooth(aes(y=lfc,
x=merged_mean,
fill=Site,
color=Site),
method=lm,
se=TRUE) +
theme_bw() +
facet_wrap(~Site)
## `geom_smooth()` using formula = 'y ~ x'
то же, но с данными ancomBC(менее строгая фильтрацией)
lm общая, разделение по группам не влияет
dt <- otu_corr %>%
left_join(lfc, by=c("taxon")) %>%
filter(Site %in% c("Ashan", "high_road"))
l <- lm(lfc_Sitehigh_road ~ abundance*Site, data=dt)
sl <- summary(l)
p <- sl$coefficients["abundance",4]
lfc <- ancom_ff$res_pair %>%
select(c("taxon", "lfc_Sitehigh_road", "lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road"))
otu_corr %>%
left_join(lfc, by=c("taxon")) %>%
filter(Site %in% c("Ashan", "high_road")) %>%
select(-c("lfc_SiteMiddle", "lfc_SiteMiddle_Sitehigh_road")) %>%
ggplot(aes(x=abundance,
y=lfc_Sitehigh_road,
label=Site,
color=Site)) +
geom_point() +
geom_smooth(aes(y=lfc_Sitehigh_road, x=abundance, fill=Site, color=Site),
method=lm,
se=TRUE) +
theme_bw() +
labs(
title = "Ашан -- Шоссе",
subtitle = paste0( "lm:\n",
" R2adjasted -- " , round(sl$adj.r.squared, 3),
"\n abundance p-value ", format.pval(p)
)
) +
facet_wrap(~Site)
## `geom_smooth()` using formula = 'y ~ x'
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.6.0 ggforce_0.4.1 ggrepel_0.9.3 ampvis2_2.7.28
## [5] phyloseq_1.42.0 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
## [9] dplyr_1.1.0 purrr_1.0.1 readr_2.1.4 tidyr_1.3.0
## [13] tibble_3.1.8 ggplot2_3.4.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] ggbeeswarm_0.7.1 colorspace_2.1-0 ggsignif_0.6.4
## [4] ellipsis_0.3.2 XVector_0.38.0 proxy_0.4-27
## [7] rstudioapi_0.14 farver_2.1.1 ggfittext_0.9.1
## [10] DT_0.27 bit64_4.0.5 fansi_1.0.4
## [13] codetools_0.2-19 splines_4.2.0 cachem_1.0.7
## [16] knitr_1.42 polyclip_1.10-4 ade4_1.7-22
## [19] jsonlite_1.8.4 broom_1.0.3 cluster_2.1.4
## [22] compiler_4.2.0 httr_1.4.5 backports_1.4.1
## [25] assertthat_0.2.1 Matrix_1.5-3 fastmap_1.1.1
## [28] lazyeval_0.2.2 treemapify_2.5.5 cli_3.6.0
## [31] tweenr_2.0.2 htmltools_0.5.4 tools_4.2.0
## [34] igraph_1.4.1 gtable_0.3.1 glue_1.6.2
## [37] GenomeInfoDbData_1.2.9 reshape2_1.4.4 Rcpp_1.0.10
## [40] carData_3.0-5 Biobase_2.58.0 jquerylib_0.1.4
## [43] vctrs_0.5.2 Biostrings_2.66.0 rhdf5filters_1.10.0
## [46] multtest_2.54.0 ape_5.7 nlme_3.1-162
## [49] crosstalk_1.2.0 iterators_1.0.14 xfun_0.37
## [52] timechange_0.2.0 lifecycle_1.0.3 dendextend_1.16.0
## [55] rstatix_0.7.2 ca_0.71.1 zlibbioc_1.44.0
## [58] MASS_7.3-58.2 scales_1.2.1 TSP_1.2-2
## [61] vroom_1.6.1 hms_1.1.2 parallel_4.2.0
## [64] biomformat_1.26.0 rhdf5_2.42.0 RColorBrewer_1.1-3
## [67] yaml_2.3.7 gridExtra_2.3 heatmaply_1.4.2
## [70] sass_0.4.5 stringi_1.7.12 highr_0.10
## [73] S4Vectors_0.36.2 foreach_1.5.2 permute_0.9-7
## [76] seriation_1.4.1 BiocGenerics_0.44.0 GenomeInfoDb_1.34.9
## [79] rlang_1.0.6 pkgconfig_2.0.3 systemfonts_1.0.4
## [82] bitops_1.0-7 evaluate_0.20 lattice_0.20-45
## [85] Rhdf5lib_1.20.0 htmlwidgets_1.6.1 labeling_0.4.2
## [88] cowplot_1.1.1 bit_4.0.5 tidyselect_1.2.0
## [91] plyr_1.8.8 magrittr_2.0.3 R6_2.5.1
## [94] IRanges_2.32.0 generics_0.1.3 pillar_1.8.1
## [97] withr_2.5.0 mgcv_1.8-42 survival_3.5-3
## [100] abind_1.4-5 RCurl_1.98-1.10 crayon_1.5.2
## [103] car_3.1-1 utf8_1.2.3 plotly_4.10.1
## [106] tzdb_0.3.0 rmarkdown_2.20 viridis_0.6.2
## [109] grid_4.2.0 data.table_1.14.8 vegan_2.6-4
## [112] webshot_0.5.4 digest_0.6.31 stats4_4.2.0
## [115] munsell_0.5.0 beeswarm_0.4.0 registry_0.5-1
## [118] viridisLite_0.4.1 vipor_0.4.5 bslib_0.4.2
## [121] egg_0.4.5