Last active
July 13, 2022 06:37
-
-
Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.
Revisions
-
MarinaGolivets revised this gist
Jul 13, 2022 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -40,7 +40,7 @@ for (p in pkgs) { library(p, character.only = TRUE) } # choose an option to not treat warnings as errors options(warn = 1) # check what packages are being used on your machine -
MarinaGolivets revised this gist
Jul 13, 2022 . 1 changed file with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -772,6 +772,8 @@ write_csv2(veg, here("veg_data_2020_edited.csv")) # analyse species composition ---------------------------------------------------------------------- veg <- read_csv2(here("veg_data_2022_edited.csv")) mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg) str(mat) -
MarinaGolivets revised this gist
Jul 12, 2022 . 1 changed file with 16 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -50,6 +50,7 @@ sessionInfo() here() # load data ---------------------------------------------------------------------------------------- if (length(list.files(here(), "veg_data_2022.csv")) == 0) { # load the data directly from the Google sheet veg <- googlesheets4::read_sheet( @@ -664,6 +665,8 @@ levene_test(veg, Hoehe.avg.log ~ Lebensform) # perform ANOVA and the Kruskal-Wallis test (non-parametric alternative) anova_test(veg, Hoehe.avg.log ~ Lebensform) # because the normality of residuals assumption was violated # the Kruskal Wallis test should be used instead of a standard ANOVA kruskal_test(veg, Hoehe.avg.log ~ Lebensform) @@ -677,10 +680,12 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg) ggqqplot(residuals(lm.hoe.best)) shapiro_test(residuals(lm.hoe.best)) levene_test(veg, Hoehe.avg.log ~ Bestaeubung) # because the normality of residuals assumption was violated # we use the Kruskal Wallis test instead of a standard ANOVA # and the Dunn's post hoc test for pairwise comparisons hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.kw hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.pwc @@ -742,9 +747,14 @@ fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse) ggpubr::ggqqplot(residuals(fm.gmasse.best)) rstatix::shapiro_test(residuals(fm.gmasse.best)) ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung") levene_test(gmasse, Germinulenmasse.log ~ Bestaeubung) # because the homogeneity of variance assumption was violated # we use the Welch test instead of a standard ANOVA # and the Games-Howell post hoc test for pairwise comparisons gmasse.best.aov <- welch_anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung) gmasse.best.aov gmasse.best.pwc <- games_howell_test(gmasse, Germinulenmasse.log ~ Bestaeubung) gmasse.best.pwc gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung") @@ -931,4 +941,4 @@ mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8) mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8) dat[dat$pc1 > 4, ] dat[dat$pc2 < -3, ] -
MarinaGolivets revised this gist
Jul 12, 2022 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -757,6 +757,9 @@ ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") + x = "Bestäubungstyp" ) # save the edited data as a separate file write_csv2(veg, here("veg_data_2020_edited.csv")) # analyse species composition ---------------------------------------------------------------------- mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg) -
MarinaGolivets revised this gist
Jul 12, 2022 . 1 changed file with 89 additions and 74 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -29,9 +29,11 @@ pkgs <- c( ) # install (if not installed) and load all the packages # # run only once! # install.packages("devtools") # devtools::install_github("ddsjoberg/gtsummary") # devtools::install_github("ropensci/skimr") for (p in pkgs) { if (!require(p, character.only = TRUE)) install.packages(p) @@ -48,23 +50,22 @@ sessionInfo() here() # load data ---------------------------------------------------------------------------------------- if (length(list.files(here(), "veg_data_2022.csv")) == 0) { # load the data directly from the Google sheet veg <- googlesheets4::read_sheet( "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE", sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc" ) # save the data to your home directory readr::write_csv2(veg, here("veg_data_2022.csv")) } else { # load the data from your home directory veg <- readr::read_csv2(here("veg_data_2022.csv")) } # take a quick look at the data glimpse(veg) # rename columns ----------------------------------------------------------------------------------- veg %<>% @@ -135,6 +136,7 @@ veg %<>% Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols as.integer() # convert to integer ) veg[veg$Fortlaufendenr == 41, "Feuchte"] <- NA # check taxon names tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE) @@ -144,9 +146,10 @@ veg %<>% WissName = recode( WissName, `Taraxacum officinalis` = "Taraxacum officinale", `Viola arvense` = "Viola arvensis" ) ) veg[veg$Fortlaufendenr == 41, "WissName"] <- "Agrostis capillaris" # check leaf area summary(veg$Blattflaeche) @@ -169,6 +172,7 @@ veg %<>% mutate(Germinulenmasse = case_when( WissName == "Cerastium semidecandrum" ~ .04, WissName == "Agrostis stolonifera" ~ .07, Fortlaufendenr == 41 ~ .1, TRUE ~ Germinulenmasse )) @@ -220,6 +224,9 @@ veg %<>% )) summary(veg$Germinulenmasse) # convert life form and pollination to factor veg %<>% mutate(across(c(Lebensform, Bestaeubung), ~ as.factor(.))) # calculate SLA & LDMC ----------------------------------------------------------------------------- veg %<>% @@ -325,9 +332,9 @@ boxplot(LDMC ~ Feuchte_CWM, dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg", x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y" ) ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + geom_point(na.rm = TRUE) + stat_smooth(method = "lm", col = "#E69F00", se = FALSE, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]") + theme_minimal() @@ -339,39 +346,44 @@ performance::check_model(fm0.LDMC) fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover summary(fm1.LDMC) check_model(fm1.LDMC) # compare the two models tbl_merge( tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)), tab_spanner = c("**Non-weighted**", "**Weigted**") ) # using soil moisture (Bodenfeuchte) as explanatory variable fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg) summary(fm0.LDMC.b) check_model(fm0.LDMC.b) fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.LDMC.b) check_model(fm1.LDMC.b) # compare the two models tbl_merge( tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)), tab_spanner = c("**Non-weighted**", "**Weigted**") ) plot1 <- ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + geom_point(na.rm = TRUE) + geom_abline( intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2], col = "#E69F00", size = 1, na.rm = TRUE ) + geom_abline( intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2], col = "#56B4E9", size = 1, na.rm = TRUE ) + labs(x = "Bodenfeuchte [%]") + theme_minimal() + annotate("text", x = 20, y = 270, label = "Non-weighted", col = "#E69F00") + annotate("text", x = 25, y = 320, label = "Weighted", col = "#56B4E9") plot1 # analyse SLA -------------------------------------------------------------------------------------- @@ -410,23 +422,26 @@ filter(veg, WasserHH == 7 & SLA.log < 3) %>% # perform regression analyses fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg) summary(fm1.SLA) check_model(fm1.SLA) fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg) summary(fm1.SLA.log) check_model(fm1.SLA.log) fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.SLA.log.b) check_model(fm1.SLA.log.b) # plot the regression line plot2 <- ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) + geom_point(na.rm = TRUE) + geom_abline( intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2], col = "#56B4E9", size = 1, na.rm = TRUE ) + labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) + theme_minimal() plot2 # look at the relationship between SLA and LDMC plot(SLA ~ LDMC, data = veg) @@ -453,9 +468,9 @@ boxplot(Hoehe.avg ~ WasserHH, log = "y" ) filter(veg, WasserHH == 6 & Hoehe.avg > 50) %>% select(WissName, Aufnahmenr, Hoehe.avg) filter(veg, WasserHH == 8 & Hoehe.avg < 20) %>% select(WissName, Aufnahmenr, Hoehe.avg) boxplot(Hoehe.avg.log ~ WasserHH, @@ -467,21 +482,24 @@ par(op) # perform regression analysis fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg) summary(fm1.hoehe.avg.log) check_model(fm1.hoehe.avg.log) fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.hoehe.avg.log.b) check_model(fm1.hoehe.avg.log.b) # plot the regression line plot3 <- ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) + geom_point(na.rm = TRUE) + geom_abline( intercept = fm1.hoehe.avg.log.b$coefficients[1], slope = fm1.hoehe.avg.log.b$coefficients[2], col = "#56B4E9", size = 1, na.rm = TRUE ) + geom_smooth(col = "red", se = FALSE, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) + theme_minimal() plot3 # analyse height variance -------------------------------------------------------------------------- @@ -512,17 +530,17 @@ boxplot(Hoehe.cv ~ WasserHH, data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1] ) filter(veg, WasserHH == 4 & Hoehe.cv > 100) %>% select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv) # perform regression analysis fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung) summary(fm1.Hoehe.cv.log) check_model(fm1.Hoehe.cv.log) fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung) summary(fm1.Hoehe.cv.log.b) check_model(fm1.Hoehe.cv.log.b) # analyse seed mass -------------------------------------------------------------------------------- @@ -560,14 +578,10 @@ lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>% head(lft.1) tail(lft.1) lft.2 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.1) lft.2 <- DescTools::as.matrix.xtabs(lft.3) lft.2 chisq_test(lft.2, simulate.p.value = TRUE) # plot the distribution of life forms across humidity levels # using base R @@ -577,11 +591,11 @@ barplot(lft.3, ylab = expression(sum("Deckung")), main = "Lebensformenspektren", ylim = c(0, 160) ) legend(8.5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3))) par(op) # using ggplot2 ggplot(data = lft.1, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) + geom_bar(position = "stack", stat = "identity") + viridis::scale_fill_viridis(discrete = TRUE) + labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + @@ -601,14 +615,10 @@ bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>% head(bst.1) tail(bst.1) bst.2 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.1) bst.2 <- DescTools::as.matrix.xtabs(bst.2) bst.2 chisq_test(bst.2, simulate.p.value = TRUE) # plot the distribution of pollination types # using base R @@ -620,19 +630,20 @@ barplot(bst.3, sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols ) legend(0, 140, legend = c("Wind", "Selbst", "multiple", "Insekten"), pch = 15, col = cols[4:1], cex = 0.7 ) # using ggplot2 plot4 <- ggplot(data = bst.1, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) + geom_bar(position = "stack", stat = "identity") + viridis::scale_fill_viridis( discrete = TRUE, labels = c("Insekten", "multiple", "Selbst", "Wind") ) + labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + ggtitle("Bestäubungstypenspektren") + theme_minimal() plot4 # analyse the relationships between traits --------------------------------------------------------- @@ -657,6 +668,7 @@ kruskal_test(veg, Hoehe.avg.log ~ Lebensform) # test if the height varies across pollination types veg$Bestaeubung <- as.factor(veg$Bestaeubung) ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log") group_by(veg, Bestaeubung) %>% identify_outliers(Hoehe.avg.log) %>% @@ -665,23 +677,24 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg) ggqqplot(residuals(lm.hoe.best)) shapiro_test(residuals(lm.hoe.best)) levene_test(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung) # perform a post-hoc test (i.e. pairwise comparisons) hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.pwc # plot the results hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung") plot6 <- ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") + stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) + labs( subtitle = get_test_label(hoe.best.kw, detailed = TRUE), caption = get_pwc_label(hoe.best.pwc), y = expression(log("Wuchshöhe")), x = "Bestäubungstyp" ) plot6 # test if the SLA varies across life forms ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL")) @@ -705,18 +718,20 @@ ggqqplot(residuals(lm.gmasse.lf)) shapiro_test(residuals(lm.gmasse.lf)) levene_test(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.aov gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.pwc gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform") plot7 <- ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") + stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) + labs( subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE), caption = get_pwc_label(gmasse.lf.pwc), y = expression(log("Germinulenmasse")), x = "Lebensform" ) plot7 # test if the seed mass varies across pollination types -
MarinaGolivets renamed this gist
Jul 11, 2022 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
MarinaGolivets created this gist
Jul 11, 2022 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,916 @@ # GP Oekologie 2022 # Ingolf Kühn & Marina Golivets # install & load R packages ------------------------------------------------------------------------ rm(list = ls()) # packages that we need for analyses pkgs <- c( "here", # for handling working directory "googlesheets4", # for loading data from Google spreadsheets "tidyverse", # for data manipulation "magrittr", # for piping "skimr", # for summarizing data "gtsummary", # for summarizing data "taxize", # for checking taxonomic names "TR8", # for retrieving trait data from databases "performance", # for inspecting regression models, "rstatix", # for statistical analyses, e.g. ANOVA "DescTools", # for miscellaneous basic stats "ggpubr", # for plotting "ggrepel", # for plotting "viridis", # for plotting "cowplot", # for plotting "vegan", # for multivariate analyses "FactoMineR", # for PCA "ks" # for kernel smoothers ) # install (if not installed) and load all the packages install.packages("devtools") devtools::install_github("ddsjoberg/gtsummary") devtools::install_github("ropensci/skimr") for (p in pkgs) { if (!require(p, character.only = TRUE)) install.packages(p) library(p, character.only = TRUE) } # choose an option to treat warnings as errors options(warn = 1) # check what packages are being used on your machine sessionInfo() # check your working (home) directory here() # load data ---------------------------------------------------------------------------------------- # load the data directly from the Google sheet veg <- googlesheets4::read_sheet( "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE", sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc" ) # take a quick look at the data glimpse(veg) # save the data to your home directory readr::write_csv2(veg, here("veg_data_2022.csv")) # load the data from your home directory veg <- readr::read_csv2(here("veg_data_2022.csv")) glimpse(veg) # rename columns ----------------------------------------------------------------------------------- veg %<>% rename( Fortlaufendenr = `...1`, Aufnahmenr = Aufnahmenr., WissName = `Wissenschaftlicher Name`, Hoehe.avg = `Höhe Ø (cm)`, Hoehe.var = `Höhe σ²`, Blattflaeche = `Blatt-Fläche (mm²)`, Frischmasse = `Frischmasse (g)`, Trockenmasse = `Trockenmasse (mg)`, Germinulenmasse = `Germinulenmasse (mg)`, Feuchte = `Ellenberg-Feuchte`, Bodenfeuchte = `Bodenfeuchte (%vol)` ) # take a more detailed look at the data skimr::skim(veg) select(veg, -Fortlaufendenr, -Aufnahmenr, -WissName, -Anmerkungen) %>% gtsummary::tbl_summary() # check data for errors ---------------------------------------------------------------------------- # reassign expert water availability measures veg %<>% rows_update(tibble(Aufnahmenr = 1:8, WasserHH = c(1, 2, 3, 5, 6, 7, 8, 4))) # make sure plot-level data don't have duplicates select(veg, Aufnahmenr, Ort, Vegetation, WasserHH, Bodenfeuchte) %>% n_distinct() # should equal the number of plots (n = 8) # make sure species are not duplicated within plots nrow(veg) # 141 select(veg, Aufnahmenr, WissName) %>% n_distinct() # should equal the number of rows (n = 141) # make sure species-level data (from databases) are unique select(veg, WissName) %>% n_distinct() # 79 select(veg, WissName, Lebensform, Germinulenmasse, Bestaeubung, Feuchte) %>% n_distinct() # should equal the number of species (n = 79) # check life form table(veg$Lebensform) filter(veg, Lebensform == "G") %>% pull(WissName) %>% unique() veg %<>% mutate(Lebensform = case_when( WissName == "Phalaris arundinacea" ~ "mL", TRUE ~ Lebensform )) # check % cover per species hist(veg$Deckung) # check total % cover per plot group_by(veg, Aufnahmenr) %>% summarise(Deckung.tot = sum(Deckung)) # check vegetation types at each site table(veg$Ort, veg$Vegetation) # check if Ellenberg humidity indicator values are integers unique(veg$Feuchte) unique(as.integer(veg$Feuchte)) veg %<>% mutate( Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols as.integer() # convert to integer ) # check taxon names tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE) View(tax) veg %<>% mutate( WissName = recode( WissName, `Taraxacum officinalis` = "Taraxacum officinale", `Viola arvense` = "Viola arvensis", ) ) # check leaf area summary(veg$Blattflaeche) veg %<>% mutate(Blattflaeche = na_if(Blattflaeche, 0)) # check seed mass summary(veg$Germinulenmasse) veg %>% filter(Germinulenmasse == .001) %>% pull(WissName) %>% unique() View(available_tr8) gmasse_tr8 <- TR8::tr8( species_list = c("Agrostis stolonifera", "Cerastium semidecandrum"), download_list = c("seed_wght", "SeedMass", "seed_mass"), allow_persistent = TRUE ) gmasse_tr8 veg %<>% mutate(Germinulenmasse = case_when( WissName == "Cerastium semidecandrum" ~ .04, WissName == "Agrostis stolonifera" ~ .07, TRUE ~ Germinulenmasse )) # take a look at the data again skim(veg) # impute missing trait data ------------------------------------------------------------------------ # life form (data from FloraWeb) select(veg, WissName, Lebensform) %>% distinct() %>% filter(is.na(Lebensform)) veg %<>% mutate(Lebensform = case_when( WissName %in% c("Cerastium glomeratum", "Erophila verna") ~ "mL", TRUE ~ Lebensform )) # pollination type (data from FloraWeb) select(veg, WissName, Bestaeubung) %>% distinct() %>% filter(is.na(Bestaeubung)) veg %<>% mutate(Bestaeubung = case_when( WissName %in% c( "Cerastium glomeratum", "Erophila verna", "Valerianella spec." ) ~ "se", TRUE ~ Bestaeubung )) # seed mass (data from Seed Information Database) select(veg, WissName, Germinulenmasse) %>% distinct() %>% filter(is.na(Germinulenmasse)) veg %<>% mutate(Germinulenmasse = case_when( WissName == "Allium vineale" ~ 7, WissName == "Carex ovalis" ~ .52, WissName == "Cerastium glomeratum" ~ .05, WissName == "Cerastium glutinosum" ~ .0654, WissName == "Cruciata laevipes" ~ 3.5856, WissName == "Erodium cicutarium" ~ 2, WissName == "Erophila verna" ~ .024, WissName == "Senecio erucifolius" ~ .3935, WissName == "Saxifraga granulata" ~ .03, WissName == "Prunus mahaleb" ~ 79.9, WissName == "Polygonum persicaria" ~ 2.1, TRUE ~ Germinulenmasse )) summary(veg$Germinulenmasse) # calculate SLA & LDMC ----------------------------------------------------------------------------- veg %<>% mutate( SLA = Blattflaeche / Trockenmasse, LDMC = Trockenmasse / Frischmasse ) # check SLA hist(veg$SLA) # check LDMC hist(veg$LDMC) filter(veg, LDMC > 500) %>% select(Fortlaufendenr, WissName, Deckung, Blattflaeche, Frischmasse, Trockenmasse, LDMC) # make corrections in the data veg %<>% mutate( Frischmasse = if_else(Fortlaufendenr == 127, .2063, Frischmasse), Trockenmasse = if_else(Fortlaufendenr == 79, 85, Trockenmasse) # it's a guess! ) # recalculate SLA & LDMC veg %<>% mutate( SLA = Blattflaeche / Trockenmasse, LDMC = Trockenmasse / Frischmasse ) # analyse species numbers -------------------------------------------------------------------------- # check the number of species per plot artz1 <- table(veg$Aufnahmenr) artz1 # check the number of species at each water availability level artz2 <- table(veg$WasserHH) artz2 # test whether the observed species numbers differ from the expected chisq.test(artz2) chisq.test(artz2)$expected barplot(artz2, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = "Artenzahl" ) # analyse water availability ----------------------------------------------------------------------- # check the correspondence between the Ellenberg values and estimated water availability boxplot(veg$Feuchte ~ veg$WasserHH) # calculate the mean plot-level Ellenberg humidity indicator value WasserHH.Feuchte <- group_by(veg, Aufnahmenr, WasserHH, Bodenfeuchte) %>% summarise( # community mean Feuchte_CM = mean(Feuchte, na.rm = TRUE) %>% round(., 1), # community weighted mean Feuchte_CWM = weighted.mean(x = Feuchte, w = Deckung, na.rm = TRUE) %>% round(., 1) ) # plot the plot-level Ellenberg humidity op <- par(mfcol = c(1, 2), mar = c(4, 4, 2, 2)) plot(Feuchte_CWM ~ WasserHH, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "mittlere Feuchte nach Ellenberg" ) points(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20) legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20) plot(Feuchte_CWM ~ Bodenfeuchte, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "") points(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20) legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20) par(op) # calculate correlation between the water availability and plot-level Ellenberg humidity cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, method = "kendall", exact = FALSE) cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE) cor.test(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE ) # add the calculated mean Ellenberg humidity to the main data table veg %<>% left_join(WasserHH.Feuchte) glimpse(veg) # analyse LDMC ------------------------------------------------------------------------------------- # LDMC = leaf dry matter content # inspect the distribution of LDMC visually hist(veg$LDMC) # plot LDMC across water availability levels boxplot( LDMC ~ WasserHH, dat = veg, ylab = "LDMC", xlab = "geschaetzte Wasserverfuegbarkeit (ordinal)", col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y" ) boxplot(LDMC ~ Feuchte_CWM, dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg", x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y" ) ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + geom_point(na.rm = TRUE) + stat_smooth(method = "lm", col = '#E69F00', se = FALSE, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]") + theme_minimal() # perform regression analyses # using WasserHH as explanatory variable fm0.LDMC <- lm(LDMC ~ WasserHH, data = veg) summary(fm0.LDMC) performance::check_model(fm0.LDMC) fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover summary(fm1.LDMC) performance::check_model(fm1.LDMC) # compare the two models tbl_merge( tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)), tab_spanner = c("**Non-weighted**", "**Weigted**") ) # using Bodenfeuchte as explanatory variable fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg) summary(fm0.LDMC.b) performance::check_model(fm0.LDMC.b) fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.LDMC.b) performance::check_model(fm1.LDMC.b) # compare the two models tbl_merge( tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)), tab_spanner = c("**Non-weighted**", "**Weigted**") ) ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + geom_point(na.rm = TRUE) + geom_abline(intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2], col = '#E69F00', size = 1, na.rm = TRUE) + geom_abline(intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2], col = '#56B4E9', size = 1, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]") + theme_minimal() + annotate("text", x = 20, y = 270, label = "Non-weighted", col = '#E69F00') + annotate("text", x = 25, y = 320, label = "Weighted", col = '#56B4E9') # analyse SLA -------------------------------------------------------------------------------------- # SLA = specific leaf area # inspect the distribution of SLA visually hist(veg$SLA) hist(log(veg$SLA)) # test the normality of distribution shapiro_test(veg$SLA) shapiro_test(log(veg$SLA)) # add log-transformed values to the main data table veg %<>% mutate(SLA.log = log(SLA)) # plot the distribution of SLA across water availability levels op <- par(mar = c(4, 5, 2, 3)) boxplot(SLA ~ WasserHH, dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = "SLA", col = "blue", log = "y" ) filter(veg, WasserHH == 2 & SLA > 25) %>% select(WissName, Aufnahmenr, SLA) filter(veg, WissName == "Euphorbia cyparissias") %>% select(WissName, Aufnahmenr, SLA) boxplot(SLA.log ~ WasserHH, dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(log("SLA")), col = "blue" ) par(op) filter(veg, WasserHH == 7 & SLA.log < 3) %>% select(WissName, Aufnahmenr, SLA) # perform regression analyses fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg) summary(fm1.SLA) performance::check_model(fm1.SLA) fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg) summary(fm1.SLA.log) performance::check_model(fm1.SLA.log) fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.SLA.log.b) performance::check_model(fm1.SLA.log.b) # plot the regression line ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) + geom_point(na.rm = TRUE) + geom_abline(intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2], col = '#56B4E9', size = 1, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) + theme_minimal() # look at the relationship between SLA and LDMC plot(SLA ~ LDMC, data = veg) cor.test(veg$SLA, veg$LDMC, use = "complete.obs") # analyse height ----------------------------------------------------------------------------------- # inspect distributions visually hist(veg$Hoehe.avg) hist(log(veg$Hoehe.avg)) # test the normality of distribution shapiro_test(veg$Hoehe.avg) shapiro_test(log(veg$Hoehe.avg)) # add log-transformed values to the main data table veg %<>% mutate(Hoehe.avg.log = log(Hoehe.avg)) # plot the distribution of height across water availability levels op <- par(mar = c(4, 5, 2, 3)) boxplot(Hoehe.avg ~ WasserHH, data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = "Wuchshöhe [cm]", col = rainbow(7, start = 1, end = 5 / 7), log = "y" ) filter(veg, WasserHH == 5 & Hoehe.avg > 50) %>% select(WissName, Aufnahmenr, Hoehe.avg) filter(veg, WasserHH == 7 & Hoehe.avg < 20) %>% select(WissName, Aufnahmenr, Hoehe.avg) boxplot(Hoehe.avg.log ~ WasserHH, data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(log("Wuchshöhe [cm]")), col = rainbow(7, start = 1, end = 5 / 7) ) par(op) # perform regression analysis fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg) summary(fm1.hoehe.avg.log) performance::check_model(fm1.hoehe.avg.log) fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg) summary(fm1.hoehe.avg.log.b) performance::check_model(fm1.hoehe.avg.log.b) # plot the regression line ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) + geom_point(na.rm = TRUE) + geom_abline(intercept = fm1.hoehe.avg.log.b$coefficients[1], slope = fm1.hoehe.avg.log.b$coefficients[2], col = '#56B4E9', size = 1, na.rm = TRUE) + geom_smooth(col = "red", se = FALSE, na.rm = TRUE) + labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) + theme_minimal() # analyse height variance -------------------------------------------------------------------------- # plot height variance hist(veg$Hoehe.var) hist(log(veg$Hoehe.var)) # add log-transformed values to the main data table veg %<>% mutate(Hoehe.var.log = log(Hoehe.var)) # check the relationship between height and height variance boxplot(Hoehe.var.log ~ WasserHH, data = veg) plot(Hoehe.var.log ~ Hoehe.avg, data = veg) cor.test(veg$Hoehe.var.log, veg$Hoehe.avg) # calculate the coefficient of variation veg %<>% mutate(Hoehe.cv = sqrt(Hoehe.var) / Hoehe.avg * 100) hist(veg$Hoehe.cv) hist(log(veg$Hoehe.cv)) veg %<>% mutate(Hoehe.cv.log = log(Hoehe.cv)) plot(Hoehe.cv.log ~ Hoehe.avg, data = veg) cor.test(veg$Hoehe.cv.log, veg$Hoehe.avg) boxplot(Hoehe.cv ~ WasserHH, data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1] ) filter(veg, WasserHH == 3 & Hoehe.cv > 100) %>% select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv) # perform regression analysis fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung) summary(fm1.Hoehe.cv.log) performance::check_model(fm1.Hoehe.cv.log) fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung) summary(fm1.Hoehe.cv.log.b) performance::check_model(fm1.Hoehe.cv.log.b) # analyse seed mass -------------------------------------------------------------------------------- hist(veg$Germinulenmasse) hist(log(veg$Germinulenmasse)) veg %<>% mutate(Germinulenmasse.log = log(Germinulenmasse)) op <- par(mar = c(4, 5, 2, 3)) boxplot(Germinulenmasse ~ WasserHH, data = veg, log = "y") boxplot(Germinulenmasse.log ~ WasserHH, data = veg, ylab = expression(log("Germinulenmasse"))) filter(veg, Germinulenmasse.log > 4) %>% select(WissName, Germinulenmasse) par(op) # perform regression analysis fm1.gmasse.log <- lm(Germinulenmasse.log ~ WasserHH, data = veg, weight = Deckung) summary(fm1.gmasse.log) check_model(fm1.gmasse.log) fm1.gmasse.log.b <- lm(Germinulenmasse.log ~ Bodenfeuchte, data = veg, weight = Deckung) summary(fm1.gmasse.log.b) check_model(fm1.gmasse.log.b) # analyse life form -------------------------------------------------------------------------------- # compare the distributions of life forms across vegetation types select(veg, Lebensform, Vegetation) %>% gtsummary::tbl_summary(by = Vegetation) %>% add_p() # calculate total % cover per life form across humidity levels lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>% summarise(Deckung = sum(Deckung)) head(lft.1) tail(lft.1) lft.2 <- group_by(lft.1, Lebensform, WasserHH) %>% summarise(Deckung = mean(Deckung), .groups = "drop") head(lft.2) lft.3 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.2) lft.3 <- DescTools::as.matrix.xtabs(lft.3) lft.3 chisq_test(lft.3, simulate.p.value = TRUE) # plot the distribution of life forms across humidity levels # using base R op <- par(mar = c(4, 5, 2, 3)) barplot(lft.3, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")), main = "Lebensformenspektren", ylim = c(0, 160) ) legend(5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3))) par(op) # using ggplot2 ggplot(data = lft.2, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) + geom_bar(position = "stack", stat = "identity") + viridis::scale_fill_viridis(discrete = TRUE) + labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + ggtitle("Lebensformenspektren") + theme_minimal() # analyse pollination type ------------------------------------------------------------------------- # compare the distributions of pollination syndromes across vegetation types select(veg, Bestaeubung, Vegetation) %>% gtsummary::tbl_summary(by = Vegetation) %>% add_p() # calculate total % cover per pollination type across humidity levels bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>% summarise(Deckung = sum(Deckung)) head(bst.1) tail(bst.1) bst.2 <- group_by(bst.1, Bestaeubung, WasserHH) %>% summarise(Deckung = mean(Deckung), .groups = "drop") head(bst.2) bst.3 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.2) bst.3 <- DescTools::as.matrix.xtabs(bst.3) bst.3 chisq_test(bst.3, simulate.p.value = TRUE) # plot the distribution of pollination types # using base R op <- par(mar = c(4, 5, 2, 3)) cols <- c("yellow", "brown", "grey", "lightblue") barplot(bst.3, xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")), main = "Bestäubungstypenspektren", sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols ) legend(0, 140, legend = c("Wind", "Selbst", "multipel", "Insekten"), pch = 15, col = cols[4:1], cex = 0.7 ) # using ggplot2 ggplot(data = bst.2, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) + geom_bar(position = "stack", stat = "identity") + viridis::scale_fill_viridis( discrete = TRUE, labels = c("Insekten", "multipel", "Selbst", "Wind") ) + labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) + ggtitle("Bestäubungstypenspektren") + theme_minimal() # analyse the relationships between traits --------------------------------------------------------- # test if height varies across life forms # plot height by life form ggpubr::ggboxplot(veg, x = "Lebensform", y = "Hoehe.avg.log", select = c("H", "G", "T", "mL")) # check the ANOVA assumptions # outliers group_by(veg, Lebensform) %>% identify_outliers(Hoehe.avg.log) # normality lm.hoe.lf <- lm(Hoehe.avg.log ~ Lebensform, data = veg) ggpubr::ggqqplot(residuals(lm.hoe.lf)) rstatix::shapiro_test(residuals(lm.hoe.lf)) # homogeneity of variances levene_test(veg, Hoehe.avg.log ~ Lebensform) # perform ANOVA and the Kruskal-Wallis test (non-parametric alternative) anova_test(veg, Hoehe.avg.log ~ Lebensform) kruskal_test(veg, Hoehe.avg.log ~ Lebensform) # test if the height varies across pollination types ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log") group_by(veg, Bestaeubung) %>% identify_outliers(Hoehe.avg.log) %>% select(Fortlaufendenr, WissName, Hoehe.avg, Hoehe.avg.log, is.outlier, is.extreme) lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg) ggqqplot(residuals(lm.hoe.best)) shapiro_test(residuals(lm.hoe.best)) levene_test(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.aov <- anova_test(veg, Hoehe.avg.log ~ Bestaeubung) # perform a post-hoc test (i.e. pairwise comparisons) hoe.best.pwc <- tukey_hsd(veg, Hoehe.avg.log ~ Bestaeubung) hoe.best.pwc # plot the results hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung") ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") + stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) + labs( subtitle = get_test_label(hoe.best.aov, detailed = TRUE), caption = get_pwc_label(hoe.best.pwc), y = expression(log("Wuchshöhe")), x = "Bestäubungstyp" ) # test if the SLA varies across life forms ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL")) group_by(veg, Lebensform) %>% identify_outliers(SLA.log) lm.sla.lf <- lm(SLA.log ~ Lebensform, data = veg) ggqqplot(residuals(lm.sla.lf)) shapiro_test(residuals(lm.sla.lf)) levene_test(veg, SLA.log ~ Lebensform) anova_test(veg, SLA.log ~ Lebensform) # test if the seed mass varies across life forms gmasse <- select(veg, WissName, Germinulenmasse.log, Lebensform, Bestaeubung) %>% distinct() ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") group_by(gmasse, Lebensform) %>% identify_outliers(Germinulenmasse.log) lm.gmasse.lf <- lm(Germinulenmasse.log ~ Lebensform, data = gmasse) ggqqplot(residuals(lm.gmasse.lf)) shapiro_test(residuals(lm.gmasse.lf)) levene_test(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform) gmasse.lf.pwc gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform") ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") + stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) + labs( subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE), caption = get_pwc_label(gmasse.lf.pwc), y = expression(log("Germinulenmasse")), x = "Lebensform" ) # test if the seed mass varies across pollination types ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") group_by(gmasse, Bestaeubung) %>% identify_outliers(Germinulenmasse.log) fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse) ggpubr::ggqqplot(residuals(fm.gmasse.best)) rstatix::shapiro_test(residuals(fm.gmasse.best)) ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung") gmasse.best.aov <- anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung) gmasse.best.aov gmasse.best.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Bestaeubung) gmasse.best.pwc gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung") ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") + stat_pvalue_manual(gmasse.best.pwc, hide.ns = TRUE) + labs( subtitle = get_test_label(gmasse.best.aov, detailed = TRUE), caption = get_pwc_label(gmasse.best.pwc), y = expression(log("Germinulenmasse")), x = "Bestäubungstyp" ) # analyse species composition ---------------------------------------------------------------------- mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg) str(mat) # perform correspondence analysis decorana(mat) fm.ca <- cca(mat) fm.ca summary(fm.ca) plot(fm.ca, display = "sites") plot(fm.ca) # shorten taxon names wname <- str_remove(veg$WissName, pattern = " cf\\.| x") %>% str_split(pattern = " ", simplify = TRUE) %>% substr(., 1, 3) wname <- paste(wname[, 1], wname[, 2], sep = ".") veg %<>% mutate(wname = wname) # repeat the same analysis with shortened taxon names provided mat <- xtabs(Deckung ~ Aufnahmenr + wname, data = veg) fm.ca <- cca(mat) summary(fm.ca) # plot the results summary(fm.ca)$species %>% as_tibble() %>% mutate(wname = rownames(summary(fm.ca)$species)) %>% ggplot(aes(x = CA1, y = CA2, label = wname)) + geom_point() + ggrepel::geom_text_repel(size = 1.5, force = 15, max.overlaps = 30) + theme_bw() # prepare data for trait ordination ---------------------------------------------------------------- # calculate the proportion of each life form per plot lf.kt <- select(veg, Aufnahmenr, Deckung, Lebensform) %>% pivot_wider( names_from = Lebensform, values_from = Deckung, values_fn = sum, values_fill = 0, names_prefix = "lf." ) %>% mutate(total = rowSums(.[, -1])) %>% mutate(across(lf.H:lf.M, ~ . / total), .keep = "unused") # calculate the proportion of each pollination type per plot best.kt <- select(veg, Aufnahmenr, Deckung, Bestaeubung) %>% pivot_wider( names_from = Bestaeubung, values_from = Deckung, values_fn = sum, values_fill = 0, names_prefix = "b." ) %>% mutate(total = rowSums(.[, -1])) %>% mutate(across(b.wi:b.mb, ~ . / total), .keep = "unused") # compute community weighted means for numerical traits cwm <- group_by(veg, Aufnahmenr) %>% summarise(across( c(Hoehe.avg.log, Hoehe.cv.log, LDMC, SLA.log, Germinulenmasse.log), ~ weighted.mean(., w = Deckung, na.rm = TRUE) )) %>% rename( Ho.av = Hoehe.avg.log, Ho.cv = Hoehe.cv.log, SLA = SLA.log, GM = Germinulenmasse.log ) # combine the three tables cwm %<>% left_join(lf.kt) %>% left_join(best.kt) cwm # perform principal components analysis (PCA) of CWMs ---------------------------------------------- # analyse all traits pca1 <- FactoMineR::PCA(cwm[, -c(1:2)]) summary(pca1) cowplot::plot_grid( plot.PCA(pca1), plot.PCA(pca1, c(1, 3)) ) # analyse numerical traits only colnames(cwm) pca2 <- PCA(select(cwm, Ho.av:GM)) cowplot::plot_grid( plot.PCA(pca2), plot.PCA(pca2, c(1, 3)) ) # perform PCA of species traits -------------------------------------------------------------------- # create a separate table containing numerical traits dat <- select( veg, WissName, Vegetation, Blattflaeche, Frischmasse, Trockenmasse, SLA, LDMC, Hoehe.avg, Hoehe.var, Germinulenmasse ) %>% rename( BlF = Blattflaeche, FrM = Frischmasse, TrM = Trockenmasse, Ho.av = Hoehe.avg, Ho.var = Hoehe.var, GM = Germinulenmasse ) %>% na.omit() glimpse(dat) # perform PCA pca3 <- FactoMineR::PCA(dat[, -c(1:2)]) summary(pca3) plot.PCA(pca3) # add PC to the trait table dat$pc1 <- pca3$ind$coord[, 1] dat$pc2 <- pca3$ind$coord[, 2] pc12 <- pca3$ind$coord[, 1:2] # plot the PCA results ggplot(data = dat, aes(x = pc1, y = pc2, color = Vegetation, shape = Vegetation)) + geom_hline(yintercept = 0, lty = 2) + geom_vline(xintercept = 0, lty = 2) + geom_point(alpha = .8) + stat_ellipse( geom = "polygon", aes(fill = Vegetation), alpha = .2, show.legend = FALSE, level = .95 ) + guides( color = guide_legend(title = "Vegetation"), shape = guide_legend(title = "Vegetation") ) + labs(x = "PC 1 (45.24%)", y = "PC 2 (20.27%)") + theme_minimal() + theme( panel.grid = element_blank(), panel.border = element_rect(fill = "transparent"), legend.position = "bottom" ) # plot PCA results using kernels # Code from Björn Reu # Used to produce Fig. 2 in Diaz et al. 2016, Nature H <- Hpi(x = pc12) # optimal bandwidth estimation est <- kde(x = pc12, H = H, compute.cont = TRUE) # kernel density estimation # set contour probabilities for drawing contour levels cl <- contourLevels(est, prob = c(.5, .05, .001), approx = TRUE) # use envfit for drawing arrows, can be also done using trait loadings fit <- envfit(pc12, select(dat, BlF:GM)) # create a plot par(mar = c(4, 4, 2, 2)) cols <- ifelse(dat$Vegetation == "TR", "darkred", "darkblue") plot(est, cont = seq(1, 100, by = 1), display = "filled.contour2", add = FALSE, ylab = "", xlab = "", cex.axis = .75, ylim = c(-5, 4), xlim = c(-3, 7), las = 1 ) plot(est, abs.cont = cl[1], labels = .5, labcex = .75, add = TRUE, lwd = .75, col = "grey30") plot(est, abs.cont = cl[2], labels = .95, labcex = .75, add = TRUE, lwd = .5, col = "grey60") plot(est, abs.cont = cl[3], labels = .99, labcex = .75, add = TRUE, lwd = .5, col = "grey60") points(pc12, pch = 16, cex = .5, col = cols) plot(fit, cex = .90, col = 1) mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8) mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8) dat[dat$pc1 > 4, ] dat[dat$pc2 < -3, ]