MarinaGolivets · July 13, 2022 06:37 · Jul 13, 2022 · Jul 13, 2022 · Jul 12, 2022 · Jul 12, 2022
diff --git a/GP_Oekologie_2022.R b/GP_Oekologie_2022.R
@@ -40,7 +40,7 @@ for (p in pkgs) {
   library(p, character.only = TRUE)
 }
 
-# choose an option to treat warnings as errors
+# choose an option to not treat warnings as errors
 options(warn = 1)
 
 # check what packages are being used on your machine

diff --git a/GP_Oekologie_2022.R b/GP_Oekologie_2022.R
@@ -772,6 +772,8 @@ write_csv2(veg, here("veg_data_2020_edited.csv"))
 
 # analyse species composition ----------------------------------------------------------------------
 
+veg <- read_csv2(here("veg_data_2022_edited.csv"))
+
 mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
 str(mat)
 

diff --git a/GP_Oekologie_2022.R b/GP_Oekologie_2022.R
@@ -50,6 +50,7 @@ sessionInfo()
 here()
 
 # load data ----------------------------------------------------------------------------------------
+
 if (length(list.files(here(), "veg_data_2022.csv")) == 0) {
   # load the data directly from the Google sheet
   veg <- googlesheets4::read_sheet(
@@ -664,6 +665,8 @@ levene_test(veg, Hoehe.avg.log ~ Lebensform)
 
 # perform ANOVA and the Kruskal-Wallis test (non-parametric alternative)
 anova_test(veg, Hoehe.avg.log ~ Lebensform)
+# because the normality of residuals assumption was violated
+# the Kruskal Wallis test should be used instead of a standard ANOVA
 kruskal_test(veg, Hoehe.avg.log ~ Lebensform)
 
 
@@ -677,10 +680,12 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
 ggqqplot(residuals(lm.hoe.best))
 shapiro_test(residuals(lm.hoe.best))
 levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
-hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)
-
 
-# perform a post-hoc test (i.e. pairwise comparisons)
+# because the normality of residuals assumption was violated
+# we use the Kruskal Wallis test instead of a standard ANOVA
+# and the Dunn's post hoc test for pairwise comparisons
+hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)
+hoe.best.kw
 hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung)
 hoe.best.pwc
 
@@ -742,9 +747,14 @@ fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse)
 ggpubr::ggqqplot(residuals(fm.gmasse.best))
 rstatix::shapiro_test(residuals(fm.gmasse.best))
 ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung")
-gmasse.best.aov <- anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
+levene_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
+
+# because the homogeneity of variance assumption was violated
+# we use the Welch test instead of a standard ANOVA
+# and the Games-Howell post hoc test for pairwise comparisons
+gmasse.best.aov <- welch_anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
 gmasse.best.aov
-gmasse.best.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Bestaeubung)
+gmasse.best.pwc <- games_howell_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
 gmasse.best.pwc
 
 gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung")
@@ -931,4 +941,4 @@ mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8)
 mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8)
 
 dat[dat$pc1 > 4, ]
-dat[dat$pc2 < -3, ]
+dat[dat$pc2 < -3, ]
diff --git a/GP_Oekologie_2022.R b/GP_Oekologie_2022.R
@@ -757,6 +757,9 @@ ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") +
     x = "Bestäubungstyp"
   )
 
+# save the edited data as a separate file
+write_csv2(veg, here("veg_data_2020_edited.csv"))
+
 # analyse species composition ----------------------------------------------------------------------
 
 mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)

diff --git a/GP_Oekologie_2022.R b/GP_Oekologie_2022.R
@@ -29,9 +29,11 @@ pkgs <- c(
 )
 
 # install (if not installed) and load all the packages
-install.packages("devtools")
-devtools::install_github("ddsjoberg/gtsummary")
-devtools::install_github("ropensci/skimr")
+
+# # run only once!
+# install.packages("devtools")
+# devtools::install_github("ddsjoberg/gtsummary")
+# devtools::install_github("ropensci/skimr")
 
 for (p in pkgs) {
   if (!require(p, character.only = TRUE)) install.packages(p)
@@ -48,23 +50,22 @@ sessionInfo()
 here()
 
 # load data ----------------------------------------------------------------------------------------
-
-# load the data directly from the Google sheet
-veg <- googlesheets4::read_sheet(
-  "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
-  sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
-)
+if (length(list.files(here(), "veg_data_2022.csv")) == 0) {
+  # load the data directly from the Google sheet
+  veg <- googlesheets4::read_sheet(
+    "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
+    sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
+  )
+  # save the data to your home directory
+  readr::write_csv2(veg, here("veg_data_2022.csv"))
+} else {
+  # load the data from your home directory
+  veg <- readr::read_csv2(here("veg_data_2022.csv"))
+}
 
 # take a quick look at the data
 glimpse(veg)
 
-# save the data to your home directory
-readr::write_csv2(veg, here("veg_data_2022.csv"))
-
-# load the data from your home directory
-veg <- readr::read_csv2(here("veg_data_2022.csv"))
-glimpse(veg)
-
 # rename columns -----------------------------------------------------------------------------------
 
 veg %<>%
@@ -135,6 +136,7 @@ veg %<>%
     Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols
       as.integer() # convert to integer
   )
+veg[veg$Fortlaufendenr == 41, "Feuchte"] <- NA
 
 # check taxon names
 tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE)
@@ -144,9 +146,10 @@ veg %<>%
     WissName = recode(
       WissName,
       `Taraxacum officinalis` = "Taraxacum officinale",
-      `Viola arvense` = "Viola arvensis",
+      `Viola arvense` = "Viola arvensis"
     )
   )
+veg[veg$Fortlaufendenr == 41, "WissName"] <- "Agrostis capillaris"
 
 # check leaf area
 summary(veg$Blattflaeche)
@@ -169,6 +172,7 @@ veg %<>%
   mutate(Germinulenmasse = case_when(
     WissName == "Cerastium semidecandrum" ~ .04,
     WissName == "Agrostis stolonifera" ~ .07,
+    Fortlaufendenr == 41 ~ .1,
     TRUE ~ Germinulenmasse
   ))
 
@@ -220,6 +224,9 @@ veg %<>%
   ))
 summary(veg$Germinulenmasse)
 
+# convert life form and pollination to factor
+veg %<>% mutate(across(c(Lebensform, Bestaeubung), ~ as.factor(.)))
+
 # calculate SLA & LDMC -----------------------------------------------------------------------------
 
 veg %<>%
@@ -325,9 +332,9 @@ boxplot(LDMC ~ Feuchte_CWM,
   dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg",
   x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
 )
-ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + 
+ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
   geom_point(na.rm = TRUE) +
-  stat_smooth(method = "lm", col = '#E69F00', se = FALSE, na.rm = TRUE) +
+  stat_smooth(method = "lm", col = "#E69F00", se = FALSE, na.rm = TRUE) +
   labs(x = "Bodenfeuchte [%]") +
   theme_minimal()
 
@@ -339,39 +346,44 @@ performance::check_model(fm0.LDMC)
 
 fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover
 summary(fm1.LDMC)
-performance::check_model(fm1.LDMC)
+check_model(fm1.LDMC)
 
 # compare the two models
 tbl_merge(
   tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)),
   tab_spanner = c("**Non-weighted**", "**Weigted**")
 )
 
-# using Bodenfeuchte as explanatory variable
+# using soil moisture (Bodenfeuchte) as explanatory variable
 fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg)
 summary(fm0.LDMC.b)
-performance::check_model(fm0.LDMC.b)
+check_model(fm0.LDMC.b)
 
 fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg)
 summary(fm1.LDMC.b)
-performance::check_model(fm1.LDMC.b)
+check_model(fm1.LDMC.b)
 
 # compare the two models
 tbl_merge(
   tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)),
   tab_spanner = c("**Non-weighted**", "**Weigted**")
 )
 
-ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + 
+plot1 <- ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
   geom_point(na.rm = TRUE) +
-  geom_abline(intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2], 
-              col = '#E69F00', size = 1, na.rm = TRUE) +
-  geom_abline(intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2], 
-              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  geom_abline(
+    intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2],
+    col = "#E69F00", size = 1, na.rm = TRUE
+  ) +
+  geom_abline(
+    intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2],
+    col = "#56B4E9", size = 1, na.rm = TRUE
+  ) +
   labs(x = "Bodenfeuchte [%]") +
   theme_minimal() +
-  annotate("text", x = 20, y = 270, label = "Non-weighted", col = '#E69F00') +
-  annotate("text", x = 25, y = 320, label = "Weighted", col = '#56B4E9')
+  annotate("text", x = 20, y = 270, label = "Non-weighted", col = "#E69F00") +
+  annotate("text", x = 25, y = 320, label = "Weighted", col = "#56B4E9")
+plot1
 
 # analyse SLA --------------------------------------------------------------------------------------
 
@@ -410,23 +422,26 @@ filter(veg, WasserHH == 7 & SLA.log < 3) %>%
 # perform regression analyses
 fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg)
 summary(fm1.SLA)
-performance::check_model(fm1.SLA)
+check_model(fm1.SLA)
 
 fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg)
 summary(fm1.SLA.log)
-performance::check_model(fm1.SLA.log)
+check_model(fm1.SLA.log)
 
 fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg)
 summary(fm1.SLA.log.b)
-performance::check_model(fm1.SLA.log.b)
+check_model(fm1.SLA.log.b)
 
 # plot the regression line
-ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) + 
+plot2 <- ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) +
   geom_point(na.rm = TRUE) +
-  geom_abline(intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2], 
-              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  geom_abline(
+    intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2],
+    col = "#56B4E9", size = 1, na.rm = TRUE
+  ) +
   labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) +
   theme_minimal()
+plot2
 
 # look at the relationship between SLA and LDMC
 plot(SLA ~ LDMC, data = veg)
@@ -453,9 +468,9 @@ boxplot(Hoehe.avg ~ WasserHH,
   log = "y"
 )
 
-filter(veg, WasserHH == 5 & Hoehe.avg > 50) %>%
+filter(veg, WasserHH == 6 & Hoehe.avg > 50) %>%
   select(WissName, Aufnahmenr, Hoehe.avg)
-filter(veg, WasserHH == 7 & Hoehe.avg < 20) %>%
+filter(veg, WasserHH == 8 & Hoehe.avg < 20) %>%
   select(WissName, Aufnahmenr, Hoehe.avg)
 
 boxplot(Hoehe.avg.log ~ WasserHH,
@@ -467,21 +482,24 @@ par(op)
 # perform regression analysis
 fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg)
 summary(fm1.hoehe.avg.log)
-performance::check_model(fm1.hoehe.avg.log)
+check_model(fm1.hoehe.avg.log)
 
 fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg)
 summary(fm1.hoehe.avg.log.b)
-performance::check_model(fm1.hoehe.avg.log.b)
+check_model(fm1.hoehe.avg.log.b)
 
 # plot the regression line
-ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) + 
+plot3 <- ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) +
   geom_point(na.rm = TRUE) +
-  geom_abline(intercept = fm1.hoehe.avg.log.b$coefficients[1], 
-              slope = fm1.hoehe.avg.log.b$coefficients[2], 
-              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  geom_abline(
+    intercept = fm1.hoehe.avg.log.b$coefficients[1],
+    slope = fm1.hoehe.avg.log.b$coefficients[2],
+    col = "#56B4E9", size = 1, na.rm = TRUE
+  ) +
   geom_smooth(col = "red", se = FALSE, na.rm = TRUE) +
   labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) +
   theme_minimal()
+plot3
 
 # analyse height variance --------------------------------------------------------------------------
 
@@ -512,17 +530,17 @@ boxplot(Hoehe.cv ~ WasserHH,
   data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
   ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1]
 )
-filter(veg, WasserHH == 3 & Hoehe.cv > 100) %>%
+filter(veg, WasserHH == 4 & Hoehe.cv > 100) %>%
   select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv)
 
 # perform regression analysis
 fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung)
 summary(fm1.Hoehe.cv.log)
-performance::check_model(fm1.Hoehe.cv.log)
+check_model(fm1.Hoehe.cv.log)
 
 fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung)
 summary(fm1.Hoehe.cv.log.b)
-performance::check_model(fm1.Hoehe.cv.log.b)
+check_model(fm1.Hoehe.cv.log.b)
 
 # analyse seed mass --------------------------------------------------------------------------------
 
@@ -560,14 +578,10 @@ lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>%
 head(lft.1)
 tail(lft.1)
 
-lft.2 <- group_by(lft.1, Lebensform, WasserHH) %>%
-  summarise(Deckung = mean(Deckung), .groups = "drop")
-head(lft.2)
-
-lft.3 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.2)
-lft.3 <- DescTools::as.matrix.xtabs(lft.3)
-lft.3
-chisq_test(lft.3, simulate.p.value = TRUE)
+lft.2 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.1)
+lft.2 <- DescTools::as.matrix.xtabs(lft.3)
+lft.2
+chisq_test(lft.2, simulate.p.value = TRUE)
 
 # plot the distribution of life forms across humidity levels
 # using base R
@@ -577,11 +591,11 @@ barplot(lft.3,
   ylab = expression(sum("Deckung")),
   main = "Lebensformenspektren", ylim = c(0, 160)
 )
-legend(5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
+legend(8.5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
 par(op)
 
 # using ggplot2
-ggplot(data = lft.2, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
+ggplot(data = lft.1, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
   geom_bar(position = "stack", stat = "identity") +
   viridis::scale_fill_viridis(discrete = TRUE) +
   labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
@@ -601,14 +615,10 @@ bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>%
 head(bst.1)
 tail(bst.1)
 
-bst.2 <- group_by(bst.1, Bestaeubung, WasserHH) %>%
-  summarise(Deckung = mean(Deckung), .groups = "drop")
-head(bst.2)
-
-bst.3 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.2)
-bst.3 <- DescTools::as.matrix.xtabs(bst.3)
-bst.3
-chisq_test(bst.3, simulate.p.value = TRUE)
+bst.2 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.1)
+bst.2 <- DescTools::as.matrix.xtabs(bst.2)
+bst.2
+chisq_test(bst.2, simulate.p.value = TRUE)
 
 # plot the distribution of pollination types
 # using base R
@@ -620,19 +630,20 @@ barplot(bst.3,
   sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols
 )
 legend(0, 140,
-  legend = c("Wind", "Selbst", "multipel", "Insekten"),
+  legend = c("Wind", "Selbst", "multiple", "Insekten"),
   pch = 15, col = cols[4:1], cex = 0.7
 )
 
 # using ggplot2
-ggplot(data = bst.2, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
+plot4 <- ggplot(data = bst.1, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
   geom_bar(position = "stack", stat = "identity") +
   viridis::scale_fill_viridis(
-    discrete = TRUE, labels = c("Insekten", "multipel", "Selbst", "Wind")
+    discrete = TRUE, labels = c("Insekten", "multiple", "Selbst", "Wind")
   ) +
   labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
   ggtitle("Bestäubungstypenspektren") +
   theme_minimal()
+plot4
 
 # analyse the relationships between traits ---------------------------------------------------------
 
@@ -657,6 +668,7 @@ kruskal_test(veg, Hoehe.avg.log ~ Lebensform)
 
 
 # test if the height varies across pollination types
+veg$Bestaeubung <- as.factor(veg$Bestaeubung)
 ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log")
 group_by(veg, Bestaeubung) %>%
   identify_outliers(Hoehe.avg.log) %>%
@@ -665,23 +677,24 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
 ggqqplot(residuals(lm.hoe.best))
 shapiro_test(residuals(lm.hoe.best))
 levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
-hoe.best.aov <- anova_test(veg, Hoehe.avg.log ~ Bestaeubung)
+hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)
+
 
 # perform a post-hoc test (i.e. pairwise comparisons)
-hoe.best.pwc <- tukey_hsd(veg, Hoehe.avg.log ~ Bestaeubung)
+hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung)
 hoe.best.pwc
 
 # plot the results
 hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung")
-ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
+plot6 <- ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
   stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) +
   labs(
-    subtitle = get_test_label(hoe.best.aov, detailed = TRUE),
+    subtitle = get_test_label(hoe.best.kw, detailed = TRUE),
     caption = get_pwc_label(hoe.best.pwc),
     y = expression(log("Wuchshöhe")),
     x = "Bestäubungstyp"
   )
-
+plot6
 
 # test if the SLA varies across life forms
 ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL"))
@@ -705,18 +718,20 @@ ggqqplot(residuals(lm.gmasse.lf))
 shapiro_test(residuals(lm.gmasse.lf))
 levene_test(gmasse, Germinulenmasse.log ~ Lebensform)
 gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform)
+gmasse.lf.aov
 gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform)
 gmasse.lf.pwc
 
 gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform")
-ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
+plot7 <- ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
   stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) +
   labs(
     subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE),
     caption = get_pwc_label(gmasse.lf.pwc),
     y = expression(log("Germinulenmasse")),
     x = "Lebensform"
   )
+plot7
 
 
 # test if the seed mass varies across pollination types

diff --git a/GP Oekologie 2022 → GP_Oekologie_2022.R b/GP Oekologie 2022 → GP_Oekologie_2022.R
diff --git a/GP Oekologie 2022 b/GP Oekologie 2022
@@ -0,0 +1,916 @@
+# GP Oekologie 2022
+# Ingolf Kühn & Marina Golivets
+
+
+# install & load R packages ------------------------------------------------------------------------
+
+rm(list = ls())
+
+# packages that we need for analyses
+pkgs <- c(
+  "here", # for handling working directory
+  "googlesheets4", # for loading data from Google spreadsheets
+  "tidyverse", # for data manipulation
+  "magrittr", # for piping
+  "skimr", # for summarizing data
+  "gtsummary", # for summarizing data
+  "taxize", # for checking taxonomic names
+  "TR8", # for retrieving trait data from databases
+  "performance", # for inspecting regression models,
+  "rstatix", # for statistical analyses, e.g. ANOVA
+  "DescTools", # for miscellaneous basic stats
+  "ggpubr", # for plotting
+  "ggrepel", # for plotting
+  "viridis", # for plotting
+  "cowplot", # for plotting
+  "vegan", # for multivariate analyses
+  "FactoMineR", # for PCA
+  "ks" # for kernel smoothers
+)
+
+# install (if not installed) and load all the packages
+install.packages("devtools")
+devtools::install_github("ddsjoberg/gtsummary")
+devtools::install_github("ropensci/skimr")
+
+for (p in pkgs) {
+  if (!require(p, character.only = TRUE)) install.packages(p)
+  library(p, character.only = TRUE)
+}
+
+# choose an option to treat warnings as errors
+options(warn = 1)
+
+# check what packages are being used on your machine
+sessionInfo()
+
+# check your working (home) directory
+here()
+
+# load data ----------------------------------------------------------------------------------------
+
+# load the data directly from the Google sheet
+veg <- googlesheets4::read_sheet(
+  "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
+  sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
+)
+
+# take a quick look at the data
+glimpse(veg)
+
+# save the data to your home directory
+readr::write_csv2(veg, here("veg_data_2022.csv"))
+
+# load the data from your home directory
+veg <- readr::read_csv2(here("veg_data_2022.csv"))
+glimpse(veg)
+
+# rename columns -----------------------------------------------------------------------------------
+
+veg %<>%
+  rename(
+    Fortlaufendenr = `...1`,
+    Aufnahmenr = Aufnahmenr.,
+    WissName = `Wissenschaftlicher Name`,
+    Hoehe.avg = `Höhe Ø (cm)`,
+    Hoehe.var = `Höhe σ²`,
+    Blattflaeche = `Blatt-Fläche (mm²)`,
+    Frischmasse = `Frischmasse (g)`,
+    Trockenmasse = `Trockenmasse (mg)`,
+    Germinulenmasse = `Germinulenmasse (mg)`,
+    Feuchte = `Ellenberg-Feuchte`,
+    Bodenfeuchte = `Bodenfeuchte (%vol)`
+  )
+
+# take a more detailed look at the data
+skimr::skim(veg)
+select(veg, -Fortlaufendenr, -Aufnahmenr, -WissName, -Anmerkungen) %>%
+  gtsummary::tbl_summary()
+
+# check data for errors ----------------------------------------------------------------------------
+
+# reassign expert water availability measures
+veg %<>% rows_update(tibble(Aufnahmenr = 1:8, WasserHH = c(1, 2, 3, 5, 6, 7, 8, 4)))
+
+# make sure plot-level data don't have duplicates
+select(veg, Aufnahmenr, Ort, Vegetation, WasserHH, Bodenfeuchte) %>%
+  n_distinct() # should equal the number of plots (n = 8)
+
+# make sure species are not duplicated within plots
+nrow(veg) # 141
+select(veg, Aufnahmenr, WissName) %>%
+  n_distinct() # should equal the number of rows (n = 141)
+
+# make sure species-level data (from databases) are unique
+select(veg, WissName) %>%
+  n_distinct() # 79
+select(veg, WissName, Lebensform, Germinulenmasse, Bestaeubung, Feuchte) %>%
+  n_distinct() # should equal the number of species (n = 79)
+
+# check life form
+table(veg$Lebensform)
+filter(veg, Lebensform == "G") %>%
+  pull(WissName) %>%
+  unique()
+veg %<>% mutate(Lebensform = case_when(
+  WissName == "Phalaris arundinacea" ~ "mL",
+  TRUE ~ Lebensform
+))
+
+# check % cover per species
+hist(veg$Deckung)
+
+# check total % cover per plot
+group_by(veg, Aufnahmenr) %>%
+  summarise(Deckung.tot = sum(Deckung))
+
+# check vegetation types at each site
+table(veg$Ort, veg$Vegetation)
+
+# check if Ellenberg humidity indicator values are integers
+unique(veg$Feuchte)
+unique(as.integer(veg$Feuchte))
+veg %<>%
+  mutate(
+    Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols
+      as.integer() # convert to integer
+  )
+
+# check taxon names
+tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE)
+View(tax)
+veg %<>%
+  mutate(
+    WissName = recode(
+      WissName,
+      `Taraxacum officinalis` = "Taraxacum officinale",
+      `Viola arvense` = "Viola arvensis",
+    )
+  )
+
+# check leaf area
+summary(veg$Blattflaeche)
+veg %<>% mutate(Blattflaeche = na_if(Blattflaeche, 0))
+
+# check seed mass
+summary(veg$Germinulenmasse)
+veg %>%
+  filter(Germinulenmasse == .001) %>%
+  pull(WissName) %>%
+  unique()
+View(available_tr8)
+gmasse_tr8 <- TR8::tr8(
+  species_list = c("Agrostis stolonifera", "Cerastium semidecandrum"),
+  download_list = c("seed_wght", "SeedMass", "seed_mass"),
+  allow_persistent = TRUE
+)
+gmasse_tr8
+veg %<>%
+  mutate(Germinulenmasse = case_when(
+    WissName == "Cerastium semidecandrum" ~ .04,
+    WissName == "Agrostis stolonifera" ~ .07,
+    TRUE ~ Germinulenmasse
+  ))
+
+# take a look at the data again
+skim(veg)
+
+# impute missing trait data ------------------------------------------------------------------------
+
+# life form (data from FloraWeb)
+select(veg, WissName, Lebensform) %>%
+  distinct() %>%
+  filter(is.na(Lebensform))
+veg %<>%
+  mutate(Lebensform = case_when(
+    WissName %in% c("Cerastium glomeratum", "Erophila verna") ~ "mL",
+    TRUE ~ Lebensform
+  ))
+
+# pollination type (data from FloraWeb)
+select(veg, WissName, Bestaeubung) %>%
+  distinct() %>%
+  filter(is.na(Bestaeubung))
+veg %<>%
+  mutate(Bestaeubung = case_when(
+    WissName %in% c(
+      "Cerastium glomeratum", "Erophila verna", "Valerianella spec."
+    ) ~ "se",
+    TRUE ~ Bestaeubung
+  ))
+
+# seed mass (data from Seed Information Database)
+select(veg, WissName, Germinulenmasse) %>%
+  distinct() %>%
+  filter(is.na(Germinulenmasse))
+veg %<>%
+  mutate(Germinulenmasse = case_when(
+    WissName == "Allium vineale" ~ 7,
+    WissName == "Carex ovalis" ~ .52,
+    WissName == "Cerastium glomeratum" ~ .05,
+    WissName == "Cerastium glutinosum" ~ .0654,
+    WissName == "Cruciata laevipes" ~ 3.5856,
+    WissName == "Erodium cicutarium" ~ 2,
+    WissName == "Erophila verna" ~ .024,
+    WissName == "Senecio erucifolius" ~ .3935,
+    WissName == "Saxifraga granulata" ~ .03,
+    WissName == "Prunus mahaleb" ~ 79.9,
+    WissName == "Polygonum persicaria" ~ 2.1,
+    TRUE ~ Germinulenmasse
+  ))
+summary(veg$Germinulenmasse)
+
+# calculate SLA & LDMC -----------------------------------------------------------------------------
+
+veg %<>%
+  mutate(
+    SLA = Blattflaeche / Trockenmasse,
+    LDMC = Trockenmasse / Frischmasse
+  )
+
+# check SLA
+hist(veg$SLA)
+
+# check LDMC
+hist(veg$LDMC)
+filter(veg, LDMC > 500) %>%
+  select(Fortlaufendenr, WissName, Deckung, Blattflaeche, Frischmasse, Trockenmasse, LDMC)
+
+# make corrections in the data
+veg %<>%
+  mutate(
+    Frischmasse = if_else(Fortlaufendenr == 127, .2063, Frischmasse),
+    Trockenmasse = if_else(Fortlaufendenr == 79, 85, Trockenmasse) # it's a guess!
+  )
+
+# recalculate SLA & LDMC
+veg %<>%
+  mutate(
+    SLA = Blattflaeche / Trockenmasse,
+    LDMC = Trockenmasse / Frischmasse
+  )
+
+# analyse species numbers --------------------------------------------------------------------------
+
+# check the number of species per plot
+artz1 <- table(veg$Aufnahmenr)
+artz1
+
+# check the number of species at each water availability level
+artz2 <- table(veg$WasserHH)
+artz2
+# test whether the observed species numbers differ from the expected
+chisq.test(artz2)
+chisq.test(artz2)$expected
+barplot(artz2,
+  xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = "Artenzahl"
+)
+
+# analyse water availability -----------------------------------------------------------------------
+
+# check the correspondence between the Ellenberg values and estimated water availability
+boxplot(veg$Feuchte ~ veg$WasserHH)
+
+# calculate the mean plot-level Ellenberg humidity indicator value
+WasserHH.Feuchte <- group_by(veg, Aufnahmenr, WasserHH, Bodenfeuchte) %>%
+  summarise(
+    # community mean
+    Feuchte_CM = mean(Feuchte, na.rm = TRUE) %>%
+      round(., 1),
+    # community weighted mean
+    Feuchte_CWM = weighted.mean(x = Feuchte, w = Deckung, na.rm = TRUE) %>%
+      round(., 1)
+  )
+
+# plot the plot-level Ellenberg humidity
+op <- par(mfcol = c(1, 2), mar = c(4, 4, 2, 2))
+plot(Feuchte_CWM ~ WasserHH,
+  data = WasserHH.Feuchte, col = "red",
+  pch = 20, ylab = "mittlere Feuchte nach Ellenberg"
+)
+points(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
+legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)
+
+plot(Feuchte_CWM ~ Bodenfeuchte, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "")
+points(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
+legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)
+par(op)
+
+# calculate correlation between the water availability and plot-level Ellenberg humidity
+cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, method = "kendall", exact = FALSE)
+cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE)
+cor.test(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CWM,
+  method = "kendall", exact = FALSE
+)
+
+# add the calculated mean Ellenberg humidity to the main data table
+veg %<>% left_join(WasserHH.Feuchte)
+glimpse(veg)
+
+# analyse LDMC -------------------------------------------------------------------------------------
+
+# LDMC = leaf dry matter content
+
+# inspect the distribution of LDMC visually
+hist(veg$LDMC)
+
+# plot LDMC across water availability levels
+boxplot(
+  LDMC ~ WasserHH,
+  dat = veg, ylab = "LDMC", xlab = "geschaetzte Wasserverfuegbarkeit (ordinal)",
+  col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
+)
+boxplot(LDMC ~ Feuchte_CWM,
+  dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg",
+  x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
+)
+ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + 
+  geom_point(na.rm = TRUE) +
+  stat_smooth(method = "lm", col = '#E69F00', se = FALSE, na.rm = TRUE) +
+  labs(x = "Bodenfeuchte [%]") +
+  theme_minimal()
+
+# perform regression analyses
+# using WasserHH as explanatory variable
+fm0.LDMC <- lm(LDMC ~ WasserHH, data = veg)
+summary(fm0.LDMC)
+performance::check_model(fm0.LDMC)
+
+fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover
+summary(fm1.LDMC)
+performance::check_model(fm1.LDMC)
+
+# compare the two models
+tbl_merge(
+  tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)),
+  tab_spanner = c("**Non-weighted**", "**Weigted**")
+)
+
+# using Bodenfeuchte as explanatory variable
+fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg)
+summary(fm0.LDMC.b)
+performance::check_model(fm0.LDMC.b)
+
+fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg)
+summary(fm1.LDMC.b)
+performance::check_model(fm1.LDMC.b)
+
+# compare the two models
+tbl_merge(
+  tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)),
+  tab_spanner = c("**Non-weighted**", "**Weigted**")
+)
+
+ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) + 
+  geom_point(na.rm = TRUE) +
+  geom_abline(intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2], 
+              col = '#E69F00', size = 1, na.rm = TRUE) +
+  geom_abline(intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2], 
+              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  labs(x = "Bodenfeuchte [%]") +
+  theme_minimal() +
+  annotate("text", x = 20, y = 270, label = "Non-weighted", col = '#E69F00') +
+  annotate("text", x = 25, y = 320, label = "Weighted", col = '#56B4E9')
+
+# analyse SLA --------------------------------------------------------------------------------------
+
+# SLA = specific leaf area
+
+# inspect the distribution of SLA visually
+hist(veg$SLA)
+hist(log(veg$SLA))
+
+# test the normality of distribution
+shapiro_test(veg$SLA)
+shapiro_test(log(veg$SLA))
+
+# add log-transformed values to the main data table
+veg %<>% mutate(SLA.log = log(SLA))
+
+# plot the distribution of SLA across water availability levels
+op <- par(mar = c(4, 5, 2, 3))
+boxplot(SLA ~ WasserHH,
+  dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = "SLA", col = "blue", log = "y"
+)
+filter(veg, WasserHH == 2 & SLA > 25) %>%
+  select(WissName, Aufnahmenr, SLA)
+filter(veg, WissName == "Euphorbia cyparissias") %>%
+  select(WissName, Aufnahmenr, SLA)
+
+boxplot(SLA.log ~ WasserHH,
+  dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = expression(log("SLA")), col = "blue"
+)
+par(op)
+filter(veg, WasserHH == 7 & SLA.log < 3) %>%
+  select(WissName, Aufnahmenr, SLA)
+
+# perform regression analyses
+fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg)
+summary(fm1.SLA)
+performance::check_model(fm1.SLA)
+
+fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg)
+summary(fm1.SLA.log)
+performance::check_model(fm1.SLA.log)
+
+fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg)
+summary(fm1.SLA.log.b)
+performance::check_model(fm1.SLA.log.b)
+
+# plot the regression line
+ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) + 
+  geom_point(na.rm = TRUE) +
+  geom_abline(intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2], 
+              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) +
+  theme_minimal()
+
+# look at the relationship between SLA and LDMC
+plot(SLA ~ LDMC, data = veg)
+cor.test(veg$SLA, veg$LDMC, use = "complete.obs")
+
+# analyse height -----------------------------------------------------------------------------------
+
+# inspect distributions visually
+hist(veg$Hoehe.avg)
+hist(log(veg$Hoehe.avg))
+
+# test the normality of distribution
+shapiro_test(veg$Hoehe.avg)
+shapiro_test(log(veg$Hoehe.avg))
+
+# add log-transformed values to the main data table
+veg %<>% mutate(Hoehe.avg.log = log(Hoehe.avg))
+
+# plot the distribution of height across water availability levels
+op <- par(mar = c(4, 5, 2, 3))
+boxplot(Hoehe.avg ~ WasserHH,
+  data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = "Wuchshöhe [cm]", col = rainbow(7, start = 1, end = 5 / 7),
+  log = "y"
+)
+
+filter(veg, WasserHH == 5 & Hoehe.avg > 50) %>%
+  select(WissName, Aufnahmenr, Hoehe.avg)
+filter(veg, WasserHH == 7 & Hoehe.avg < 20) %>%
+  select(WissName, Aufnahmenr, Hoehe.avg)
+
+boxplot(Hoehe.avg.log ~ WasserHH,
+  data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = expression(log("Wuchshöhe [cm]")), col = rainbow(7, start = 1, end = 5 / 7)
+)
+par(op)
+
+# perform regression analysis
+fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg)
+summary(fm1.hoehe.avg.log)
+performance::check_model(fm1.hoehe.avg.log)
+
+fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg)
+summary(fm1.hoehe.avg.log.b)
+performance::check_model(fm1.hoehe.avg.log.b)
+
+# plot the regression line
+ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) + 
+  geom_point(na.rm = TRUE) +
+  geom_abline(intercept = fm1.hoehe.avg.log.b$coefficients[1], 
+              slope = fm1.hoehe.avg.log.b$coefficients[2], 
+              col = '#56B4E9', size = 1, na.rm = TRUE) +
+  geom_smooth(col = "red", se = FALSE, na.rm = TRUE) +
+  labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) +
+  theme_minimal()
+
+# analyse height variance --------------------------------------------------------------------------
+
+# plot height variance
+hist(veg$Hoehe.var)
+hist(log(veg$Hoehe.var))
+
+# add log-transformed values to the main data table
+veg %<>% mutate(Hoehe.var.log = log(Hoehe.var))
+
+# check the relationship between height and height variance
+boxplot(Hoehe.var.log ~ WasserHH, data = veg)
+plot(Hoehe.var.log ~ Hoehe.avg, data = veg)
+cor.test(veg$Hoehe.var.log, veg$Hoehe.avg)
+
+# calculate the coefficient of variation
+veg %<>% mutate(Hoehe.cv = sqrt(Hoehe.var) / Hoehe.avg * 100)
+
+hist(veg$Hoehe.cv)
+hist(log(veg$Hoehe.cv))
+
+veg %<>% mutate(Hoehe.cv.log = log(Hoehe.cv))
+
+plot(Hoehe.cv.log ~ Hoehe.avg, data = veg)
+cor.test(veg$Hoehe.cv.log, veg$Hoehe.avg)
+
+boxplot(Hoehe.cv ~ WasserHH,
+  data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1]
+)
+filter(veg, WasserHH == 3 & Hoehe.cv > 100) %>%
+  select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv)
+
+# perform regression analysis
+fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung)
+summary(fm1.Hoehe.cv.log)
+performance::check_model(fm1.Hoehe.cv.log)
+
+fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung)
+summary(fm1.Hoehe.cv.log.b)
+performance::check_model(fm1.Hoehe.cv.log.b)
+
+# analyse seed mass --------------------------------------------------------------------------------
+
+hist(veg$Germinulenmasse)
+hist(log(veg$Germinulenmasse))
+
+veg %<>% mutate(Germinulenmasse.log = log(Germinulenmasse))
+
+op <- par(mar = c(4, 5, 2, 3))
+boxplot(Germinulenmasse ~ WasserHH, data = veg, log = "y")
+boxplot(Germinulenmasse.log ~ WasserHH, data = veg, ylab = expression(log("Germinulenmasse")))
+filter(veg, Germinulenmasse.log > 4) %>%
+  select(WissName, Germinulenmasse)
+par(op)
+
+# perform regression analysis
+fm1.gmasse.log <- lm(Germinulenmasse.log ~ WasserHH, data = veg, weight = Deckung)
+summary(fm1.gmasse.log)
+check_model(fm1.gmasse.log)
+
+fm1.gmasse.log.b <- lm(Germinulenmasse.log ~ Bodenfeuchte, data = veg, weight = Deckung)
+summary(fm1.gmasse.log.b)
+check_model(fm1.gmasse.log.b)
+
+# analyse life form --------------------------------------------------------------------------------
+
+# compare the distributions of life forms across vegetation types
+select(veg, Lebensform, Vegetation) %>%
+  gtsummary::tbl_summary(by = Vegetation) %>%
+  add_p()
+
+# calculate total % cover per life form across humidity levels
+lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>%
+  summarise(Deckung = sum(Deckung))
+head(lft.1)
+tail(lft.1)
+
+lft.2 <- group_by(lft.1, Lebensform, WasserHH) %>%
+  summarise(Deckung = mean(Deckung), .groups = "drop")
+head(lft.2)
+
+lft.3 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.2)
+lft.3 <- DescTools::as.matrix.xtabs(lft.3)
+lft.3
+chisq_test(lft.3, simulate.p.value = TRUE)
+
+# plot the distribution of life forms across humidity levels
+# using base R
+op <- par(mar = c(4, 5, 2, 3))
+barplot(lft.3,
+  xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
+  ylab = expression(sum("Deckung")),
+  main = "Lebensformenspektren", ylim = c(0, 160)
+)
+legend(5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
+par(op)
+
+# using ggplot2
+ggplot(data = lft.2, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
+  geom_bar(position = "stack", stat = "identity") +
+  viridis::scale_fill_viridis(discrete = TRUE) +
+  labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
+  ggtitle("Lebensformenspektren") +
+  theme_minimal()
+
+# analyse pollination type -------------------------------------------------------------------------
+
+# compare the distributions of pollination syndromes across vegetation types
+select(veg, Bestaeubung, Vegetation) %>%
+  gtsummary::tbl_summary(by = Vegetation) %>%
+  add_p()
+
+# calculate total % cover per pollination type across humidity levels
+bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>%
+  summarise(Deckung = sum(Deckung))
+head(bst.1)
+tail(bst.1)
+
+bst.2 <- group_by(bst.1, Bestaeubung, WasserHH) %>%
+  summarise(Deckung = mean(Deckung), .groups = "drop")
+head(bst.2)
+
+bst.3 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.2)
+bst.3 <- DescTools::as.matrix.xtabs(bst.3)
+bst.3
+chisq_test(bst.3, simulate.p.value = TRUE)
+
+# plot the distribution of pollination types
+# using base R
+op <- par(mar = c(4, 5, 2, 3))
+cols <- c("yellow", "brown", "grey", "lightblue")
+barplot(bst.3,
+  xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")),
+  main = "Bestäubungstypenspektren",
+  sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols
+)
+legend(0, 140,
+  legend = c("Wind", "Selbst", "multipel", "Insekten"),
+  pch = 15, col = cols[4:1], cex = 0.7
+)
+
+# using ggplot2
+ggplot(data = bst.2, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
+  geom_bar(position = "stack", stat = "identity") +
+  viridis::scale_fill_viridis(
+    discrete = TRUE, labels = c("Insekten", "multipel", "Selbst", "Wind")
+  ) +
+  labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
+  ggtitle("Bestäubungstypenspektren") +
+  theme_minimal()
+
+# analyse the relationships between traits ---------------------------------------------------------
+
+# test if height varies across life forms
+# plot height by life form
+ggpubr::ggboxplot(veg, x = "Lebensform", y = "Hoehe.avg.log", select = c("H", "G", "T", "mL"))
+
+# check the ANOVA assumptions
+# outliers
+group_by(veg, Lebensform) %>%
+  identify_outliers(Hoehe.avg.log)
+# normality
+lm.hoe.lf <- lm(Hoehe.avg.log ~ Lebensform, data = veg)
+ggpubr::ggqqplot(residuals(lm.hoe.lf))
+rstatix::shapiro_test(residuals(lm.hoe.lf))
+# homogeneity of variances
+levene_test(veg, Hoehe.avg.log ~ Lebensform)
+
+# perform ANOVA and the Kruskal-Wallis test (non-parametric alternative)
+anova_test(veg, Hoehe.avg.log ~ Lebensform)
+kruskal_test(veg, Hoehe.avg.log ~ Lebensform)
+
+
+# test if the height varies across pollination types
+ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log")
+group_by(veg, Bestaeubung) %>%
+  identify_outliers(Hoehe.avg.log) %>%
+  select(Fortlaufendenr, WissName, Hoehe.avg, Hoehe.avg.log, is.outlier, is.extreme)
+lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
+ggqqplot(residuals(lm.hoe.best))
+shapiro_test(residuals(lm.hoe.best))
+levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
+hoe.best.aov <- anova_test(veg, Hoehe.avg.log ~ Bestaeubung)
+
+# perform a post-hoc test (i.e. pairwise comparisons)
+hoe.best.pwc <- tukey_hsd(veg, Hoehe.avg.log ~ Bestaeubung)
+hoe.best.pwc
+
+# plot the results
+hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung")
+ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
+  stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) +
+  labs(
+    subtitle = get_test_label(hoe.best.aov, detailed = TRUE),
+    caption = get_pwc_label(hoe.best.pwc),
+    y = expression(log("Wuchshöhe")),
+    x = "Bestäubungstyp"
+  )
+
+
+# test if the SLA varies across life forms
+ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL"))
+group_by(veg, Lebensform) %>%
+  identify_outliers(SLA.log)
+lm.sla.lf <- lm(SLA.log ~ Lebensform, data = veg)
+ggqqplot(residuals(lm.sla.lf))
+shapiro_test(residuals(lm.sla.lf))
+levene_test(veg, SLA.log ~ Lebensform)
+anova_test(veg, SLA.log ~ Lebensform)
+
+
+# test if the seed mass varies across life forms
+gmasse <- select(veg, WissName, Germinulenmasse.log, Lebensform, Bestaeubung) %>%
+  distinct()
+ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log")
+group_by(gmasse, Lebensform) %>%
+  identify_outliers(Germinulenmasse.log)
+lm.gmasse.lf <- lm(Germinulenmasse.log ~ Lebensform, data = gmasse)
+ggqqplot(residuals(lm.gmasse.lf))
+shapiro_test(residuals(lm.gmasse.lf))
+levene_test(gmasse, Germinulenmasse.log ~ Lebensform)
+gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform)
+gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform)
+gmasse.lf.pwc
+
+gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform")
+ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
+  stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) +
+  labs(
+    subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE),
+    caption = get_pwc_label(gmasse.lf.pwc),
+    y = expression(log("Germinulenmasse")),
+    x = "Lebensform"
+  )
+
+
+# test if the seed mass varies across pollination types
+ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log")
+group_by(gmasse, Bestaeubung) %>%
+  identify_outliers(Germinulenmasse.log)
+fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse)
+ggpubr::ggqqplot(residuals(fm.gmasse.best))
+rstatix::shapiro_test(residuals(fm.gmasse.best))
+ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung")
+gmasse.best.aov <- anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
+gmasse.best.aov
+gmasse.best.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Bestaeubung)
+gmasse.best.pwc
+
+gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung")
+ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") +
+  stat_pvalue_manual(gmasse.best.pwc, hide.ns = TRUE) +
+  labs(
+    subtitle = get_test_label(gmasse.best.aov, detailed = TRUE),
+    caption = get_pwc_label(gmasse.best.pwc),
+    y = expression(log("Germinulenmasse")),
+    x = "Bestäubungstyp"
+  )
+
+# analyse species composition ----------------------------------------------------------------------
+
+mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
+str(mat)
+
+# perform correspondence analysis
+decorana(mat)
+
+fm.ca <- cca(mat)
+fm.ca
+summary(fm.ca)
+plot(fm.ca, display = "sites")
+plot(fm.ca)
+
+# shorten taxon names
+wname <- str_remove(veg$WissName, pattern = " cf\\.| x") %>%
+  str_split(pattern = " ", simplify = TRUE) %>%
+  substr(., 1, 3)
+wname <- paste(wname[, 1], wname[, 2], sep = ".")
+veg %<>% mutate(wname = wname)
+
+# repeat the same analysis with shortened taxon names provided
+mat <- xtabs(Deckung ~ Aufnahmenr + wname, data = veg)
+fm.ca <- cca(mat)
+summary(fm.ca)
+
+# plot the results
+summary(fm.ca)$species %>%
+  as_tibble() %>%
+  mutate(wname = rownames(summary(fm.ca)$species)) %>%
+  ggplot(aes(x = CA1, y = CA2, label = wname)) +
+  geom_point() +
+  ggrepel::geom_text_repel(size = 1.5, force = 15, max.overlaps = 30) +
+  theme_bw()
+
+# prepare data for trait ordination ----------------------------------------------------------------
+
+# calculate the proportion of each life form per plot
+lf.kt <- select(veg, Aufnahmenr, Deckung, Lebensform) %>%
+  pivot_wider(
+    names_from = Lebensform, values_from = Deckung,
+    values_fn = sum, values_fill = 0, names_prefix = "lf."
+  ) %>%
+  mutate(total = rowSums(.[, -1])) %>%
+  mutate(across(lf.H:lf.M, ~ . / total), .keep = "unused")
+
+# calculate the proportion of each pollination type per plot
+best.kt <- select(veg, Aufnahmenr, Deckung, Bestaeubung) %>%
+  pivot_wider(
+    names_from = Bestaeubung, values_from = Deckung,
+    values_fn = sum, values_fill = 0, names_prefix = "b."
+  ) %>%
+  mutate(total = rowSums(.[, -1])) %>%
+  mutate(across(b.wi:b.mb, ~ . / total), .keep = "unused")
+
+# compute community weighted means for numerical traits
+cwm <- group_by(veg, Aufnahmenr) %>%
+  summarise(across(
+    c(Hoehe.avg.log, Hoehe.cv.log, LDMC, SLA.log, Germinulenmasse.log),
+    ~ weighted.mean(., w = Deckung, na.rm = TRUE)
+  )) %>%
+  rename(
+    Ho.av = Hoehe.avg.log,
+    Ho.cv = Hoehe.cv.log,
+    SLA = SLA.log,
+    GM = Germinulenmasse.log
+  )
+
+# combine the three tables
+cwm %<>%
+  left_join(lf.kt) %>%
+  left_join(best.kt)
+cwm
+
+# perform principal components analysis (PCA) of CWMs ----------------------------------------------
+
+# analyse all traits
+pca1 <- FactoMineR::PCA(cwm[, -c(1:2)])
+summary(pca1)
+
+cowplot::plot_grid(
+  plot.PCA(pca1),
+  plot.PCA(pca1, c(1, 3))
+)
+
+# analyse numerical traits only
+colnames(cwm)
+pca2 <- PCA(select(cwm, Ho.av:GM))
+cowplot::plot_grid(
+  plot.PCA(pca2),
+  plot.PCA(pca2, c(1, 3))
+)
+
+# perform PCA of species traits --------------------------------------------------------------------
+
+# create a separate table containing numerical traits
+dat <- select(
+  veg, WissName, Vegetation, Blattflaeche, Frischmasse, Trockenmasse,
+  SLA, LDMC, Hoehe.avg, Hoehe.var, Germinulenmasse
+) %>%
+  rename(
+    BlF = Blattflaeche,
+    FrM = Frischmasse,
+    TrM = Trockenmasse,
+    Ho.av = Hoehe.avg,
+    Ho.var = Hoehe.var,
+    GM = Germinulenmasse
+  ) %>%
+  na.omit()
+glimpse(dat)
+
+# perform PCA
+pca3 <- FactoMineR::PCA(dat[, -c(1:2)])
+summary(pca3)
+plot.PCA(pca3)
+
+# add PC to the trait table
+dat$pc1 <- pca3$ind$coord[, 1]
+dat$pc2 <- pca3$ind$coord[, 2]
+pc12 <- pca3$ind$coord[, 1:2]
+
+# plot the PCA results
+ggplot(data = dat, aes(x = pc1, y = pc2, color = Vegetation, shape = Vegetation)) +
+  geom_hline(yintercept = 0, lty = 2) +
+  geom_vline(xintercept = 0, lty = 2) +
+  geom_point(alpha = .8) +
+  stat_ellipse(
+    geom = "polygon", aes(fill = Vegetation), alpha = .2,
+    show.legend = FALSE, level = .95
+  ) +
+  guides(
+    color = guide_legend(title = "Vegetation"),
+    shape = guide_legend(title = "Vegetation")
+  ) +
+  labs(x = "PC 1 (45.24%)", y = "PC 2 (20.27%)") +
+  theme_minimal() +
+  theme(
+    panel.grid = element_blank(),
+    panel.border = element_rect(fill = "transparent"),
+    legend.position = "bottom"
+  )
+
+# plot PCA results using kernels
+# Code from Björn Reu
+# Used to produce Fig. 2 in Diaz et al. 2016, Nature
+
+H <- Hpi(x = pc12) # optimal bandwidth estimation
+est <- kde(x = pc12, H = H, compute.cont = TRUE) # kernel density estimation
+
+# set contour probabilities for drawing contour levels
+cl <- contourLevels(est, prob = c(.5, .05, .001), approx = TRUE)
+
+# use envfit for drawing arrows, can be also done using trait loadings
+fit <- envfit(pc12, select(dat, BlF:GM))
+
+# create a plot
+par(mar = c(4, 4, 2, 2))
+cols <- ifelse(dat$Vegetation == "TR", "darkred", "darkblue")
+plot(est,
+  cont = seq(1, 100, by = 1), display = "filled.contour2", add = FALSE, ylab = "", xlab = "",
+  cex.axis = .75, ylim = c(-5, 4), xlim = c(-3, 7), las = 1
+)
+plot(est, abs.cont = cl[1], labels = .5, labcex = .75, add = TRUE, lwd = .75, col = "grey30")
+plot(est, abs.cont = cl[2], labels = .95, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
+plot(est, abs.cont = cl[3], labels = .99, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
+points(pc12, pch = 16, cex = .5, col = cols)
+plot(fit, cex = .90, col = 1)
+mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8)
+mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8)
+
+dat[dat$pc1 > 4, ]
+dat[dat$pc2 < -3, ]
No results found