Skip to content

Instantly share code, notes, and snippets.

@MarinaGolivets
Last active July 13, 2022 06:37
Show Gist options
  • Select an option

  • Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.

Select an option

Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.

Revisions

  1. MarinaGolivets revised this gist Jul 13, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion GP_Oekologie_2022.R
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ for (p in pkgs) {
    library(p, character.only = TRUE)
    }

    # choose an option to treat warnings as errors
    # choose an option to not treat warnings as errors
    options(warn = 1)

    # check what packages are being used on your machine
  2. MarinaGolivets revised this gist Jul 13, 2022. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions GP_Oekologie_2022.R
    Original file line number Diff line number Diff line change
    @@ -772,6 +772,8 @@ write_csv2(veg, here("veg_data_2020_edited.csv"))

    # analyse species composition ----------------------------------------------------------------------

    veg <- read_csv2(here("veg_data_2022_edited.csv"))

    mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
    str(mat)

  3. MarinaGolivets revised this gist Jul 12, 2022. 1 changed file with 16 additions and 6 deletions.
    22 changes: 16 additions & 6 deletions GP_Oekologie_2022.R
    Original file line number Diff line number Diff line change
    @@ -50,6 +50,7 @@ sessionInfo()
    here()

    # load data ----------------------------------------------------------------------------------------

    if (length(list.files(here(), "veg_data_2022.csv")) == 0) {
    # load the data directly from the Google sheet
    veg <- googlesheets4::read_sheet(
    @@ -664,6 +665,8 @@ levene_test(veg, Hoehe.avg.log ~ Lebensform)

    # perform ANOVA and the Kruskal-Wallis test (non-parametric alternative)
    anova_test(veg, Hoehe.avg.log ~ Lebensform)
    # because the normality of residuals assumption was violated
    # the Kruskal Wallis test should be used instead of a standard ANOVA
    kruskal_test(veg, Hoehe.avg.log ~ Lebensform)


    @@ -677,10 +680,12 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
    ggqqplot(residuals(lm.hoe.best))
    shapiro_test(residuals(lm.hoe.best))
    levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)


    # perform a post-hoc test (i.e. pairwise comparisons)
    # because the normality of residuals assumption was violated
    # we use the Kruskal Wallis test instead of a standard ANOVA
    # and the Dunn's post hoc test for pairwise comparisons
    hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.kw
    hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.pwc

    @@ -742,9 +747,14 @@ fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse)
    ggpubr::ggqqplot(residuals(fm.gmasse.best))
    rstatix::shapiro_test(residuals(fm.gmasse.best))
    ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung")
    gmasse.best.aov <- anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
    levene_test(gmasse, Germinulenmasse.log ~ Bestaeubung)

    # because the homogeneity of variance assumption was violated
    # we use the Welch test instead of a standard ANOVA
    # and the Games-Howell post hoc test for pairwise comparisons
    gmasse.best.aov <- welch_anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
    gmasse.best.aov
    gmasse.best.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Bestaeubung)
    gmasse.best.pwc <- games_howell_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
    gmasse.best.pwc

    gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung")
    @@ -931,4 +941,4 @@ mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8)
    mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8)

    dat[dat$pc1 > 4, ]
    dat[dat$pc2 < -3, ]
    dat[dat$pc2 < -3, ]
  4. MarinaGolivets revised this gist Jul 12, 2022. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions GP_Oekologie_2022.R
    Original file line number Diff line number Diff line change
    @@ -757,6 +757,9 @@ ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") +
    x = "Bestäubungstyp"
    )

    # save the edited data as a separate file
    write_csv2(veg, here("veg_data_2020_edited.csv"))

    # analyse species composition ----------------------------------------------------------------------

    mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
  5. MarinaGolivets revised this gist Jul 12, 2022. 1 changed file with 89 additions and 74 deletions.
    163 changes: 89 additions & 74 deletions GP_Oekologie_2022.R
    Original file line number Diff line number Diff line change
    @@ -29,9 +29,11 @@ pkgs <- c(
    )

    # install (if not installed) and load all the packages
    install.packages("devtools")
    devtools::install_github("ddsjoberg/gtsummary")
    devtools::install_github("ropensci/skimr")

    # # run only once!
    # install.packages("devtools")
    # devtools::install_github("ddsjoberg/gtsummary")
    # devtools::install_github("ropensci/skimr")

    for (p in pkgs) {
    if (!require(p, character.only = TRUE)) install.packages(p)
    @@ -48,23 +50,22 @@ sessionInfo()
    here()

    # load data ----------------------------------------------------------------------------------------

    # load the data directly from the Google sheet
    veg <- googlesheets4::read_sheet(
    "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
    sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
    )
    if (length(list.files(here(), "veg_data_2022.csv")) == 0) {
    # load the data directly from the Google sheet
    veg <- googlesheets4::read_sheet(
    "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
    sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
    )
    # save the data to your home directory
    readr::write_csv2(veg, here("veg_data_2022.csv"))
    } else {
    # load the data from your home directory
    veg <- readr::read_csv2(here("veg_data_2022.csv"))
    }

    # take a quick look at the data
    glimpse(veg)

    # save the data to your home directory
    readr::write_csv2(veg, here("veg_data_2022.csv"))

    # load the data from your home directory
    veg <- readr::read_csv2(here("veg_data_2022.csv"))
    glimpse(veg)

    # rename columns -----------------------------------------------------------------------------------

    veg %<>%
    @@ -135,6 +136,7 @@ veg %<>%
    Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols
    as.integer() # convert to integer
    )
    veg[veg$Fortlaufendenr == 41, "Feuchte"] <- NA

    # check taxon names
    tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE)
    @@ -144,9 +146,10 @@ veg %<>%
    WissName = recode(
    WissName,
    `Taraxacum officinalis` = "Taraxacum officinale",
    `Viola arvense` = "Viola arvensis",
    `Viola arvense` = "Viola arvensis"
    )
    )
    veg[veg$Fortlaufendenr == 41, "WissName"] <- "Agrostis capillaris"

    # check leaf area
    summary(veg$Blattflaeche)
    @@ -169,6 +172,7 @@ veg %<>%
    mutate(Germinulenmasse = case_when(
    WissName == "Cerastium semidecandrum" ~ .04,
    WissName == "Agrostis stolonifera" ~ .07,
    Fortlaufendenr == 41 ~ .1,
    TRUE ~ Germinulenmasse
    ))

    @@ -220,6 +224,9 @@ veg %<>%
    ))
    summary(veg$Germinulenmasse)

    # convert life form and pollination to factor
    veg %<>% mutate(across(c(Lebensform, Bestaeubung), ~ as.factor(.)))

    # calculate SLA & LDMC -----------------------------------------------------------------------------

    veg %<>%
    @@ -325,9 +332,9 @@ boxplot(LDMC ~ Feuchte_CWM,
    dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg",
    x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
    )
    ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    geom_point(na.rm = TRUE) +
    stat_smooth(method = "lm", col = '#E69F00', se = FALSE, na.rm = TRUE) +
    stat_smooth(method = "lm", col = "#E69F00", se = FALSE, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]") +
    theme_minimal()

    @@ -339,39 +346,44 @@ performance::check_model(fm0.LDMC)

    fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover
    summary(fm1.LDMC)
    performance::check_model(fm1.LDMC)
    check_model(fm1.LDMC)

    # compare the two models
    tbl_merge(
    tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)),
    tab_spanner = c("**Non-weighted**", "**Weigted**")
    )

    # using Bodenfeuchte as explanatory variable
    # using soil moisture (Bodenfeuchte) as explanatory variable
    fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg)
    summary(fm0.LDMC.b)
    performance::check_model(fm0.LDMC.b)
    check_model(fm0.LDMC.b)

    fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.LDMC.b)
    performance::check_model(fm1.LDMC.b)
    check_model(fm1.LDMC.b)

    # compare the two models
    tbl_merge(
    tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)),
    tab_spanner = c("**Non-weighted**", "**Weigted**")
    )

    ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    plot1 <- ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2],
    col = '#E69F00', size = 1, na.rm = TRUE) +
    geom_abline(intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    geom_abline(
    intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2],
    col = "#E69F00", size = 1, na.rm = TRUE
    ) +
    geom_abline(
    intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2],
    col = "#56B4E9", size = 1, na.rm = TRUE
    ) +
    labs(x = "Bodenfeuchte [%]") +
    theme_minimal() +
    annotate("text", x = 20, y = 270, label = "Non-weighted", col = '#E69F00') +
    annotate("text", x = 25, y = 320, label = "Weighted", col = '#56B4E9')
    annotate("text", x = 20, y = 270, label = "Non-weighted", col = "#E69F00") +
    annotate("text", x = 25, y = 320, label = "Weighted", col = "#56B4E9")
    plot1

    # analyse SLA --------------------------------------------------------------------------------------

    @@ -410,23 +422,26 @@ filter(veg, WasserHH == 7 & SLA.log < 3) %>%
    # perform regression analyses
    fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.SLA)
    performance::check_model(fm1.SLA)
    check_model(fm1.SLA)

    fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.SLA.log)
    performance::check_model(fm1.SLA.log)
    check_model(fm1.SLA.log)

    fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.SLA.log.b)
    performance::check_model(fm1.SLA.log.b)
    check_model(fm1.SLA.log.b)

    # plot the regression line
    ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) +
    plot2 <- ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    geom_abline(
    intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2],
    col = "#56B4E9", size = 1, na.rm = TRUE
    ) +
    labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) +
    theme_minimal()
    plot2

    # look at the relationship between SLA and LDMC
    plot(SLA ~ LDMC, data = veg)
    @@ -453,9 +468,9 @@ boxplot(Hoehe.avg ~ WasserHH,
    log = "y"
    )

    filter(veg, WasserHH == 5 & Hoehe.avg > 50) %>%
    filter(veg, WasserHH == 6 & Hoehe.avg > 50) %>%
    select(WissName, Aufnahmenr, Hoehe.avg)
    filter(veg, WasserHH == 7 & Hoehe.avg < 20) %>%
    filter(veg, WasserHH == 8 & Hoehe.avg < 20) %>%
    select(WissName, Aufnahmenr, Hoehe.avg)

    boxplot(Hoehe.avg.log ~ WasserHH,
    @@ -467,21 +482,24 @@ par(op)
    # perform regression analysis
    fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.hoehe.avg.log)
    performance::check_model(fm1.hoehe.avg.log)
    check_model(fm1.hoehe.avg.log)

    fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.hoehe.avg.log.b)
    performance::check_model(fm1.hoehe.avg.log.b)
    check_model(fm1.hoehe.avg.log.b)

    # plot the regression line
    ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) +
    plot3 <- ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm1.hoehe.avg.log.b$coefficients[1],
    slope = fm1.hoehe.avg.log.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    geom_abline(
    intercept = fm1.hoehe.avg.log.b$coefficients[1],
    slope = fm1.hoehe.avg.log.b$coefficients[2],
    col = "#56B4E9", size = 1, na.rm = TRUE
    ) +
    geom_smooth(col = "red", se = FALSE, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) +
    theme_minimal()
    plot3

    # analyse height variance --------------------------------------------------------------------------

    @@ -512,17 +530,17 @@ boxplot(Hoehe.cv ~ WasserHH,
    data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1]
    )
    filter(veg, WasserHH == 3 & Hoehe.cv > 100) %>%
    filter(veg, WasserHH == 4 & Hoehe.cv > 100) %>%
    select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv)

    # perform regression analysis
    fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung)
    summary(fm1.Hoehe.cv.log)
    performance::check_model(fm1.Hoehe.cv.log)
    check_model(fm1.Hoehe.cv.log)

    fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung)
    summary(fm1.Hoehe.cv.log.b)
    performance::check_model(fm1.Hoehe.cv.log.b)
    check_model(fm1.Hoehe.cv.log.b)

    # analyse seed mass --------------------------------------------------------------------------------

    @@ -560,14 +578,10 @@ lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>%
    head(lft.1)
    tail(lft.1)

    lft.2 <- group_by(lft.1, Lebensform, WasserHH) %>%
    summarise(Deckung = mean(Deckung), .groups = "drop")
    head(lft.2)

    lft.3 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.2)
    lft.3 <- DescTools::as.matrix.xtabs(lft.3)
    lft.3
    chisq_test(lft.3, simulate.p.value = TRUE)
    lft.2 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.1)
    lft.2 <- DescTools::as.matrix.xtabs(lft.3)
    lft.2
    chisq_test(lft.2, simulate.p.value = TRUE)

    # plot the distribution of life forms across humidity levels
    # using base R
    @@ -577,11 +591,11 @@ barplot(lft.3,
    ylab = expression(sum("Deckung")),
    main = "Lebensformenspektren", ylim = c(0, 160)
    )
    legend(5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
    legend(8.5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
    par(op)

    # using ggplot2
    ggplot(data = lft.2, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
    ggplot(data = lft.1, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
    geom_bar(position = "stack", stat = "identity") +
    viridis::scale_fill_viridis(discrete = TRUE) +
    labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
    @@ -601,14 +615,10 @@ bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>%
    head(bst.1)
    tail(bst.1)

    bst.2 <- group_by(bst.1, Bestaeubung, WasserHH) %>%
    summarise(Deckung = mean(Deckung), .groups = "drop")
    head(bst.2)

    bst.3 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.2)
    bst.3 <- DescTools::as.matrix.xtabs(bst.3)
    bst.3
    chisq_test(bst.3, simulate.p.value = TRUE)
    bst.2 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.1)
    bst.2 <- DescTools::as.matrix.xtabs(bst.2)
    bst.2
    chisq_test(bst.2, simulate.p.value = TRUE)

    # plot the distribution of pollination types
    # using base R
    @@ -620,19 +630,20 @@ barplot(bst.3,
    sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols
    )
    legend(0, 140,
    legend = c("Wind", "Selbst", "multipel", "Insekten"),
    legend = c("Wind", "Selbst", "multiple", "Insekten"),
    pch = 15, col = cols[4:1], cex = 0.7
    )

    # using ggplot2
    ggplot(data = bst.2, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
    plot4 <- ggplot(data = bst.1, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
    geom_bar(position = "stack", stat = "identity") +
    viridis::scale_fill_viridis(
    discrete = TRUE, labels = c("Insekten", "multipel", "Selbst", "Wind")
    discrete = TRUE, labels = c("Insekten", "multiple", "Selbst", "Wind")
    ) +
    labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
    ggtitle("Bestäubungstypenspektren") +
    theme_minimal()
    plot4

    # analyse the relationships between traits ---------------------------------------------------------

    @@ -657,6 +668,7 @@ kruskal_test(veg, Hoehe.avg.log ~ Lebensform)


    # test if the height varies across pollination types
    veg$Bestaeubung <- as.factor(veg$Bestaeubung)
    ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log")
    group_by(veg, Bestaeubung) %>%
    identify_outliers(Hoehe.avg.log) %>%
    @@ -665,23 +677,24 @@ lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
    ggqqplot(residuals(lm.hoe.best))
    shapiro_test(residuals(lm.hoe.best))
    levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.aov <- anova_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)


    # perform a post-hoc test (i.e. pairwise comparisons)
    hoe.best.pwc <- tukey_hsd(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.pwc

    # plot the results
    hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung")
    ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
    plot6 <- ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
    stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) +
    labs(
    subtitle = get_test_label(hoe.best.aov, detailed = TRUE),
    subtitle = get_test_label(hoe.best.kw, detailed = TRUE),
    caption = get_pwc_label(hoe.best.pwc),
    y = expression(log("Wuchshöhe")),
    x = "Bestäubungstyp"
    )

    plot6

    # test if the SLA varies across life forms
    ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL"))
    @@ -705,18 +718,20 @@ ggqqplot(residuals(lm.gmasse.lf))
    shapiro_test(residuals(lm.gmasse.lf))
    levene_test(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.aov
    gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.pwc

    gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform")
    ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
    plot7 <- ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
    stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) +
    labs(
    subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE),
    caption = get_pwc_label(gmasse.lf.pwc),
    y = expression(log("Germinulenmasse")),
    x = "Lebensform"
    )
    plot7


    # test if the seed mass varies across pollination types
  6. MarinaGolivets renamed this gist Jul 11, 2022. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  7. MarinaGolivets created this gist Jul 11, 2022.
    916 changes: 916 additions & 0 deletions GP Oekologie 2022
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,916 @@
    # GP Oekologie 2022
    # Ingolf Kühn & Marina Golivets


    # install & load R packages ------------------------------------------------------------------------

    rm(list = ls())

    # packages that we need for analyses
    pkgs <- c(
    "here", # for handling working directory
    "googlesheets4", # for loading data from Google spreadsheets
    "tidyverse", # for data manipulation
    "magrittr", # for piping
    "skimr", # for summarizing data
    "gtsummary", # for summarizing data
    "taxize", # for checking taxonomic names
    "TR8", # for retrieving trait data from databases
    "performance", # for inspecting regression models,
    "rstatix", # for statistical analyses, e.g. ANOVA
    "DescTools", # for miscellaneous basic stats
    "ggpubr", # for plotting
    "ggrepel", # for plotting
    "viridis", # for plotting
    "cowplot", # for plotting
    "vegan", # for multivariate analyses
    "FactoMineR", # for PCA
    "ks" # for kernel smoothers
    )

    # install (if not installed) and load all the packages
    install.packages("devtools")
    devtools::install_github("ddsjoberg/gtsummary")
    devtools::install_github("ropensci/skimr")

    for (p in pkgs) {
    if (!require(p, character.only = TRUE)) install.packages(p)
    library(p, character.only = TRUE)
    }

    # choose an option to treat warnings as errors
    options(warn = 1)

    # check what packages are being used on your machine
    sessionInfo()

    # check your working (home) directory
    here()

    # load data ----------------------------------------------------------------------------------------

    # load the data directly from the Google sheet
    veg <- googlesheets4::read_sheet(
    "https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
    sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
    )

    # take a quick look at the data
    glimpse(veg)

    # save the data to your home directory
    readr::write_csv2(veg, here("veg_data_2022.csv"))

    # load the data from your home directory
    veg <- readr::read_csv2(here("veg_data_2022.csv"))
    glimpse(veg)

    # rename columns -----------------------------------------------------------------------------------

    veg %<>%
    rename(
    Fortlaufendenr = `...1`,
    Aufnahmenr = Aufnahmenr.,
    WissName = `Wissenschaftlicher Name`,
    Hoehe.avg = `Höhe Ø (cm)`,
    Hoehe.var = `Höhe σ²`,
    Blattflaeche = `Blatt-Fläche (mm²)`,
    Frischmasse = `Frischmasse (g)`,
    Trockenmasse = `Trockenmasse (mg)`,
    Germinulenmasse = `Germinulenmasse (mg)`,
    Feuchte = `Ellenberg-Feuchte`,
    Bodenfeuchte = `Bodenfeuchte (%vol)`
    )

    # take a more detailed look at the data
    skimr::skim(veg)
    select(veg, -Fortlaufendenr, -Aufnahmenr, -WissName, -Anmerkungen) %>%
    gtsummary::tbl_summary()

    # check data for errors ----------------------------------------------------------------------------

    # reassign expert water availability measures
    veg %<>% rows_update(tibble(Aufnahmenr = 1:8, WasserHH = c(1, 2, 3, 5, 6, 7, 8, 4)))

    # make sure plot-level data don't have duplicates
    select(veg, Aufnahmenr, Ort, Vegetation, WasserHH, Bodenfeuchte) %>%
    n_distinct() # should equal the number of plots (n = 8)

    # make sure species are not duplicated within plots
    nrow(veg) # 141
    select(veg, Aufnahmenr, WissName) %>%
    n_distinct() # should equal the number of rows (n = 141)

    # make sure species-level data (from databases) are unique
    select(veg, WissName) %>%
    n_distinct() # 79
    select(veg, WissName, Lebensform, Germinulenmasse, Bestaeubung, Feuchte) %>%
    n_distinct() # should equal the number of species (n = 79)

    # check life form
    table(veg$Lebensform)
    filter(veg, Lebensform == "G") %>%
    pull(WissName) %>%
    unique()
    veg %<>% mutate(Lebensform = case_when(
    WissName == "Phalaris arundinacea" ~ "mL",
    TRUE ~ Lebensform
    ))

    # check % cover per species
    hist(veg$Deckung)

    # check total % cover per plot
    group_by(veg, Aufnahmenr) %>%
    summarise(Deckung.tot = sum(Deckung))

    # check vegetation types at each site
    table(veg$Ort, veg$Vegetation)

    # check if Ellenberg humidity indicator values are integers
    unique(veg$Feuchte)
    unique(as.integer(veg$Feuchte))
    veg %<>%
    mutate(
    Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols
    as.integer() # convert to integer
    )

    # check taxon names
    tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE)
    View(tax)
    veg %<>%
    mutate(
    WissName = recode(
    WissName,
    `Taraxacum officinalis` = "Taraxacum officinale",
    `Viola arvense` = "Viola arvensis",
    )
    )

    # check leaf area
    summary(veg$Blattflaeche)
    veg %<>% mutate(Blattflaeche = na_if(Blattflaeche, 0))

    # check seed mass
    summary(veg$Germinulenmasse)
    veg %>%
    filter(Germinulenmasse == .001) %>%
    pull(WissName) %>%
    unique()
    View(available_tr8)
    gmasse_tr8 <- TR8::tr8(
    species_list = c("Agrostis stolonifera", "Cerastium semidecandrum"),
    download_list = c("seed_wght", "SeedMass", "seed_mass"),
    allow_persistent = TRUE
    )
    gmasse_tr8
    veg %<>%
    mutate(Germinulenmasse = case_when(
    WissName == "Cerastium semidecandrum" ~ .04,
    WissName == "Agrostis stolonifera" ~ .07,
    TRUE ~ Germinulenmasse
    ))

    # take a look at the data again
    skim(veg)

    # impute missing trait data ------------------------------------------------------------------------

    # life form (data from FloraWeb)
    select(veg, WissName, Lebensform) %>%
    distinct() %>%
    filter(is.na(Lebensform))
    veg %<>%
    mutate(Lebensform = case_when(
    WissName %in% c("Cerastium glomeratum", "Erophila verna") ~ "mL",
    TRUE ~ Lebensform
    ))

    # pollination type (data from FloraWeb)
    select(veg, WissName, Bestaeubung) %>%
    distinct() %>%
    filter(is.na(Bestaeubung))
    veg %<>%
    mutate(Bestaeubung = case_when(
    WissName %in% c(
    "Cerastium glomeratum", "Erophila verna", "Valerianella spec."
    ) ~ "se",
    TRUE ~ Bestaeubung
    ))

    # seed mass (data from Seed Information Database)
    select(veg, WissName, Germinulenmasse) %>%
    distinct() %>%
    filter(is.na(Germinulenmasse))
    veg %<>%
    mutate(Germinulenmasse = case_when(
    WissName == "Allium vineale" ~ 7,
    WissName == "Carex ovalis" ~ .52,
    WissName == "Cerastium glomeratum" ~ .05,
    WissName == "Cerastium glutinosum" ~ .0654,
    WissName == "Cruciata laevipes" ~ 3.5856,
    WissName == "Erodium cicutarium" ~ 2,
    WissName == "Erophila verna" ~ .024,
    WissName == "Senecio erucifolius" ~ .3935,
    WissName == "Saxifraga granulata" ~ .03,
    WissName == "Prunus mahaleb" ~ 79.9,
    WissName == "Polygonum persicaria" ~ 2.1,
    TRUE ~ Germinulenmasse
    ))
    summary(veg$Germinulenmasse)

    # calculate SLA & LDMC -----------------------------------------------------------------------------

    veg %<>%
    mutate(
    SLA = Blattflaeche / Trockenmasse,
    LDMC = Trockenmasse / Frischmasse
    )

    # check SLA
    hist(veg$SLA)

    # check LDMC
    hist(veg$LDMC)
    filter(veg, LDMC > 500) %>%
    select(Fortlaufendenr, WissName, Deckung, Blattflaeche, Frischmasse, Trockenmasse, LDMC)

    # make corrections in the data
    veg %<>%
    mutate(
    Frischmasse = if_else(Fortlaufendenr == 127, .2063, Frischmasse),
    Trockenmasse = if_else(Fortlaufendenr == 79, 85, Trockenmasse) # it's a guess!
    )

    # recalculate SLA & LDMC
    veg %<>%
    mutate(
    SLA = Blattflaeche / Trockenmasse,
    LDMC = Trockenmasse / Frischmasse
    )

    # analyse species numbers --------------------------------------------------------------------------

    # check the number of species per plot
    artz1 <- table(veg$Aufnahmenr)
    artz1

    # check the number of species at each water availability level
    artz2 <- table(veg$WasserHH)
    artz2
    # test whether the observed species numbers differ from the expected
    chisq.test(artz2)
    chisq.test(artz2)$expected
    barplot(artz2,
    xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = "Artenzahl"
    )

    # analyse water availability -----------------------------------------------------------------------

    # check the correspondence between the Ellenberg values and estimated water availability
    boxplot(veg$Feuchte ~ veg$WasserHH)

    # calculate the mean plot-level Ellenberg humidity indicator value
    WasserHH.Feuchte <- group_by(veg, Aufnahmenr, WasserHH, Bodenfeuchte) %>%
    summarise(
    # community mean
    Feuchte_CM = mean(Feuchte, na.rm = TRUE) %>%
    round(., 1),
    # community weighted mean
    Feuchte_CWM = weighted.mean(x = Feuchte, w = Deckung, na.rm = TRUE) %>%
    round(., 1)
    )

    # plot the plot-level Ellenberg humidity
    op <- par(mfcol = c(1, 2), mar = c(4, 4, 2, 2))
    plot(Feuchte_CWM ~ WasserHH,
    data = WasserHH.Feuchte, col = "red",
    pch = 20, ylab = "mittlere Feuchte nach Ellenberg"
    )
    points(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
    legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)

    plot(Feuchte_CWM ~ Bodenfeuchte, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "")
    points(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
    legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)
    par(op)

    # calculate correlation between the water availability and plot-level Ellenberg humidity
    cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, method = "kendall", exact = FALSE)
    cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE)
    cor.test(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CWM,
    method = "kendall", exact = FALSE
    )

    # add the calculated mean Ellenberg humidity to the main data table
    veg %<>% left_join(WasserHH.Feuchte)
    glimpse(veg)

    # analyse LDMC -------------------------------------------------------------------------------------

    # LDMC = leaf dry matter content

    # inspect the distribution of LDMC visually
    hist(veg$LDMC)

    # plot LDMC across water availability levels
    boxplot(
    LDMC ~ WasserHH,
    dat = veg, ylab = "LDMC", xlab = "geschaetzte Wasserverfuegbarkeit (ordinal)",
    col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
    )
    boxplot(LDMC ~ Feuchte_CWM,
    dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg",
    x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
    )
    ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    geom_point(na.rm = TRUE) +
    stat_smooth(method = "lm", col = '#E69F00', se = FALSE, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]") +
    theme_minimal()

    # perform regression analyses
    # using WasserHH as explanatory variable
    fm0.LDMC <- lm(LDMC ~ WasserHH, data = veg)
    summary(fm0.LDMC)
    performance::check_model(fm0.LDMC)

    fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover
    summary(fm1.LDMC)
    performance::check_model(fm1.LDMC)

    # compare the two models
    tbl_merge(
    tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)),
    tab_spanner = c("**Non-weighted**", "**Weigted**")
    )

    # using Bodenfeuchte as explanatory variable
    fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg)
    summary(fm0.LDMC.b)
    performance::check_model(fm0.LDMC.b)

    fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.LDMC.b)
    performance::check_model(fm1.LDMC.b)

    # compare the two models
    tbl_merge(
    tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)),
    tab_spanner = c("**Non-weighted**", "**Weigted**")
    )

    ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2],
    col = '#E69F00', size = 1, na.rm = TRUE) +
    geom_abline(intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]") +
    theme_minimal() +
    annotate("text", x = 20, y = 270, label = "Non-weighted", col = '#E69F00') +
    annotate("text", x = 25, y = 320, label = "Weighted", col = '#56B4E9')

    # analyse SLA --------------------------------------------------------------------------------------

    # SLA = specific leaf area

    # inspect the distribution of SLA visually
    hist(veg$SLA)
    hist(log(veg$SLA))

    # test the normality of distribution
    shapiro_test(veg$SLA)
    shapiro_test(log(veg$SLA))

    # add log-transformed values to the main data table
    veg %<>% mutate(SLA.log = log(SLA))

    # plot the distribution of SLA across water availability levels
    op <- par(mar = c(4, 5, 2, 3))
    boxplot(SLA ~ WasserHH,
    dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = "SLA", col = "blue", log = "y"
    )
    filter(veg, WasserHH == 2 & SLA > 25) %>%
    select(WissName, Aufnahmenr, SLA)
    filter(veg, WissName == "Euphorbia cyparissias") %>%
    select(WissName, Aufnahmenr, SLA)

    boxplot(SLA.log ~ WasserHH,
    dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = expression(log("SLA")), col = "blue"
    )
    par(op)
    filter(veg, WasserHH == 7 & SLA.log < 3) %>%
    select(WissName, Aufnahmenr, SLA)

    # perform regression analyses
    fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.SLA)
    performance::check_model(fm1.SLA)

    fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.SLA.log)
    performance::check_model(fm1.SLA.log)

    fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.SLA.log.b)
    performance::check_model(fm1.SLA.log.b)

    # plot the regression line
    ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) +
    theme_minimal()

    # look at the relationship between SLA and LDMC
    plot(SLA ~ LDMC, data = veg)
    cor.test(veg$SLA, veg$LDMC, use = "complete.obs")

    # analyse height -----------------------------------------------------------------------------------

    # inspect distributions visually
    hist(veg$Hoehe.avg)
    hist(log(veg$Hoehe.avg))

    # test the normality of distribution
    shapiro_test(veg$Hoehe.avg)
    shapiro_test(log(veg$Hoehe.avg))

    # add log-transformed values to the main data table
    veg %<>% mutate(Hoehe.avg.log = log(Hoehe.avg))

    # plot the distribution of height across water availability levels
    op <- par(mar = c(4, 5, 2, 3))
    boxplot(Hoehe.avg ~ WasserHH,
    data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = "Wuchshöhe [cm]", col = rainbow(7, start = 1, end = 5 / 7),
    log = "y"
    )

    filter(veg, WasserHH == 5 & Hoehe.avg > 50) %>%
    select(WissName, Aufnahmenr, Hoehe.avg)
    filter(veg, WasserHH == 7 & Hoehe.avg < 20) %>%
    select(WissName, Aufnahmenr, Hoehe.avg)

    boxplot(Hoehe.avg.log ~ WasserHH,
    data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = expression(log("Wuchshöhe [cm]")), col = rainbow(7, start = 1, end = 5 / 7)
    )
    par(op)

    # perform regression analysis
    fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg)
    summary(fm1.hoehe.avg.log)
    performance::check_model(fm1.hoehe.avg.log)

    fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg)
    summary(fm1.hoehe.avg.log.b)
    performance::check_model(fm1.hoehe.avg.log.b)

    # plot the regression line
    ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) +
    geom_point(na.rm = TRUE) +
    geom_abline(intercept = fm1.hoehe.avg.log.b$coefficients[1],
    slope = fm1.hoehe.avg.log.b$coefficients[2],
    col = '#56B4E9', size = 1, na.rm = TRUE) +
    geom_smooth(col = "red", se = FALSE, na.rm = TRUE) +
    labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) +
    theme_minimal()

    # analyse height variance --------------------------------------------------------------------------

    # plot height variance
    hist(veg$Hoehe.var)
    hist(log(veg$Hoehe.var))

    # add log-transformed values to the main data table
    veg %<>% mutate(Hoehe.var.log = log(Hoehe.var))

    # check the relationship between height and height variance
    boxplot(Hoehe.var.log ~ WasserHH, data = veg)
    plot(Hoehe.var.log ~ Hoehe.avg, data = veg)
    cor.test(veg$Hoehe.var.log, veg$Hoehe.avg)

    # calculate the coefficient of variation
    veg %<>% mutate(Hoehe.cv = sqrt(Hoehe.var) / Hoehe.avg * 100)

    hist(veg$Hoehe.cv)
    hist(log(veg$Hoehe.cv))

    veg %<>% mutate(Hoehe.cv.log = log(Hoehe.cv))

    plot(Hoehe.cv.log ~ Hoehe.avg, data = veg)
    cor.test(veg$Hoehe.cv.log, veg$Hoehe.avg)

    boxplot(Hoehe.cv ~ WasserHH,
    data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1]
    )
    filter(veg, WasserHH == 3 & Hoehe.cv > 100) %>%
    select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv)

    # perform regression analysis
    fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung)
    summary(fm1.Hoehe.cv.log)
    performance::check_model(fm1.Hoehe.cv.log)

    fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung)
    summary(fm1.Hoehe.cv.log.b)
    performance::check_model(fm1.Hoehe.cv.log.b)

    # analyse seed mass --------------------------------------------------------------------------------

    hist(veg$Germinulenmasse)
    hist(log(veg$Germinulenmasse))

    veg %<>% mutate(Germinulenmasse.log = log(Germinulenmasse))

    op <- par(mar = c(4, 5, 2, 3))
    boxplot(Germinulenmasse ~ WasserHH, data = veg, log = "y")
    boxplot(Germinulenmasse.log ~ WasserHH, data = veg, ylab = expression(log("Germinulenmasse")))
    filter(veg, Germinulenmasse.log > 4) %>%
    select(WissName, Germinulenmasse)
    par(op)

    # perform regression analysis
    fm1.gmasse.log <- lm(Germinulenmasse.log ~ WasserHH, data = veg, weight = Deckung)
    summary(fm1.gmasse.log)
    check_model(fm1.gmasse.log)

    fm1.gmasse.log.b <- lm(Germinulenmasse.log ~ Bodenfeuchte, data = veg, weight = Deckung)
    summary(fm1.gmasse.log.b)
    check_model(fm1.gmasse.log.b)

    # analyse life form --------------------------------------------------------------------------------

    # compare the distributions of life forms across vegetation types
    select(veg, Lebensform, Vegetation) %>%
    gtsummary::tbl_summary(by = Vegetation) %>%
    add_p()

    # calculate total % cover per life form across humidity levels
    lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>%
    summarise(Deckung = sum(Deckung))
    head(lft.1)
    tail(lft.1)

    lft.2 <- group_by(lft.1, Lebensform, WasserHH) %>%
    summarise(Deckung = mean(Deckung), .groups = "drop")
    head(lft.2)

    lft.3 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.2)
    lft.3 <- DescTools::as.matrix.xtabs(lft.3)
    lft.3
    chisq_test(lft.3, simulate.p.value = TRUE)

    # plot the distribution of life forms across humidity levels
    # using base R
    op <- par(mar = c(4, 5, 2, 3))
    barplot(lft.3,
    xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
    ylab = expression(sum("Deckung")),
    main = "Lebensformenspektren", ylim = c(0, 160)
    )
    legend(5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
    par(op)

    # using ggplot2
    ggplot(data = lft.2, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
    geom_bar(position = "stack", stat = "identity") +
    viridis::scale_fill_viridis(discrete = TRUE) +
    labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
    ggtitle("Lebensformenspektren") +
    theme_minimal()

    # analyse pollination type -------------------------------------------------------------------------

    # compare the distributions of pollination syndromes across vegetation types
    select(veg, Bestaeubung, Vegetation) %>%
    gtsummary::tbl_summary(by = Vegetation) %>%
    add_p()

    # calculate total % cover per pollination type across humidity levels
    bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>%
    summarise(Deckung = sum(Deckung))
    head(bst.1)
    tail(bst.1)

    bst.2 <- group_by(bst.1, Bestaeubung, WasserHH) %>%
    summarise(Deckung = mean(Deckung), .groups = "drop")
    head(bst.2)

    bst.3 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.2)
    bst.3 <- DescTools::as.matrix.xtabs(bst.3)
    bst.3
    chisq_test(bst.3, simulate.p.value = TRUE)

    # plot the distribution of pollination types
    # using base R
    op <- par(mar = c(4, 5, 2, 3))
    cols <- c("yellow", "brown", "grey", "lightblue")
    barplot(bst.3,
    xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")),
    main = "Bestäubungstypenspektren",
    sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols
    )
    legend(0, 140,
    legend = c("Wind", "Selbst", "multipel", "Insekten"),
    pch = 15, col = cols[4:1], cex = 0.7
    )

    # using ggplot2
    ggplot(data = bst.2, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
    geom_bar(position = "stack", stat = "identity") +
    viridis::scale_fill_viridis(
    discrete = TRUE, labels = c("Insekten", "multipel", "Selbst", "Wind")
    ) +
    labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
    ggtitle("Bestäubungstypenspektren") +
    theme_minimal()

    # analyse the relationships between traits ---------------------------------------------------------

    # test if height varies across life forms
    # plot height by life form
    ggpubr::ggboxplot(veg, x = "Lebensform", y = "Hoehe.avg.log", select = c("H", "G", "T", "mL"))

    # check the ANOVA assumptions
    # outliers
    group_by(veg, Lebensform) %>%
    identify_outliers(Hoehe.avg.log)
    # normality
    lm.hoe.lf <- lm(Hoehe.avg.log ~ Lebensform, data = veg)
    ggpubr::ggqqplot(residuals(lm.hoe.lf))
    rstatix::shapiro_test(residuals(lm.hoe.lf))
    # homogeneity of variances
    levene_test(veg, Hoehe.avg.log ~ Lebensform)

    # perform ANOVA and the Kruskal-Wallis test (non-parametric alternative)
    anova_test(veg, Hoehe.avg.log ~ Lebensform)
    kruskal_test(veg, Hoehe.avg.log ~ Lebensform)


    # test if the height varies across pollination types
    ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log")
    group_by(veg, Bestaeubung) %>%
    identify_outliers(Hoehe.avg.log) %>%
    select(Fortlaufendenr, WissName, Hoehe.avg, Hoehe.avg.log, is.outlier, is.extreme)
    lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
    ggqqplot(residuals(lm.hoe.best))
    shapiro_test(residuals(lm.hoe.best))
    levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.aov <- anova_test(veg, Hoehe.avg.log ~ Bestaeubung)

    # perform a post-hoc test (i.e. pairwise comparisons)
    hoe.best.pwc <- tukey_hsd(veg, Hoehe.avg.log ~ Bestaeubung)
    hoe.best.pwc

    # plot the results
    hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung")
    ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
    stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) +
    labs(
    subtitle = get_test_label(hoe.best.aov, detailed = TRUE),
    caption = get_pwc_label(hoe.best.pwc),
    y = expression(log("Wuchshöhe")),
    x = "Bestäubungstyp"
    )


    # test if the SLA varies across life forms
    ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL"))
    group_by(veg, Lebensform) %>%
    identify_outliers(SLA.log)
    lm.sla.lf <- lm(SLA.log ~ Lebensform, data = veg)
    ggqqplot(residuals(lm.sla.lf))
    shapiro_test(residuals(lm.sla.lf))
    levene_test(veg, SLA.log ~ Lebensform)
    anova_test(veg, SLA.log ~ Lebensform)


    # test if the seed mass varies across life forms
    gmasse <- select(veg, WissName, Germinulenmasse.log, Lebensform, Bestaeubung) %>%
    distinct()
    ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log")
    group_by(gmasse, Lebensform) %>%
    identify_outliers(Germinulenmasse.log)
    lm.gmasse.lf <- lm(Germinulenmasse.log ~ Lebensform, data = gmasse)
    ggqqplot(residuals(lm.gmasse.lf))
    shapiro_test(residuals(lm.gmasse.lf))
    levene_test(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform)
    gmasse.lf.pwc

    gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform")
    ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
    stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) +
    labs(
    subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE),
    caption = get_pwc_label(gmasse.lf.pwc),
    y = expression(log("Germinulenmasse")),
    x = "Lebensform"
    )


    # test if the seed mass varies across pollination types
    ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log")
    group_by(gmasse, Bestaeubung) %>%
    identify_outliers(Germinulenmasse.log)
    fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse)
    ggpubr::ggqqplot(residuals(fm.gmasse.best))
    rstatix::shapiro_test(residuals(fm.gmasse.best))
    ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung")
    gmasse.best.aov <- anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
    gmasse.best.aov
    gmasse.best.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Bestaeubung)
    gmasse.best.pwc

    gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung")
    ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") +
    stat_pvalue_manual(gmasse.best.pwc, hide.ns = TRUE) +
    labs(
    subtitle = get_test_label(gmasse.best.aov, detailed = TRUE),
    caption = get_pwc_label(gmasse.best.pwc),
    y = expression(log("Germinulenmasse")),
    x = "Bestäubungstyp"
    )

    # analyse species composition ----------------------------------------------------------------------

    mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
    str(mat)

    # perform correspondence analysis
    decorana(mat)

    fm.ca <- cca(mat)
    fm.ca
    summary(fm.ca)
    plot(fm.ca, display = "sites")
    plot(fm.ca)

    # shorten taxon names
    wname <- str_remove(veg$WissName, pattern = " cf\\.| x") %>%
    str_split(pattern = " ", simplify = TRUE) %>%
    substr(., 1, 3)
    wname <- paste(wname[, 1], wname[, 2], sep = ".")
    veg %<>% mutate(wname = wname)

    # repeat the same analysis with shortened taxon names provided
    mat <- xtabs(Deckung ~ Aufnahmenr + wname, data = veg)
    fm.ca <- cca(mat)
    summary(fm.ca)

    # plot the results
    summary(fm.ca)$species %>%
    as_tibble() %>%
    mutate(wname = rownames(summary(fm.ca)$species)) %>%
    ggplot(aes(x = CA1, y = CA2, label = wname)) +
    geom_point() +
    ggrepel::geom_text_repel(size = 1.5, force = 15, max.overlaps = 30) +
    theme_bw()

    # prepare data for trait ordination ----------------------------------------------------------------

    # calculate the proportion of each life form per plot
    lf.kt <- select(veg, Aufnahmenr, Deckung, Lebensform) %>%
    pivot_wider(
    names_from = Lebensform, values_from = Deckung,
    values_fn = sum, values_fill = 0, names_prefix = "lf."
    ) %>%
    mutate(total = rowSums(.[, -1])) %>%
    mutate(across(lf.H:lf.M, ~ . / total), .keep = "unused")

    # calculate the proportion of each pollination type per plot
    best.kt <- select(veg, Aufnahmenr, Deckung, Bestaeubung) %>%
    pivot_wider(
    names_from = Bestaeubung, values_from = Deckung,
    values_fn = sum, values_fill = 0, names_prefix = "b."
    ) %>%
    mutate(total = rowSums(.[, -1])) %>%
    mutate(across(b.wi:b.mb, ~ . / total), .keep = "unused")

    # compute community weighted means for numerical traits
    cwm <- group_by(veg, Aufnahmenr) %>%
    summarise(across(
    c(Hoehe.avg.log, Hoehe.cv.log, LDMC, SLA.log, Germinulenmasse.log),
    ~ weighted.mean(., w = Deckung, na.rm = TRUE)
    )) %>%
    rename(
    Ho.av = Hoehe.avg.log,
    Ho.cv = Hoehe.cv.log,
    SLA = SLA.log,
    GM = Germinulenmasse.log
    )

    # combine the three tables
    cwm %<>%
    left_join(lf.kt) %>%
    left_join(best.kt)
    cwm

    # perform principal components analysis (PCA) of CWMs ----------------------------------------------

    # analyse all traits
    pca1 <- FactoMineR::PCA(cwm[, -c(1:2)])
    summary(pca1)

    cowplot::plot_grid(
    plot.PCA(pca1),
    plot.PCA(pca1, c(1, 3))
    )

    # analyse numerical traits only
    colnames(cwm)
    pca2 <- PCA(select(cwm, Ho.av:GM))
    cowplot::plot_grid(
    plot.PCA(pca2),
    plot.PCA(pca2, c(1, 3))
    )

    # perform PCA of species traits --------------------------------------------------------------------

    # create a separate table containing numerical traits
    dat <- select(
    veg, WissName, Vegetation, Blattflaeche, Frischmasse, Trockenmasse,
    SLA, LDMC, Hoehe.avg, Hoehe.var, Germinulenmasse
    ) %>%
    rename(
    BlF = Blattflaeche,
    FrM = Frischmasse,
    TrM = Trockenmasse,
    Ho.av = Hoehe.avg,
    Ho.var = Hoehe.var,
    GM = Germinulenmasse
    ) %>%
    na.omit()
    glimpse(dat)

    # perform PCA
    pca3 <- FactoMineR::PCA(dat[, -c(1:2)])
    summary(pca3)
    plot.PCA(pca3)

    # add PC to the trait table
    dat$pc1 <- pca3$ind$coord[, 1]
    dat$pc2 <- pca3$ind$coord[, 2]
    pc12 <- pca3$ind$coord[, 1:2]

    # plot the PCA results
    ggplot(data = dat, aes(x = pc1, y = pc2, color = Vegetation, shape = Vegetation)) +
    geom_hline(yintercept = 0, lty = 2) +
    geom_vline(xintercept = 0, lty = 2) +
    geom_point(alpha = .8) +
    stat_ellipse(
    geom = "polygon", aes(fill = Vegetation), alpha = .2,
    show.legend = FALSE, level = .95
    ) +
    guides(
    color = guide_legend(title = "Vegetation"),
    shape = guide_legend(title = "Vegetation")
    ) +
    labs(x = "PC 1 (45.24%)", y = "PC 2 (20.27%)") +
    theme_minimal() +
    theme(
    panel.grid = element_blank(),
    panel.border = element_rect(fill = "transparent"),
    legend.position = "bottom"
    )

    # plot PCA results using kernels
    # Code from Björn Reu
    # Used to produce Fig. 2 in Diaz et al. 2016, Nature

    H <- Hpi(x = pc12) # optimal bandwidth estimation
    est <- kde(x = pc12, H = H, compute.cont = TRUE) # kernel density estimation

    # set contour probabilities for drawing contour levels
    cl <- contourLevels(est, prob = c(.5, .05, .001), approx = TRUE)

    # use envfit for drawing arrows, can be also done using trait loadings
    fit <- envfit(pc12, select(dat, BlF:GM))

    # create a plot
    par(mar = c(4, 4, 2, 2))
    cols <- ifelse(dat$Vegetation == "TR", "darkred", "darkblue")
    plot(est,
    cont = seq(1, 100, by = 1), display = "filled.contour2", add = FALSE, ylab = "", xlab = "",
    cex.axis = .75, ylim = c(-5, 4), xlim = c(-3, 7), las = 1
    )
    plot(est, abs.cont = cl[1], labels = .5, labcex = .75, add = TRUE, lwd = .75, col = "grey30")
    plot(est, abs.cont = cl[2], labels = .95, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
    plot(est, abs.cont = cl[3], labels = .99, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
    points(pc12, pch = 16, cex = .5, col = cols)
    plot(fit, cex = .90, col = 1)
    mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8)
    mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8)

    dat[dat$pc1 > 4, ]
    dat[dat$pc2 < -3, ]