# The R functions used to produce the generalised diameter distributions in Mey et al. 2020.
# Weibull functions in section 1 are needed to run the uni- and bimodal diameter models in sections 2 and 3.
# Author: Reinhard Mey
# Date: 22.07.2020, last change 01.12.2020
# Questions/feedback to R. Mey


# Section 1: WEIBULL FUNCTIONS
#########################################################################################################

# Functions to "2.2.1 Truncated unimodal diameter distributions"

f.tdweibull <- function(x, sh, sc, ys) {
  # Unimodal left-truncated PDF
  # sh = shape parameter
  # sc = scale parameter
  # ys = truncation threshold
  ifelse(x >= ys,
         (sh/sc) * (x/sc)^(sh-1) * exp((ys^sh - x^sh)/sc^sh),
         0)
}

f.tpweibull <- function(x, sh, sc, ys) {
  # Unimodal left-truncated CDF (used for normalisation in f.mtdweibull)
  # sh = shape parameter
  # sc = scale parameter
  # ys = truncation threshold
  ifelse(x >= ys,
         1 - exp((ys/sc)^sh) * exp(-(x/sc)^sh),
         0)
}

f.mtdweibull <- function(x, sh, sc, ys=c(0, 4, 12, 36)) {
  # Unimodal multi-left-truncated PDF (eqn 2)
  # sh = shape parameter
  # sc = scale parameter
  # ys = truncation thresholds in the Swiss NFI survey design (at 0, 4, 12 and 36 cm)
  ifelse(x < ys[2], f.tdweibull(x, sh, sc, ys[1]),
         ifelse((x >= ys[2]) & (x < ys[3]), f.tdweibull(x, sh, sc, ys[2]),
                ifelse((x >= ys[3]) & (x < ys[4]), f.tdweibull(x, sh, sc, ys[3]),
                       f.tdweibull(x, sh, sc, ys[4])))) /
    sum(f.tpweibull(ys[2], sh, sc, ys[1]),
        f.tpweibull(ys[3], sh, sc, ys[2]),
        f.tpweibull(ys[4], sh, sc, ys[3]),
        1) # normalisation to [0,1]
}

# Functions to "2.2.2 Truncated bimodal diameter distributions"

f.bitdweibull <- function(x, sh1, sc1, sh2, sc2, rho, ys) {
  # Bimodal left-truncated Weibull PDF
  # sh1, sh2 = shape parameters
  # sc1, sc2 = scale parameters
  # rho = proportion parameter
  # ys = truncation threshold
  ifelse(x >= ys,
         rho     * ((sh1/sc1) * (x/sc1)^(sh1-1) * exp((ys^sh1 - x^sh1)/sc1^sh1)) +
         (1-rho) * ((sh2/sc2) * (x/sc2)^(sh2-1) * exp((ys^sh2 - x^sh2)/sc2^sh2)),
         0)
}

f.bitpweibull <- function(x, sh1, sc1, sh2, sc2, rho, ys) {
  # Bimodal left-truncated CDF (used for normalisation in f.bimtdweibull)
  # sh1, sh2 = shape parameters
  # sc1, sc2 = scale parameters
  # rho = proportion parameter
  # ys = truncation threshold
  ifelse(x >= ys,
         rho     * (1 - exp((ys/sc1)^sh1) * exp(-(x/sc1)^sh1)) +
         (1-rho) * (1 - exp((ys/sc2)^sh2) * exp(-(x/sc2)^sh2)),
         0)
}

f.bimtdweibull <- function(x, sh1, sc1, sh2, sc2, rho, ys=c(0, 4, 12, 36)) {
  # Bimodal multi-left-truncated PDF (eqn 3)
  # sh1, sh2 = shape parameters
  # sc1, sc2 = scale parameters
  # rho = proportion parameter
  # ys = truncation thresholds in the Swiss NFI survey design (at 0, 4, 12 and 36 cm)
  ifelse(x < ys[2], f.bitdweibull(x, sh1, sc1, sh2, sc2, rho, ys[1]),
         ifelse((x >= ys[2]) & (x < ys[3]), f.bitdweibull(x, sh1, sc1, sh2, sc2, rho, ys[2]),
                ifelse((x >= ys[3]) & (x < ys[4]), f.bitdweibull(x, sh1, sc1, sh2, sc2, rho, ys[3]),
                       f.bitdweibull(x, sh1, sc1, sh2, sc2, rho, ys[4])))) /
    sum(f.bitpweibull(ys[2], sh1, sc1, sh2, sc2, rho, ys[1]),
        f.bitpweibull(ys[3], sh1, sc1, sh2, sc2, rho, ys[2]),
        f.bitpweibull(ys[4], sh1, sc1, sh2, sc2, rho, ys[3]),
        1) # normalisation to [0,1]
}
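
# --- Illustrative sanity check (added example, not part of the original Mey et al. script) ---
# Because of the normalisation above, the multi-truncated PDFs should integrate to ~1 for any
# admissible parameters. The parameter values used here (sh=2, sc=20; bimodal set) are arbitrary.
# Integration is done piecewise over the NFI truncation segments; this check is cheap and silent.
local({
  ys <- c(0, 4, 12, 36); up <- c(ys[-1], Inf)
  i.uni <- sum(sapply(1:4, function(k)
    integrate(f.mtdweibull, ys[k], up[k], sh=2, sc=20)$value))
  i.bi  <- sum(sapply(1:4, function(k)
    integrate(f.bimtdweibull, ys[k], up[k], sh1=2, sc1=15, sh2=4, sc2=50, rho=0.6)$value))
  stopifnot(abs(i.uni - 1) < 1e-3, abs(i.bi - 1) < 1e-3)
})
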
# Section 2: Functions to "2.2.3 Parameter estimation of uni- and bimodal distributions using one sample"
#########################################################################################################

# UNIMODAL CASE
f.fit.mtdweibull.NFI <- function(data, start=c(shape=2, scale=20)) {
  # Maximum likelihood estimation of the two parameters in f.mtdweibull
  # data is a dataframe with dbh (diameter) and repfa (representation factor)
  # Starting values for optimisation are fixed to shape=2 and scale=20

  ll.denswei <- function(parms, dbhs=data$dbh + 0.5){ # + 0.5, as NFI diameter measurements are floored to integers and otherwise zero diameters occur
    # function to calculate the log-likelihood of the truncated unimodal Weibull PDF
    shape <- parms["shape"]
    scale <- parms["scale"]
    sum(log( f.mtdweibull(dbhs, sh=shape, sc=scale) ))
  }

  # fit using optim
  (fit <- try(optim(start,
                    fn=ll.denswei,
                    dbhs=data$dbh + 0.5, # + 0.5, as NFI diameter measurements are floored to integers and otherwise zero diameters occur
                    lower=c(0.00001, 0.1),
                    upper=c(100, 1000),
                    method="L-BFGS-B",
                    control=list(fnscale=-1),
                    hessian=TRUE),
              silent=TRUE))

  if (inherits(fit, "try-error")) {
    return(c(est.shape=NA, est.scale=NA, se.shape=NA, se.scale=NA, AIC=NA))
  } else {
    est <- fit$par
    se <- sqrt(abs(diag(solve(fit$hessian)))) # solve creates the inverse of the matrix
    AIC <- -2 * fit$value + 2 * length(fit$par) # AIC = -2*logL + 2*edf
    return(c(est=est, se=se, AIC=AIC))
  }
}

# BIMODAL CASE
f.fit.bimtdweibull.NFI <- function(data, start=c(shape1=2, scale1=20, shape2=2, scale2=50, rho=0.7)) {
  # Maximum likelihood estimation of the five parameters in f.bimtdweibull
  # data is a dataframe with dbh (diameter) and repfa (representation factor)
  # Starting values for optimisation are fixed to shape1=shape2=2, scale1=20, scale2=50 and proportion parameter rho=0.7

  ll.dens2wei2 <- function(parms, dbhs=data$dbh + 0.5){ # + 0.5, as NFI diameter measurements are floored to integers and otherwise zero diameters occur
    # function to calculate the log-likelihood of the truncated bimodal Weibull PDF (eqn 4)
    shape1 <- parms["shape1"]
    scale1 <- parms["scale1"]
    shape2 <- parms["shape2"]
    scale2 <- parms["scale2"]
    rho <- parms["rho"]
    sum(log( f.bimtdweibull(dbhs, sh1=shape1, sc1=scale1, sh2=shape2, sc2=scale2, rho=rho) ))
  }

  # fit using optim
  (fit <- try(optim(start,
                    fn=ll.dens2wei2,
                    dbhs=data$dbh + 0.5, # + 0.5, as NFI diameter measurements are floored to integers and otherwise zero diameters occur
                    lower=c(0.00001, 0.1, 0.00001, 0.1, 0.00001),
                    upper=c(100, 1000, 100, 1000, 0.99999),
                    method="L-BFGS-B",
                    control=list(fnscale=-1),
                    hessian=TRUE),
              silent=TRUE))

  if (inherits(fit, "try-error")) {
    return(c(est.shape1=NA, est.scale1=NA, est.shape2=NA, est.scale2=NA, est.rho=NA,
             se.shape1=NA, se.scale1=NA, se.shape2=NA, se.scale2=NA, se.rho=NA, AIC=NA))
  } else {
    # permute sh1, sc1 with sh2, sc2 if sc1 > sc2 --> to ensure the correct classification of under- and overstorey
    if (fit$par["scale1"] > fit$par["scale2"]){
      est <- c(fit$par["shape2"], fit$par["scale2"], fit$par["shape1"], fit$par["scale1"], 1 - fit$par["rho"])
      names(est) <- names(fit$par)
      se <- c(se.shape1=NA, se.scale1=NA, se.shape2=NA, se.scale2=NA, se.rho=NA)
    } else {
      est <- fit$par
      se <- sqrt(abs(diag(solve(fit$hessian))))
    }
    AIC <- -2 * fit$value + 2 * length(fit$par) # AIC = -2*logL + 2*edf
    return(c(est=est, se=se, AIC=AIC))
  }
}
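
# --- Illustrative usage sketch (added example, not part of the original Mey et al. script) ---
# Single-sample fits on simulated data; the simulated diameters and repfa=1 are arbitrary
# stand-ins for real NFI data. Guarded with if (FALSE) so it is not executed on source.
if (FALSE) {
  set.seed(1)
  d.sim <- data.frame(dbh = floor(rweibull(300, shape=2, scale=25)), # floored, like NFI measurements
                      repfa = 1)
  f.fit.mtdweibull.NFI(d.sim)    # unimodal fit: estimates, standard errors and AIC
  f.fit.bimtdweibull.NFI(d.sim)  # bimodal fit of the same (unimodal) sample, e.g. for AIC comparison
}
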
# Section 3: Functions to "2.2.4 Parameter estimation of the uni- and bimodal Weibull log-likelihood regression models using multiple samples"
# UNI- AND BIMODAL DIAMETER MODELS
#########################################################################################################

# --- UNIMODAL MODEL ---
f.unimodal.model <- function(data.dbh, data.stand, optimizer,
                             start=c("b0"=0.318446, "b1"=0.437625, "b2"=0.053726,
                                     "b3"=4.01804, "b4"=-0.40918, "b5"=1.23391)) {
  # data.dbh = list with observed diameters per NFI sample
  # data.stand = data frame with stand attributes per NFI sample used as explanatory variables
  # optimizer = "Nelder-Mead"
  # start = starting values from single regression models separately fitted to the unimodal Weibull parameters (cf. SI1.3 Diameter models)

  f.ll.reg.w1 <- function(p, d, s){
    # Function to calculate the log-likelihood of truncated unimodal Weibull densities over N plots,
    # where the parameters "p" (vector with names) are coefficients of the regression functions of observable predictors
    # 6 parameters of the (best) unimodal model (cf. SI1.3 Model building):
    # shape: b0, b1, b2 (intercept, mean diameter, spread)
    # scale: b3, b4, b5 (intercept, log10(stems per hectare), diameter 90% quantile)
    b0 <- p["b0"]; b1 <- p["b1"]; b2 <- p["b2"]
    b3 <- p["b3"]; b4 <- p["b4"]; b5 <- p["b5"]

    ll <- 0
    N <- length(d)
    for (i in 1:N){ # loop over N samples
      d.temp <- d[[i]]
      # X: expl. stand variables
      X <- s[i, ]
      sh <- exp( t(c(1, X$md, X$spr)) %*% as.matrix(c(b0, b1, b2)) )
      sc <-    ( t(c(1, X$lnph, X$dq90)) %*% as.matrix(c(b3, b4, b5)) )^2
      ll.temp <- -sum(log( f.mtdweibull(x=d.temp$dbh + 0.5, sh=sh, sc=sc) ))
      if(!is.na(ll.temp)){
        ll <- ll + ll.temp
      }
    }
    ll
  }

  fit <- try( optim(par=start, # here we minimize the negative log-likelihood
                    fn=f.ll.reg.w1,
                    d=data.dbh,
                    s=data.stand,
                    lower=-Inf, upper=Inf,
                    method=optimizer,
                    gr=NULL,
                    control=list(trace = 2),
                    hessian=TRUE) )
  fit
}

# --- BIMODAL MODEL ---
f.bimodal.model <- function(data.dbh, data.stand, optimizer,
                            start=c("b0"=0.49275, "b1"=0.85070, "b2"=2.74795,
                                    "b3"=1.26120, "b4"=2.32216, "b5"=-0.46501,
                                    "b6"=0.64993, "b7"=6.99977, "b8"=1.02009)) {
  # data.dbh = list with observed diameters per NFI sample
  # data.stand = data frame with stand attributes per NFI sample used as explanatory variables
  # optimizer = "Nelder-Mead"
  # start = starting values from single regression models separately fitted to the bimodal Weibull parameters (cf. SI1.3 Diameter models)

  f.ll.reg.w2 <- function(p, d, s){
    # Function (to eqn 5) to calculate the log-likelihood of truncated bimodal Weibull densities over N plots,
    # where the parameters "p" (vector with names) are coefficients of the regression functions of observable predictors
    # 9 parameters of the (best) bimodal model (cf. SI1.3 Model building):
    # shape1: b0, b1 (intercept + diameter 10% quantile)
    # scale1: b2, b3 (intercept + mean diameter)
    # shape2: b4, b5, b6 (intercept + spread + overstorey quadratic mean diameter)
    # scale2: b7, b8 (intercept + overstorey quadratic mean diameter)
    b0 <- p["b0"]; b1 <- p["b1"]; b2 <- p["b2"]; b3 <- p["b3"]; b4 <- p["b4"]
    b5 <- p["b5"]; b6 <- p["b6"]; b7 <- p["b7"]; b8 <- p["b8"]

    ll <- 0
    N <- length(d)
    for (i in 1:N){ # loop over N samples
      d.temp <- d[[i]]
      # X: expl. stand variables
      X <- s[i, ]
      sh1 <- exp( t(c(1, X$dq10)) %*% as.matrix(c(b0, b1)) )
      sc1 <-    ( t(c(1, X$md)) %*% as.matrix(c(b2, b3)) )^2
      sh2 <- exp( t(c(1, X$spr, X$o.qmd)) %*% as.matrix(c(b4, b5, b6)) )
      sc2 <-    ( t(c(1, X$o.qmd)) %*% as.matrix(c(b7, b8)) )^2
      rho <- X$rho.emp # the proportion parameter (ρ) is determined previously by the breakpoint setting (cf. SI1.2)
      ll.temp <- -sum(log( f.bimtdweibull(x=c(d.temp$dbh.under + 0.5, d.temp$dbh.over + 0.5),
                                          sh1=sh1, sc1=sc1, sh2=sh2, sc2=sc2, rho=rho) ))
      if(!is.na(ll.temp)){
        ll <- ll + ll.temp
      }
    }
    ll
  }

  fit <- try( optim(par=start, # here we minimize the negative log-likelihood
                    fn=f.ll.reg.w2,
                    d=data.dbh,
                    s=data.stand,
                    lower=-Inf, upper=Inf,
                    method=optimizer,
                    gr=NULL,
                    control=list(trace = 2),
                    hessian=TRUE) )
  fit
}
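
# --- Illustrative usage sketch (added example, not part of the original Mey et al. script) ---
# Hypothetical call of the unimodal regression model on simulated samples; the real NFI data
# are not shipped with this script. Stand variables are derived with the Section 4 helper
# f.calc.uni. Guarded with if (FALSE) so it is not executed on source.
if (FALSE) {
  set.seed(2)
  data.dbh <- lapply(1:20, function(i)
    data.frame(dbh = floor(rweibull(100, shape=2, scale=25)), repfa = 1))
  data.stand <- as.data.frame(t(sapply(data.dbh, f.calc.uni))) # one row of predictors per sample
  fit.uni <- f.unimodal.model(data.dbh, data.stand, optimizer="Nelder-Mead")
  # f.bimodal.model is called analogously, with samples split by f.under.overstorey
  # and stand variables (incl. rho.emp) computed by f.calc.bi (Section 4)
}
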
# Section 4: HELPER FUNCTIONS
#########################################################################################################

# Functions to "SI1.2 Identification of uni- and bimodal NFI diameter samples"

f.breakp <- function(df) {
  # Function that finds a breakpoint between under- and overstorey of a diameter distribution via histogram classes
  # df is a list with NFI samples including dbh (diameter) and repfa (representation factor)
  res <- matrix(0, nrow=length(df), ncol=2)
  rownames(res) <- names(df)

  for (t in 1:length(df)){
    hist.d <- hist(rep(df[[t]]$dbh + 0.5, df[[t]]$repfa), plot=F, breaks="Sturges")
    # find (near-)zero density classes and count how many occur in a row;
    # the 5% density quantile serves as the "zero" threshold
    b.rle <- rle(hist.d$density == quantile(hist.d$density, 0.05)) # alternative threshold: quantile(hist.d$density, 0.1)

    # range of the diameters per sample must be at least 20 cm!
    if (diff(range(df[[t]]$dbh)) >= 20) {
      # 1. if there are zero probabilities in a row do:
      if (length(which(b.rle$lengths >= 2 & b.rle$values == TRUE)) >= 1) {
        # where do we have at least 2 zeros in a row?
        pos.zero <- which(b.rle$lengths >= 2 & b.rle$values == TRUE)
        # select the longest zero series
        pos.zero.sel <- pos.zero[which.max(b.rle$lengths[pos.zero])]
        # find the diameter class before the selected zero series
        low.b <- hist.d$breaks[sum(b.rle$lengths[seq_len(pos.zero.sel - 1)]) + 1]
        # find the diameter class after the selected zero series
        high.b <- hist.d$breaks[sum(b.rle$lengths[1:pos.zero.sel]) + 1]
        # the breakpoint is in between
        breakp <- mean(c(low.b, high.b))
        # remove the breakpoint if the density below or above the breakpoint is smaller
        # than 10% of all counts (/ 10; for a 5% threshold use / 20)
        if (sum(hist.d$counts[hist.d$mids < breakp]) < round(sum(hist.d$counts) / 10) |
            sum(hist.d$counts[hist.d$mids > breakp]) < round(sum(hist.d$counts) / 10)) {
          breakp <- NA
        }
      # 2. if there are no zero probabilities in a row, find the two highest densities and calculate the space between them;
      #    set the breakpoint if the difference between the maxima is more than the mean density
      #    and the space between them is more than half of the number of diameter classes
      } else if ( abs(diff(hist.d$density[order(hist.d$density, na.last=TRUE, decreasing=TRUE)[1:2]])) > mean(hist.d$density) &
                  abs(diff(order(hist.d$density, na.last=TRUE, decreasing=TRUE)[1:2])) > round(length(hist.d$breaks)/2) ) {
        breakp <- mean(hist.d$mids[order(hist.d$density, na.last=TRUE, decreasing=TRUE)[1:2]])
      # 3. in all other cases no breakpoint is set
      } else {
        breakp <- NA
      }
    } else {
      breakp <- NA
    }

    if (!is.na(breakp)) {
      lower <- xtabs(~ rep(df[[t]]$dbh + 0.5, df[[t]]$repfa) <= breakp)
      rho.emp <- lower["TRUE"] / (lower["FALSE"] + lower["TRUE"])
    } else {
      rho.emp <- NA
    }
    res[t, 1] <- breakp
    res[t, 2] <- rho.emp
    # remove(breakp, pos.zero, rho.emp)
  }
  colnames(res) <- c("breakp", "rho.emp")
  res
}
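
# --- Illustrative usage sketch (added example, not part of the original Mey et al. script) ---
# f.breakp on one simulated bimodal sample; the mixture values are arbitrary, and depending
# on the simulated sample a breakpoint may or may not be found (NA). Not executed on source.
if (FALSE) {
  set.seed(3)
  d.bim <- list(p1 = data.frame(dbh = floor(c(rweibull(80, shape=4, scale=10),   # understorey
                                              rweibull(80, shape=6, scale=45))), # overstorey
                                repfa = 1))
  f.breakp(d.bim) # one row per sample with breakp and rho.emp
}
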
f.under.overstorey <- function(df){
  # Function to split a diameter sample with the breakpoint (f.breakp) in under- and overstorey
  # df is a list with NFI samples including dbh (diameter) and repfa (representation factor)
  # zero probabilities in the histogram are used to identify the breakpoint(s)
  res <- f.breakp(df)
  breakpoints <- res[, "breakp"]

  d.understorey <- vector(mode="list", length=length(which(!is.na(breakpoints))))
  names(d.understorey) <- names(breakpoints[which(!is.na(breakpoints))])
  d.overstorey <- vector(mode="list", length=length(which(!is.na(breakpoints))))
  names(d.overstorey) <- names(breakpoints[which(!is.na(breakpoints))])

  idx <- 1
  for (n in which(!is.na(breakpoints))){ # use only cases where breakpoint estimation was successful
    d.understorey[[idx]] <- list(dbh.under   = df[[n]]$dbh[df[[n]]$dbh + 0.5 <= breakpoints[n]],
                                 repfa.under = df[[n]]$repfa[df[[n]]$dbh + 0.5 <= breakpoints[n]])
    d.overstorey[[idx]]  <- list(dbh.over    = df[[n]]$dbh[df[[n]]$dbh + 0.5 > breakpoints[n]],
                                 repfa.over  = df[[n]]$repfa[df[[n]]$dbh + 0.5 > breakpoints[n]])
    idx <- idx + 1
  }

  # delete all plots where under- or overstorey contains only 1 tree (without repfa)
  keep <- which(sapply(d.understorey, function(x) length(x$dbh.under)) != 1 &
                sapply(d.overstorey,  function(x) length(x$dbh.over))  != 1)
  d.understorey.red <- d.understorey[keep]
  d.overstorey.red  <- d.overstorey[keep]
  breakpoints.red <- breakpoints[names(breakpoints) %in% names(d.understorey.red)]

  # recombine under- and overstorey
  d.under.overstorey <- mapply(c, d.understorey.red, d.overstorey.red, SIMPLIFY=FALSE)
  d.under.overstorey
}

f.calc.bi <- function(df){
  # Helper function to calculate stand variables separated in under- and overstorey (bimodal case)
  # df is a dataframe with diameter (dbh) separated in under- and overstorey + associated repfa (representation factor)
  nph <- sum(c(df$repfa.under, df$repfa.over)) # number of trees per hectare
  qmd <- sqrt(weighted.mean( c(df$dbh.under + 0.5, df$dbh.over + 0.5)^2,
                             c(df$repfa.under, df$repfa.over) )) # quadratic mean diameter
  basfph <- pi/1e4 * sum( ( c(df$dbh.under + 0.5, df$dbh.over + 0.5)/2 )^2 *
                          c(df$repfa.under, df$repfa.over) ) # basal area per hectare
  spr <- max(df$dbh.over) - min(df$dbh.under) # spread between lowest and highest diameter
  # rho.emp is the number of understorey trees relative to all trees (weighted with repfa)
  rho.emp <- length(rep(df$dbh.under, df$repfa.under)) /
             (length(rep(df$dbh.over, df$repfa.over)) + length(rep(df$dbh.under, df$repfa.under)))

  # Different quantiles of the diameter distribution
  dq10 <- quantile(rep(c(df$dbh.under + 0.5, df$dbh.over + 0.5), c(df$repfa.under, df$repfa.over)), 0.10)[[1]] # 10% quantile of the diameter
  dq90 <- quantile(rep(c(df$dbh.under + 0.5, df$dbh.over + 0.5), c(df$repfa.under, df$repfa.over)), 0.90)[[1]] # 90% quantile of the diameter
  md <- weighted.mean(rep(c(df$dbh.under + 0.5, df$dbh.over + 0.5), c(df$repfa.under, df$repfa.over))) # mean diameter
  medid <- median(rep(c(df$dbh.under + 0.5, df$dbh.over + 0.5), c(df$repfa.under, df$repfa.over))) # median diameter

  # understorey
  u.nph <- sum(df$repfa.under)
  u.qmd <- sqrt(weighted.mean( (df$dbh.under + 0.5)^2, df$repfa.under ))
  u.basfph <- pi/1e4 * sum( ((df$dbh.under + 0.5)/2)^2 * df$repfa.under )
  u.spr <- max(df$dbh.under) - min(df$dbh.under)

  # overstorey
  o.nph <- sum(df$repfa.over)
  o.qmd <- sqrt(weighted.mean( (df$dbh.over + 0.5)^2, df$repfa.over ))
  o.basfph <- pi/1e4 * sum( ((df$dbh.over + 0.5)/2)^2 * df$repfa.over )
  o.spr <- max(df$dbh.over) - min(df$dbh.over)

  c(spr=spr, lnph=log10(nph), qmd=qmd, basfph=basfph, md=md, medid=medid, dq10=dq10, dq90=dq90,
    u.spr=u.spr, u.lnph=log10(u.nph), u.qmd=u.qmd, u.basfph=u.basfph,
    o.spr=o.spr, o.lnph=log10(o.nph), o.qmd=o.qmd, o.basfph=o.basfph,
    rho.emp=rho.emp)
}
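
# --- Illustrative usage sketch (added example, not part of the original Mey et al. script) ---
# Chaining f.under.overstorey and f.calc.bi on the same simulated bimodal sample as in the
# previous sketch; if no breakpoint is found the result is an empty list. Not executed on source.
if (FALSE) {
  set.seed(3)
  d.bim <- list(p1 = data.frame(dbh = floor(c(rweibull(80, shape=4, scale=10),
                                              rweibull(80, shape=6, scale=45))),
                                repfa = 1))
  d.split <- f.under.overstorey(d.bim)  # samples split into dbh.under/dbh.over (+ repfa)
  sapply(d.split, f.calc.bi)            # one column of stand variables per retained sample
}
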
f.calc.uni <- function(df){
  # Helper function to calculate stand variables for the unimodal case
  # df is a dataframe with diameter (dbh) and associated repfa (representation factor)
  nph <- sum(df$repfa) # number of stems per hectare
  qmd <- sqrt( weighted.mean( (df$dbh + 0.5)^2, df$repfa ) ) # quadratic mean diameter
  basfph <- pi/1e4 * sum( ((df$dbh + 0.5)/2)^2 * df$repfa ) # basal area per hectare
  dq10 <- quantile(rep(df$dbh + 0.5, df$repfa), 0.10)[[1]] # 10% quantile of the diameter
  dq90 <- quantile(rep(df$dbh + 0.5, df$repfa), 0.90)[[1]] # 90% quantile of the diameter
  md <- weighted.mean(rep(df$dbh + 0.5, df$repfa)) # mean diameter
  medid <- median(rep(df$dbh + 0.5, df$repfa)) # median diameter
  spr <- max(df$dbh) - min(df$dbh) # spread between lowest and highest diameter

  c(spr=spr, lnph=log10(nph), qmd=qmd, basfph=basfph, md=md, medid=medid, dq10=dq10, dq90=dq90)
}
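
# --- Illustrative usage sketch (added example, not part of the original Mey et al. script) ---
# f.calc.uni on one simulated unimodal sample; repfa=1 is an arbitrary stand-in for the
# NFI representation factors. Not executed on source.
if (FALSE) {
  set.seed(4)
  f.calc.uni(data.frame(dbh = floor(rweibull(150, shape=2, scale=25)), repfa = 1))
  # returns the named vector of stand-level predictors used in Section 3
}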