6 Case Study: French-Severn Forest
Now that we have walked through the segmentation and imputation processes, we can run an additional case study by applying the same algorithms and code to the French-Severn Forest (FSF) in the Great Lakes region of Ontario. Instead of separating the code into distinct sections, we run the entire segmentation and imputation workflows as single chunks and then present the results.
6.1 Segmentation
6.1.1 Run Algorithm
##################################
### INSTALL PACKAGES IF NEEDED ###
##################################
# install.packages(c('terra',
# 'tidyverse',
# 'exactextractr',
# 'sf',
# 'janitor',
# 'berryFunctions',
# 'lwgeom',
# 'magrittr',
# 'gridExtra',
# 'knitr'))
# make sure to have OTB installed from here:
# https://www.orfeo-toolbox.org/
#####################
### LOAD PACKAGES ###
#####################
# load packages
library(terra)
library(tidyverse)
library(exactextractr)
library(sf)
library(janitor)
library(berryFunctions)
library(lwgeom)
library(magrittr)
library(gridExtra)
library(knitr)
####################################
### SET CODE AND FILE PARAMETERS ###
####################################
# set file names for ALS input variables
# these should be gridded raster data with the same CRS and extent
# p95 = 95th percentile of returns height > 1.3 m
# cc = canopy cover (% of first returns > 2 m)
# cv = coefficient of variation of returns height > 1.3 m
p95_f <- 'D:/ontario_inventory/FSF/ALS/zq95.img'
cc_f <- 'D:/ontario_inventory/FSF/ALS/cov_2m.img'
cv_f <- 'D:/ontario_inventory/FSF/ALS/cv.img'
# set file location of roads shape file (spatial lines)
# roads will be polygonized, masked from segmentation
# and re-added to final dataset as polygons
roads_f <- 'D:/ontario_inventory/FSF/roads/FS_roads.shp'
# set file location of FRI polygons shape file
# FRI POLYTYPE should have a "WAT" classification to mask water polygons
fri <- 'D:/ontario_inventory/FSF/FRI/FSF_opi_polygon_CSRS_NAD83_17.shp'
# set output folder for files generated
# make sure no "/" at end of folder location!
out_dir <- 'C:/Users/bermane/Desktop/FSF'
# set folder location of OTB (where you installed OTB earlier)
otb_dir <- "C:/OTB/bin"
# set GRM segmentation parameters
# the defaults are listed below
# refer to paper or OTB GRM webpage for description of parameters
thresh <- "10"
spec <- "0.1"
spat <- "0.5"
# set file location of 2018 VLCE 2.0 landcover data
# using 2018 because it is the year of the FSF ALS acquisition
# can change based on ALS acquisition year
# download here:
# https://opendata.nfis.org/mapserver/nfis-change_eng.html
lc_f <- 'D:/ontario_inventory/VLCE/CA_forest_VLCE2_2018.tif'
###################################################
### LOAD MULTI BAND ALS RASTER FOR SEGMENTATION ###
###################################################
# stack rasters
spl <- rast(c(p95_f, cc_f, cv_f))
# apply smoothing function on 5 cell square
spl[[1]] <- focal(spl[[1]], w = 5, fun = "mean")
spl[[2]] <- focal(spl[[2]], w = 5, fun = "mean")
spl[[3]] <- focal(spl[[3]], w = 5, fun = "mean")
# create ALS template with all values equal to 1
spl_temp <- spl[[1]]
spl_temp[] <- 1
##################
### MASK ROADS ###
##################
# load roads layer
roads <- vect(roads_f)
# reproject to match lidar
roads <- project(roads, spl)
# create roads polygon
spl_r <- mask(spl_temp, roads, touches = T)
npix <- sum(values(spl_r), na.rm = T)
spl_r <- as.polygons(spl_r)
names(spl_r) <- 'POLYTYPE'
spl_r$POLYTYPE <- 'RDS'
spl_r$nbPixels <- npix
# mask road pixels to NA
spl <- spl %>%
  mask(., roads, inverse = T, touches = T)
###########################
### MASK WATER POLYGONS ###
###########################
# water polygons from the FRI are masked and re-added after segmentation
# load photo interpreted polygons
poly <- vect(fri)
# subset polygons that are WAT
poly_sub <- poly[poly$POLYTYPE %in% c('WAT')]
# reproject to match lidar
poly_sub <- project(poly_sub, spl)
# loop through water polygons, mask raster, and vectorize
for (i in 1:length(poly_sub)) {
  pt <- poly_sub$POLYTYPE[i]
  if (i == 1) {
    spl_pt <- spl_temp %>% crop(poly_sub[i], snap = 'out') %>%
      mask(poly_sub[i], touches = T)
    npix <- sum(values(spl_pt), na.rm = T)
    spl_pt <- as.polygons(spl_pt)
    names(spl_pt) <- 'POLYTYPE'
    spl_pt$POLYTYPE <- pt
    spl_pt$nbPixels <- npix
  } else{
    if (is.error(spl_temp %>% crop(poly_sub[i], snap = 'out') %>%
                   mask(poly_sub[i], touches = T)) == F) {
      spl_hold <- spl_temp %>% crop(poly_sub[i], snap = 'out') %>%
        mask(poly_sub[i], touches = T)
      npix <- sum(values(spl_hold), na.rm = T)
      spl_hold <- as.polygons(spl_hold)
      names(spl_hold) <- 'POLYTYPE'
      spl_hold$POLYTYPE <- pt
      spl_hold$nbPixels <- npix
      spl_pt <- rbind(spl_pt, spl_hold)
    }
  }
}
# reproject whole FRI to match lidar
poly <- project(poly, spl)
# mask lidar outside of FRI
spl <- mask(spl, poly, inverse = F, touches = T)
# mask WAT polygons
spl <- mask(spl, poly_sub, inverse = T, touches = T)
###############################################
### COMBINE ROAD AND WATER POLYGON DATASETS ###
###############################################
spl_pt <- rbind(spl_pt, spl_r)
##########################################
### DEAL WITH MISSING DATA AND RESCALE ###
##########################################
# if any band is missing values set all to NA
spl[is.na(spl[[1]])] <- NA
spl[is.na(spl[[2]])] <- NA
spl[is.na(spl[[3]])] <- NA
# create function to rescale values from 0 to 100 using 1st and 99th percentiles
scale_100 <- function(x) {
  # calculate 1st and 99th percentile of input raster
  perc <- values(x, mat = F) %>% quantile(., probs = c(0.01, 0.99), na.rm = T)
  # rescale raster using 1st and 99th percentiles
  x <- (x - perc[1]) / (perc[2] - perc[1]) * 100
  # reset values below 0 and above 100
  x[x < 0] <- 0
  x[x > 100] <- 100
  return(x)
}
# rescale rasters from 0 to 100
spl[[1]] <- scale_100(spl[[1]])
spl[[2]] <- scale_100(spl[[2]])
spl[[3]] <- scale_100(spl[[3]])
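As a quick sanity check, scale_100 can be applied to a small synthetic raster (a toy example with made-up values, not part of the FSF workflow):
# toy check of scale_100 on a synthetic raster
r <- rast(nrows = 10, ncols = 10)
values(r) <- rnorm(100)
range(values(scale_100(r))) # 0 100 after rescaling and clamping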
# check if main dir exists and create
if (dir.exists(out_dir) == F) {
dir.create(out_dir)
}
# check if temp dir exists and create
if (dir.exists(file.path(out_dir, 'temp')) == F) {
dir.create(file.path(out_dir, 'temp'))
}
# write raster to tif
writeRaster(spl,
filename = str_c(out_dir, '/temp/spl_stack.tif'),
overwrite = T)
# write spl_pt
writeVector(spl_pt,
str_c(out_dir, '/temp/water_roads_polygons.shp'),
overwrite = T)
# SET PARAMETERS
rast_in <- str_c(out_dir, '/temp/spl_stack.tif')
out_p <- str_c(out_dir, '/temp')
name_out <- str_c(
  'grm_',
  thresh,
  '_',
  gsub(".", "", spec, fixed = TRUE),
  '_',
  gsub(".", "", spat, fixed = TRUE)
)
# create function to run generic region merging
grm_otb <- function(otb_path = "",
                    raster_in = "",
                    out_path = "",
                    name = "",
                    method = "bs",
                    thresh = "",
                    spec = "0.5",
                    spat = "0.5") {
  # set configuration
  conf <- paste(
    "-in",
    raster_in,
    "-out",
    paste(out_path, "/", name, ".tif", sep = ""),
    "-criterion",
    method,
    "-threshold",
    thresh,
    "-cw",
    spec,
    "-sw",
    spat
  )
  # apply function in command line
  system(paste(otb_path, "/otbcli_GenericRegionMerging", " ", conf, sep = ""))
  # save configuration for further use
  write.table(
    x = conf,
    file = paste(out_path, "/", name, "_conf.txt", sep = ""),
    row.names = F,
    col.names = F
  )
}
# run grm
grm_otb(
otb_path = otb_dir,
raster_in = rast_in,
out_path = out_p,
name = name_out,
thresh = thresh,
spec = spec,
spat = spat
)
###########################
### MASK MISSING VALUES ###
###########################
# load grm raster
p <- rast(str_c(out_p, "/", name_out, ".tif"))
# load seg raster
mask <- rast(rast_in) %>% .[[1]]
# mask grm raster
p <- mask(p, mask)
# write grm raster
writeRaster(p, paste(out_p, "/", name_out, ".tif", sep = ""),
            overwrite = T)
# convert to vector based on cell value
vec <- as.polygons(p)
# create table of number of pixels in each polygon
num <- as.vector(values(p))
num_pix <- tabyl(num)
# drop na row
num_pix <- na.omit(num_pix)
# get pixel ids from vector
vec_dat <- tibble(id = values(vec)[, 1])
colnames(vec_dat) <- 'id'
# loop through values and add to vector data
vec_dat$nbPixels <- NA
for (i in 1:NROW(vec_dat)) {
  vec_dat$nbPixels[i] <- num_pix$n[num_pix$num == vec_dat$id[i]]
}
# remove current column of data and add id
# add nbPixels to vector
vec <- vec[, -1]
vec$id <- vec_dat$id
vec$nbPixels <- vec_dat$nbPixels
##################################
### ADD PRE-ALLOCATED POLYGONS ###
##################################
# load polygon dataset
p <- vec
# reproject segmented polygons to ensure same crs
p <- project(p, spl_pt)
# add non-FOR POLYTYPE polygons back in
p2 <- rbind(p, spl_pt)
#####################
### ADD LANDCOVER ###
#####################
# load VLCE 2.0 landcover dataset
lc <- rast(lc_f)
# project polygons to CRS of raster
p_lc <- project(p2, lc)
# crop raster
lc <- crop(lc, p_lc)
# convert to sf
p_lcsf <- st_as_sf(p_lc)
# extract landcover values
lc_vals <- exact_extract(lc, p_lcsf)
# set landcover class key
lc_key <- c(`0` = 'NA',
            `20` = 'Water',
            `31` = 'Snow/Ice',
            `32` = 'Rock/Rubble',
            `33` = 'Exposed/Barren Land',
            `40` = 'Bryoids',
            `50` = 'Shrubland',
            `80` = 'Wetland',
            `81` = 'Wetland-Treed',
            `100` = 'Herbs',
            `210` = 'Coniferous',
            `220` = 'Broadleaf',
            `230` = 'Mixed Wood')
# find dominant lc type in each polygon
# if there are multiple modes keep them
# apply over list
lc_mode <- sapply(lc_vals, function(x){
  x$value <- recode(x$value, !!!lc_key)
  x <- x %>% group_by(value) %>% summarize(sum = sum(coverage_fraction))
  m <- x$value[which(x$sum == max(x$sum))]
  return(paste(m, collapse = " "))
})
# add to polygon dataset
p2$dom_lc <- lc_mode
# set landcover class key with single forested class
lc_key_for <- c(`0` = 'NA',
                `20` = 'Water',
                `31` = 'Snow/Ice',
                `32` = 'Rock/Rubble',
                `33` = 'Exposed/Barren Land',
                `40` = 'Bryoids',
                `50` = 'Shrubland',
                `80` = 'Wetland',
                `81` = 'Forest',
                `100` = 'Herbs',
                `210` = 'Forest',
                `220` = 'Forest',
                `230` = 'Forest')
# identify polygons that are at least 50% forested
# apply over list
lc_dom_for <- sapply(lc_vals, function(x){
  x$value <- recode(x$value, !!!lc_key_for)
  x <- x %>% group_by(value) %>% summarize(sum = sum(coverage_fraction))
  m <- x$value[which(x$sum == max(x$sum))]
  if((length(m) == 1) & (m == 'Forest')[1]){
    if(x$sum[x$value == m]/sum(x$sum) >= 0.5){
      return('Yes')
    } else{return('No')}
  } else{return('No')}
})
# add to polygon dataset
p2$dom_for <- lc_dom_for
##############################
### ADD AREA AND PERIMETER ###
##############################
# convert to sf
p2_sf <- st_as_sf(p2)
# calculate perimeter
p2$perim <- st_perimeter(p2_sf) %>% as.numeric
# calculate area
p2$area <- st_area(p2_sf) %>% as.numeric
# write to file
writeVector(p2, str_c(out_dir, "/", name_out, ".shp"),
overwrite = T)
###########################################
### EXTRACT FINAL POLYGON SUMMARY STATS ###
###########################################
# set polygon file, output location, GRM input raster, and name
file <- str_c(out_dir, "/", name_out, ".shp")
out_loc <- out_dir
grm_input <- str_c(out_dir, '/temp/spl_stack.tif')
name <- name_out
# create standard error function
se <- function(x)
  sd(x) / sqrt(length(x))
# load file
p <- vect(file)
# convert to sf
p_sf <- st_as_sf(p)
# subset non masked WAT and RDS polygons
p2_sf <- p[is.na(p$POLYTYPE)] %>% st_as_sf
p2 <- p[is.na(p$POLYTYPE)] %>% as.data.frame
# calculate perimeter to area ratio
p2$p_to_a <- p2$perim / p2$area
p2$p_to_a <- round(p2$p_to_a, 3)
# calculate msi
p2$msi <- p2$perim / sqrt(pi * p2$area)
# load original raster input file
ras <- rast(grm_input)
# rename bands
names(ras) <- c('p95', 'cc', 'cv')
# extract pixel values
pvals <- exact_extract(ras, p2_sf)
# calculate SSE
sse <- sapply(
  pvals,
  FUN = function(x) {
    p95_mean <- mean(x$p95, na.rm = T)
    cc_mean <- mean(x$cc, na.rm = T)
    cv_mean <- mean(x$cv, na.rm = T)
    return(c(sum((x$p95 - p95_mean) ^ 2, na.rm = T),
             sum((x$cc - cc_mean) ^ 2, na.rm = T),
             sum((x$cv - cv_mean) ^ 2, na.rm = T)))
  }
)
# transpose
sse <- t(sse)
# calculate final sums
sse <- colSums(sse)
# unlist values
pvals2 <- do.call(rbind, pvals)
# calculate global mean values
p95_mean <- mean(pvals2$p95, na.rm = T)
cc_mean <- mean(pvals2$cc, na.rm = T)
cv_mean <- mean(pvals2$cv, na.rm = T)
rm(pvals2)
# calculate SST
sst <- sapply(
  pvals,
  FUN = function(x) {
    return(c(sum((x$p95 - p95_mean) ^ 2, na.rm = T),
             sum((x$cc - cc_mean) ^ 2, na.rm = T),
             sum((x$cv - cv_mean) ^ 2, na.rm = T)))
  }
)
# transpose
sst <- t(sst)
# calculate final sums
sst <- colSums(sst)
# calculate r2 values
r2_p95 <- 1 - (sse[1] / sst[1]) %>% round(3)
r2_cc <- 1 - (sse[2] / sst[2]) %>% round(3)
r2_cv <- 1 - (sse[3] / sst[3]) %>% round(3)
r2_all <- (sum(r2_p95, r2_cc, r2_cv) / 3) %>% round(3)
# create dataframe with values wanted
df <- data.frame(
  alg = name,
  min_pix = (min(p2$nbPixels)),
  max_pix = (max(p2$nbPixels)),
  mean_pix = (mean(p2$nbPixels)),
  med_pix = (median(p2$nbPixels)),
  num_poly = NROW(p2),
  mean_area = mean(p2$area),
  se_area = se(p2$area),
  sd_area = sd(p2$area),
  mean_perim = mean(p2$perim),
  se_perim = se(p2$perim),
  sd_perim = sd(p2$perim),
  mean_p_a = mean(p2$p_to_a),
  se_p_a = se(p2$p_to_a),
  sd_p_a = sd(p2$p_to_a),
  mean_msi = mean(p2$msi),
  se_msi = se(p2$msi),
  sd_msi = sd(p2$msi),
  r2_p95 = r2_p95,
  r2_cc = r2_cc,
  r2_cv = r2_cv,
  r2_all = r2_all
)
# round numeric columns
df %<>%
  mutate_at(c(
    'min_pix',
    'max_pix',
    'mean_pix',
    'med_pix',
    'mean_area',
    'se_area',
    'sd_area',
    'mean_perim',
    'se_perim',
    'sd_perim'
  ), function(x)
    round(x, 2)) %>%
  mutate_at(c('mean_p_a',
              'se_p_a',
              'sd_p_a',
              'mean_msi',
              'se_msi',
              'sd_msi'),
            function(x)
              round(x, 4))
#####################
### ADD FRI STATS ###
#####################
# load interpreter derived polygons to extract statistics
pfri <- vect(fri)
# convert to sf
pfri_sf <- st_as_sf(pfri)
# calculate perimeter
pfri$perim <- st_perimeter(pfri_sf) %>% as.numeric
# calculate area
pfri$area <- st_area(pfri_sf) %>% as.numeric
# calculate nbPixels
pfri$nbPixels <- pfri$area / 400
# calculate perimeter to area ratio
pfri$p_to_a <- pfri$perim / pfri$area
pfri$p_to_a <- round(pfri$p_to_a, 3)
# subset all non water/ucl polygons
p2fri_sf <- pfri[!(pfri$POLYTYPE %in% c('WAT', 'UCL'))] %>% st_as_sf
p2fri <- pfri[!(pfri$POLYTYPE %in% c('WAT', 'UCL'))] %>% as.data.frame
# calculate msi
p2fri$msi <- p2fri$perim / sqrt(pi * p2fri$area)
# load original raster input file
ras <- rast(grm_input)
# rename bands
names(ras) <- c('p95', 'cc', 'cv')
# extract pixel values
pvals <- exact_extract(ras, p2fri_sf)
# calculate SSE
sse <- sapply(
  pvals,
  FUN = function(x) {
    # subset values based on coverage fraction
    x %<>% filter(coverage_fraction >= 0.5)
    p95_mean <- mean(x$p95, na.rm = T)
    cc_mean <- mean(x$cc, na.rm = T)
    cv_mean <- mean(x$cv, na.rm = T)
    return(c(sum((x$p95 - p95_mean) ^ 2, na.rm = T),
             sum((x$cc - cc_mean) ^ 2, na.rm = T),
             sum((x$cv - cv_mean) ^ 2, na.rm = T)))
  }
)
# transpose
sse <- t(sse)
# calculate final sums
sse <- colSums(sse)
# unlist values
pvals2 <- do.call(rbind, pvals)
# subset values based on coverage fraction
pvals2 %<>% filter(coverage_fraction >= 0.5)
# calculate global mean values
p95_mean <- mean(pvals2$p95, na.rm = T)
cc_mean <- mean(pvals2$cc, na.rm = T)
cv_mean <- mean(pvals2$cv, na.rm = T)
rm(pvals2)
# calculate SST
sst <- sapply(
  pvals,
  FUN = function(x) {
    # subset values based on coverage fraction
    x %<>% filter(coverage_fraction >= 0.5)
    return(c(sum((x$p95 - p95_mean) ^ 2, na.rm = T),
             sum((x$cc - cc_mean) ^ 2, na.rm = T),
             sum((x$cv - cv_mean) ^ 2, na.rm = T)))
  }
)
# transpose
sst <- t(sst)
# calculate final sums
sst <- colSums(sst)
# calculate r2 values
r2_p95 <- 1 - (sse[1] / sst[1]) %>% round(3)
r2_cc <- 1 - (sse[2] / sst[2]) %>% round(3)
r2_cv <- 1 - (sse[3] / sst[3]) %>% round(3)
r2_all <- (sum(r2_p95, r2_cc, r2_cv) / 3) %>% round(3)
# create dataframe with values wanted
ms_df <- data.frame(
  alg = 'FRI',
  min_pix = (min(p2fri$area / 400)),
  max_pix = (max(p2fri$area / 400)),
  mean_pix = (mean(p2fri$area / 400)),
  med_pix = (median(p2fri$area / 400)),
  num_poly = NROW(p2fri),
  mean_area = mean(p2fri$area),
  se_area = se(p2fri$area),
  sd_area = sd(p2fri$area),
  mean_perim = mean(p2fri$perim),
  se_perim = se(p2fri$perim),
  sd_perim = sd(p2fri$perim),
  mean_p_a = mean(p2fri$p_to_a),
  se_p_a = se(p2fri$p_to_a),
  sd_p_a = sd(p2fri$p_to_a),
  mean_msi = mean(p2fri$msi),
  se_msi = se(p2fri$msi),
  sd_msi = sd(p2fri$msi),
  r2_p95 = r2_p95,
  r2_cc = r2_cc,
  r2_cv = r2_cv,
  r2_all = r2_all
)
# round numeric columns
ms_df %<>%
  mutate_at(c('min_pix',
              'max_pix',
              'mean_pix',
              'med_pix'),
            function(x)
              round(x)) %>%
  mutate_at(c(
    'mean_area',
    'se_area',
    'sd_area',
    'mean_perim',
    'se_perim',
    'sd_perim'
  ), function(x)
    round(x, 2)) %>%
  mutate_at(c('mean_p_a',
              'se_p_a',
              'sd_p_a',
              'mean_msi',
              'se_msi',
              'sd_msi'),
            function(x)
              round(x, 4))
# bind df
df <- rbind(df, ms_df)
# write df as csv
write.csv(df,
          file = str_c(out_loc, '/summary_stats.csv'),
          row.names = F)
6.1.2 Results: Summary Statistics
Polygon size statistics:
###########################################
### Summary Stats and Additional Tables ###
###########################################
# Size
t1 <- as_tibble(df[, 1:6])
# change to ha
t1[, 2:5] <- round(t1[, 2:5] / 25, 2)
names(t1) <- c('Dataset', "Min Ha", 'Max Ha', "Mean Ha",
               "Median Ha", "Number of Polygons")
knitr::kable(t1, caption = "Table 1: Polygon Size Stats", label = NA)
Dataset | Min Ha | Max Ha | Mean Ha | Median Ha | Number of Polygons |
---|---|---|---|---|---|
grm_10_01_05 | 0.04 | 60.00 | 4.71 | 3.40 | 223321 |
FRI | 0.00 | 821.08 | 5.24 | 1.16 | 207121 |
The FSF is much larger than the RMF, with over twice as many polygons. Mean polygon size is around 5 Ha for both the GRM segmentation and the FRI, but median polygon size is much smaller in the FRI. This tells us that, compared to the GRM segmentation, the FSF FRI contains many more small polygons as well as a number of very large ones.
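As a note on units: the conversion from pixel counts to hectares in Table 1 assumes 20 m pixels.
# pixel-to-hectare conversion used in Table 1
# (20 m)^2 = 400 m^2 per pixel; 10000 m^2 per ha / 400 m^2 = 25 pixels per ha
10000 / (20 * 20) # 25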
Area and perimeter:
# Area and Perim
t2 <- as_tibble(df[, c(1, 7:12)])
# change area to Ha
t2[, 2:4] <- round(t2[, 2:4] / 10000, 2)
names(t2) <- c("Dataset", "Mean Area (Ha)", "SE Area", "SD Area",
               "Mean Perimeter (m)", "SE Perimeter", "SD Perimeter")
knitr::kable(t2, caption = "Table 2: Polygon Area and Perimeter Stats", label = NA)
Dataset | Mean Area (Ha) | SE Area | SD Area | Mean Perimeter (m) | SE Perimeter | SD Perimeter |
---|---|---|---|---|---|---|
grm_10_01_05 | 4.71 | 0.01 | 4.27 | 1155.00 | 1.09 | 517.22 |
FRI | 5.25 | 0.02 | 10.49 | 1257.18 | 3.78 | 1719.51 |
GRM mean area and perimeter are smaller than those of the FRI, with less variability.
Shape index:
# Mean Shape Index
t3 <- as_tibble(df[, c(1, 16:18)])
names(t3) <- c("Dataset", "Mean Shape Index", "SE Shape Index", "SD Shape Index")
knitr::kable(t3, caption = "Table 3: Polygon Shape Index Stats", label = NA)
Dataset | Mean Shape Index | SE Shape Index | SD Shape Index |
---|---|---|---|
grm_10_01_05 | 3.3014 | 0.0015 | 0.6915 |
FRI | 4.1344 | 0.0035 | 1.6105 |
GRM polygons are more compact.
R2:
# R2
t4 <- as_tibble(df[, c(1, 19:22)])
# round
t4[, 2:5] <- round(t4[, 2:5], 2)
names(t4) <- c('Dataset', 'R2 P95', 'R2 Can Cov', 'R2 Coeff Var', 'R2 Overall')
knitr::kable(t4, caption = "Table 4: Polygon R2 Stats", label = NA)
Dataset | R2 P95 | R2 Can Cov | R2 Coeff Var | R2 Overall |
---|---|---|---|---|
grm_10_01_05 | 0.88 | 0.90 | 0.88 | 0.88 |
FRI | 0.82 | 0.82 | 0.77 | 0.80 |
GRM polygons have an overall R2 value of 0.88, compared to 0.80 for the FRI, indicating that the GRM polygons are more internally homogeneous with respect to the three ALS input metrics.
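The R2 reported here is the share of total pixel-level variance in each ALS metric explained by the polygon means, computed as 1 - SSE/SST in the code above. A minimal sketch with made-up numbers illustrates the calculation:
# toy illustration of the R2 statistic: 1 - SSE/SST
vals <- c(10, 12, 30, 32) # hypothetical pixel values
grp <- c(1, 1, 2, 2) # hypothetical polygon membership
sse <- sum(tapply(vals, grp, function(v) sum((v - mean(v)) ^ 2)))
sst <- sum((vals - mean(vals)) ^ 2)
1 - sse / sst # ~0.99: polygon means capture most of the variance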
6.1.3 Results: Distribution Plots
Polygon size:
# plot density GRM in ha
g1 <- ggplot(data.frame(nbPixels = p2$nbPixels / 25), aes(x = nbPixels)) +
  geom_density(fill = 'grey') +
  xlim(c(0, 500/25)) +
  ylim(c(0, 0.60)) +
  geom_vline(aes(xintercept = median(nbPixels)),
             linetype = "dashed",
             linewidth = 0.6) +
  theme_bw() +
  xlab('Number of Hectares') +
  ylab('Density') +
  ggtitle('GRM Distribution of Polygon Size') +
  theme(text = element_text(size = 25))
# plot density FRI in ha
g2 <- ggplot(data.frame(nbPixels = p2fri$nbPixels / 25), aes(x = nbPixels)) +
  geom_density(fill = 'grey') +
  xlim(c(0, 500/25)) +
  ylim(c(0, 0.60)) +
  geom_vline(aes(xintercept = median(nbPixels)),
             linetype = "dashed",
             linewidth = 0.6) +
  theme_bw() +
  xlab('Number of Hectares') +
  ylab('Density') +
  ggtitle('FRI Distribution of Polygon Size') +
  theme(text = element_text(size = 25))
# plot together
grid.arrange(g1, g2)
The density plots of polygon size give a good indication of how the datasets differ. GRM polygons are concentrated in the ~2-6 Ha range, whereas the FRI has a large number of very small polygons as well as a fatter tail of ~10-20 Ha polygons.
Shape index:
# plot shape index GRM
g1 <- ggplot(data.frame(msi = as.numeric(p2$msi)), aes(x = msi)) +
  geom_density(fill = 'grey') +
  xlim(c(0, 8)) +
  ylim(c(0, 1.5)) +
  geom_vline(aes(xintercept = median(msi)),
             linetype = "dashed",
             linewidth = 0.6) +
  theme_bw() +
  xlab('Shape Index') +
  ylab('Density') +
  ggtitle('GRM Distribution of Shape Index') +
  theme(text = element_text(size = 25))
# plot shape index FRI
g2 <- ggplot(data.frame(msi = as.numeric(p2fri$msi)), aes(x = msi)) +
  geom_density(fill = 'grey') +
  xlim(c(0, 8)) +
  ylim(c(0, 1.5)) +
  geom_vline(aes(xintercept = median(msi)),
             linetype = "dashed",
             linewidth = 0.6) +
  theme_bw() +
  xlab('Shape Index') +
  ylab('Density') +
  ggtitle('FRI Distribution of Shape Index') +
  theme(text = element_text(size = 25))
# plot together
grid.arrange(g1, g2)
The distribution of shape index confirms that GRM polygons are more compact. Remember that we are using the same GRM algorithm parameters that were used to segment the RMF; these parameters can be adjusted to meet the needs of forested areas with different characteristics.
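For example, to test a coarser segmentation you could re-run the grm_otb function defined above with a different parameter set (the values below are illustrative only, not tuned for the FSF):
# illustrative re-run of GRM with a higher merging threshold
grm_otb(
  otb_path = otb_dir,
  raster_in = rast_in,
  out_path = out_p,
  name = 'grm_20_01_05',
  thresh = "20",
  spec = "0.1",
  spat = "0.5"
)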
6.2 Imputation Over FRI
6.2.1 Run Algorithm
##################################
### INSTALL PACKAGES IF NEEDED ###
##################################
# install.packages(c('terra',
# 'tidyverse',
# 'exactextractr',
# 'sf',
# 'magrittr',
# 'gridExtra',
# 'RANN',
# 'reshape2',
# 'viridis',
# 'scales',
# 'janitor',
# 'kableExtra',
# 'knitr'))
# make sure to have OTB installed from here:
# https://www.orfeo-toolbox.org/
#####################
### LOAD PACKAGES ###
#####################
# load packages
library(terra)
library(tidyverse)
library(exactextractr)
library(sf)
library(magrittr)
library(gridExtra)
library(RANN)
library(reshape2)
library(viridis)
library(scales)
library(janitor)
library(kableExtra)
library(knitr)
####################################
### SET CODE AND FILE PARAMETERS ###
####################################
# set file names for ALS input variables
# first set the imputation X-variables
lidar_imp <- c('agb' = 'D:/ontario_inventory/FSF/EFI/Biomass.img',
               'gmvwl' = 'D:/ontario_inventory/FSF/EFI/GMV_WL.img',
               'pcum8' = 'D:/ontario_inventory/FSF/ALS/zpcum8.img',
               'ba' = 'D:/ontario_inventory/FSF/EFI/BasalArea.img',
               'b6' = 'D:/ontario_inventory/FSF/sentinel/boa/b6_fsf_2018.tif')
# next set the data screening variables
lidar_scr <- c('p95' = 'D:/ontario_inventory/FSF/ALS/zq95.img',
               'cc' = 'D:/ontario_inventory/FSF/ALS/cov_2m.img')
# set file location of FRI polygons shape file
fri <- 'D:/ontario_inventory/FSF/FRI/FSF_opi_polygon_CSRS_NAD83_17.shp'
# set file location of GRM polygons shape file
grm <- 'C:/Users/bermane/Desktop/FSF/grm_10_01_05.shp'
# set output folder for files generated
# make sure no "/" at end of folder location!
out_dir <- 'C:/Users/bermane/Desktop/FSF'
# set file location of 2018 VLCE 2.0 landcover data
# using 2018 because it is the year of the FSF ALS acquisition
# can change based on ALS acquisition year
# download here:
# https://opendata.nfis.org/mapserver/nfis-change_eng.html
lc_f <- 'D:/ontario_inventory/VLCE/CA_forest_VLCE2_2018.tif'
###########################################
### EXTRACT VARIABLES INTO FRI POLYGONS ###
###########################################
# load FRI polygons
poly <- vect(fri)
# convert to df
dat_fri <- as.data.frame(poly)
# cbind centroids to dat
dat_fri <- cbind(dat_fri, centroids(poly) %>% crds)
# combine all LiDAR and aux variables to extract
lidar_vars <- c(lidar_imp, lidar_scr)
# loop through LiDAR attributes to extract values
for (i in seq_along(lidar_vars)) {
  # load LiDAR raster
  lidar_ras <- rast(lidar_vars[i])
  # project poly to crs of raster
  poly_ras <- project(poly, lidar_ras)
  # convert to sf
  poly_ras <- st_as_sf(poly_ras)
  # extract median values
  vec <- exact_extract(lidar_ras, poly_ras, 'median')
  # aggregate into data frame
  if(i == 1){
    vec_df <- as.data.frame(vec)
  } else{
    vec_df <- cbind(vec_df, as.data.frame(vec))
  }
}
# change column names of extracted attribute data frame
colnames(vec_df) <- names(lidar_vars)
# add LiDAR attributes to FRI polygon data frame
dat_fri <- cbind(dat_fri, vec_df)
# add 2018 age values
dat_fri$AGE2018 <- 2018 - dat_fri$YRORG
# check if main dir exists and create
if (dir.exists(out_dir) == F) {
  dir.create(out_dir)
}
# check if temp dir exists and create
if (dir.exists(file.path(out_dir, 'temp')) == F) {
  dir.create(file.path(out_dir, 'temp'))
}
# save extracted dataframe for fast rebooting
save(dat_fri, file = str_c(out_dir, '/temp/dat_fri_extr.RData'))
# load GRM segmented polygons
poly <- vect(grm)
# reproject to match FRI polygons
poly <- project(poly, vect(fri))
# convert to df
dat_grm <- as.data.frame(poly)
# cbind centroids to dat
dat_grm <- cbind(dat_grm, centroids(poly) %>% crds)
# loop through LiDAR attributes to extract values
for (i in seq_along(lidar_vars)) {
  # load LiDAR raster
  lidar_ras <- rast(lidar_vars[i])
  # project poly to crs of raster
  poly_ras <- project(poly, lidar_ras)
  # convert to sf
  poly_ras <- st_as_sf(poly_ras)
  # extract median values
  vec <- exact_extract(lidar_ras, poly_ras, 'median')
  # aggregate into data frame
  if(i == 1){
    vec_df <- as.data.frame(vec)
  } else{
    vec_df <- cbind(vec_df, as.data.frame(vec))
  }
}
# change column names of extracted attribute data frame
colnames(vec_df) <- names(lidar_vars)
# add LiDAR attributes to GRM polygon data frame
dat_grm <- cbind(dat_grm, vec_df)
# save extracted dataframe for fast rebooting
save(dat_grm, file = str_c(out_dir, '/temp/dat_grm_extr.RData'))
############################################
### FUNCTIONS FOR SPECIES CLASSIFICATION ###
############################################
# assign species name -- note this list was updated with all species in FSF FRI.
# It may need to be adjusted for other areas
assign_common_name <- function(sp_abbrev) {
  sp_abbrev <- toupper(sp_abbrev)
  dict <- data.frame(SB = "black spruce",
                     LA = "eastern larch",
                     BW = "white birch",
                     BF = "balsam fir",
                     CE = "cedar",
                     SW = "white spruce",
                     PT = "trembling aspen",
                     PJ = "jack pine",
                     PO = "poplar",
                     PB = "balsam poplar",
                     PR = "red pine",
                     PW = "white pine",
                     SX = "spruce",
                     MR = "red maple",
                     AB = "black ash",
                     BY = "yellow birch",
                     OR = 'red oak',
                     CW = 'eastern white cedar',
                     MH = 'hard maple',
                     HE = 'eastern hemlock',
                     BD = 'basswood',
                     CB = 'black cherry',
                     BE = 'american beech',
                     AW = 'white ash',
                     PL = 'largetooth aspen',
                     AG = 'red ash',
                     OW = 'white oak',
                     IW = 'ironwood',
                     OB = 'bur oak',
                     EW = 'white elm',
                     MS = 'silver maple',
                     PS = 'scots pine',
                     OH = 'other hardwoods',
                     BG = 'grey birch',
                     AL = 'alder',
                     SR = 'red spruce',
                     BB = 'blue beech',
                     MT = 'mountain maple',
                     MB = 'black maple',
                     OC = 'other conifers',
                     SN = 'norway spruce',
                     PE = 'silver poplar',
                     HI = 'hickory',
                     AX = 'ash') %>%
    pivot_longer(everything(), names_to = "abb", values_to = "common")
  dict$common[match(sp_abbrev, dict$abb)]
}
# assign either coniferous or deciduous
assign_type <- function(sp_common) {
  sp_common <- tolower(sp_common)
  ifelse(stringr::str_detect(sp_common, pattern = "pine|spruce|fir|cedar|larch|conifers|hemlock"), "Coniferous", "Deciduous")
}
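A quick check of these helpers with toy inputs (illustrative only):
# toy check of the species helper functions
assign_common_name(c('SB', 'PJ', 'MR')) # "black spruce" "jack pine" "red maple"
assign_type(c('black spruce', 'red maple')) # "Coniferous" "Deciduous"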
####################################
### CALCULATE SPECIES ATTRIBUTES ###
####################################
# load fri
poly_fri <- st_read(fri)
# separate SPCOMP string into individual columns
poly_fri_for <- poly_fri %>%
  st_drop_geometry() %>%
  filter(POLYTYPE == "FOR") %>%
  select(OPI_ID, POLYTYPE, OSPCOMP) %>% # these need to match FRI attr fields
  mutate(new_SP = str_match_all(OSPCOMP, "[A-Z]{2}[ ]+[0-9]+")) %>%
  unnest(new_SP) %>%
  mutate(new_SP = as.character(new_SP)) %>%
  separate(new_SP, into = c("SP", "PROP")) %>%
  mutate(PROP = as.numeric(PROP),
         Common = assign_common_name(SP),
         sp_type = assign_type(Common))
# calculate polygon level species groups
# percent species type
poly_dom_type <- poly_fri_for %>%
  group_by(OPI_ID) %>%
  summarize(per_conif = sum(PROP[sp_type == "Coniferous"]),
            per_decid = sum(PROP[sp_type == "Deciduous"]))
# leading species
poly_dom_sp <- poly_fri_for %>%
  group_by(OPI_ID) %>%
  slice_max(PROP, n = 1, with_ties = FALSE)
# combine type with leading species
poly_dom_sp_group <- inner_join(poly_dom_type, poly_dom_sp, by = "OPI_ID")
# calculate functional groups
poly_dom_sp_group <- poly_dom_sp_group %>%
  mutate(SpeciesGroup1 = ifelse(PROP >= 70, Common,
                         ifelse(PROP < 70 & per_conif >= 70, "Mixed Coniferous",
                         ifelse(PROP < 70 & per_decid >= 70, "Mixed Deciduous", "Mixedwoods"))),
         SpeciesGroup2 = ifelse(Common == "jack pine" & PROP >= 50 & per_conif >= 70, "Jack Pine Dominated",
                         ifelse(Common == "black spruce" & PROP >= 50 & per_conif >= 70, "Black Spruce Dominated",
                         ifelse(per_decid >= 70, "Hardwood",
                         ifelse(per_decid >= 30 & per_decid <= 70 & per_conif >= 30 & per_conif <= 70,
                                "Mixedwood", "Mixed Conifers")))),
         SpeciesGroup3 = ifelse(per_conif >= 70, "Softwood",
                         ifelse(per_decid >= 70, "Hardwood", "Mixedwood"))) %>%
  mutate(across(.cols = starts_with("SpeciesGroup"), .fns = as.factor)) %>%
  select(OPI_ID, SpeciesGroup2, SpeciesGroup3) %>%
  rename(class5 = SpeciesGroup2, class3 = SpeciesGroup3)
# calculate leading and second species only
poly_fri_sp <- poly_fri %>%
  st_drop_geometry() %>%
  filter(POLYTYPE == "FOR") %>%
  select(OPI_ID, POLYTYPE, OSPCOMP) %>% # these need to match FRI attr fields
  mutate(new_SP = str_match_all(OSPCOMP, "[A-Z]{2}[ ]+[0-9]+")) %>%
  unnest_wider(new_SP, names_sep = '') %>%
  rename(new_SP = new_SP1) %>%
  mutate(new_SP = as.character(new_SP),
         new_SP2 = as.character(new_SP2)) %>%
  separate(new_SP, into = c("SP", "PROP")) %>%
  separate(new_SP2, into = c("SP2", "PROP2")) %>%
  select(OPI_ID, SP, SP2)
# join functional groups with leading and second species
poly_dom_sp_group <- left_join(poly_dom_sp_group,
                               poly_fri_sp,
                               by = 'OPI_ID') %>%
  rename(SP1 = SP)
# join to FRI extracted dataframe
dat_fri <- left_join(dat_fri,
                     poly_dom_sp_group,
                     by = 'OPI_ID')
# re-save extracted dataframe for fast rebooting
save(dat_fri, file = str_c(out_dir, '/temp/dat_fri_extr.RData'))
#####################
### LOAD FRI DATA ###
#####################
# load FRI polygons
poly <- vect(fri)
# load FRI polygon data frame
load(str_c(out_dir, '/temp/dat_fri_extr.RData'))
#############################
### DATA SCREENING PART 1 ###
#############################
# remove all non-forested polygons
dat_fri <- filter(dat_fri, POLYTYPE == 'FOR')
# create smaller polygon set only FOR polytypes
poly_fri <- poly[poly$POLYTYPE == 'FOR']
#############################
### DATA SCREENING PART 2 ###
#############################
# polygon landcover > 50% forested
# load VLCE 2.0 landcover dataset from 2018
lc <- rast(lc_f)
# project poly to crs of raster
poly_lc <- project(poly_fri, lc)
# convert to sf
poly_lcsf <- st_as_sf(poly_lc)
# extract landcover values
lc_poly <- exact_extract(lc, poly_lcsf)
# set landcover class key with single forested class
lc_key_for <- c(`0` = 'NA',
                `20` = 'Water',
                `31` = 'Snow/Ice',
                `32` = 'Rock/Rubble',
                `33` = 'Exposed/Barren Land',
                `40` = 'Bryoids',
                `50` = 'Shrubland',
                `80` = 'Wetland',
                `81` = 'Forest',
                `100` = 'Herbs',
                `210` = 'Forest',
                `220` = 'Forest',
                `230` = 'Forest')
# identify polygons that are at least 50% forested
# apply over list
lc_dom_for <- sapply(lc_poly, function(x){
  x$value <- recode(x$value, !!!lc_key_for)
  x <- x %>% group_by(value) %>% summarize(sum = sum(coverage_fraction))
  m <- x$value[which(x$sum == max(x$sum))]
  if((length(m) == 1) & (m == 'Forest')[1]){
    if(x$sum[x$value == m]/sum(x$sum) >= 0.5){
      return('Yes')
    } else{return('No')}
  } else{return('No')}
})
# add new columns into dat
dat_fri <- dat_fri %>% add_column(dom_for = lc_dom_for)
# subset FRI data frame based on whether polygon dominated by forest
dat_fri_scr <- dat_fri %>% filter(dom_for == 'Yes')
#############################
### DATA SCREENING PART 3 ###
#############################
# require p95 >= 5
# require cc >= 50
dat_fri_scr %<>% filter(p95 >= 5, cc >= 50)
# save extracted data frame for fast rebooting
save(dat_fri_scr, file = str_c(out_dir, '/temp/dat_fri_scr.RData'))
#####################
### LOAD GRM DATA ###
#####################
# load GRM polygon data frame
load(str_c(out_dir, '/temp/dat_grm_extr.RData'))
######################
### DATA SCREENING ###
######################
# Don't screen GRM polygons for POLYTYPE == FOR
# polygon landcover > 50% forested
# dom_for attribute already exists in GRM data from segmentation
# require p95 >= 5
# require cc >= 50
dat_grm_scr <- dat_grm %>% filter(dom_for == 'Yes',
                                  p95 >= 5,
                                  cc >= 50)
# save extracted data frame for fast rebooting
save(dat_grm_scr, file = str_c(out_dir, '/temp/dat_grm_scr.RData'))
######################################################
### FUNCTIONS TO RUN K NEAREST NEIGHBOR IMPUTATION ###
######################################################
# create mode function
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
# create rmsd function
rmsd <- function(obs, est){
  sqrt(mean((est - obs) ^ 2))
}
# create rrmsd function
rrmsd <- function(obs, est){
  sqrt(mean((est - obs) ^ 2)) / mean(obs) * 100
}
# create mae function
mae <- function(obs, est){
  mean(abs(est - obs))
}
# create mbe function
mbe <- function(obs, est){
  mean(est - obs)
}
# create rmbe function
rmbe <- function(obs, est){
  mean(est - obs) / mean(obs) * 100
}
# create knn function to output performance results
run_knn_fri <- function(dat, vars, k) {
  # subset data
  dat_nn <- dat %>% select(all_of(vars))
  # scale for nn computation
  dat_nn_scaled <- dat_nn %>% scale
  # run nearest neighbor
  nn <- nn2(dat_nn_scaled, dat_nn_scaled, k = k + 1)
  # get nn indices
  nni <- nn[[1]][, 2:(k + 1)]
  # add vars to tibble
  # take mean/mode if k > 1
  if(k > 1){
    for(i in seq_along(vars)){
      if(i == 1){
        nn_tab <- tibble(!!vars[i] := dat_nn[,i],
                         !!str_c(vars[i], '_nn') := apply(nni, MARGIN = 1, FUN = function(x){
                           mean(dat_nn[x, i])
                         }))
      } else{
        nn_tab %<>% mutate(!!vars[i] := dat_nn[,i],
                           !!str_c(vars[i], '_nn') := apply(nni, MARGIN = 1, FUN = function(x){
                             mean(dat_nn[x, i])
                           }))
      }
    }
    # add target vars to tibble
    nn_tab %<>% mutate(age = dat$AGE2018,
                       sp1 = dat$SP1,
                       sp2 = dat$SP2,
                       class5 = dat$class5,
                       class3 = dat$class3,
                       age_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         mean(dat$AGE2018[x])
                       }),
                       sp1_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$SP1[x])
                       }),
                       sp2_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$SP2[x])
                       }),
                       class5_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$class5[x])
                       }),
                       class3_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$class3[x])
                       }))
  }
  # take direct nn if k == 1
  if(k == 1){
    for(i in seq_along(vars)){
      if(i == 1){
        nn_tab <- tibble(!!vars[i] := dat_nn[,i],
                         !!str_c(vars[i], '_nn') := dat_nn[nn[[1]][,2],i])
      } else{
        nn_tab %<>% mutate(!!vars[i] := dat_nn[,i],
                           !!str_c(vars[i], '_nn') := dat_nn[nn[[1]][,2],i])
      }
    }
    # add target vars to tibble
    nn_tab %<>% mutate(age = dat$AGE2018,
                       sp1 = dat$SP1,
                       sp2 = dat$SP2,
                       class5 = dat$class5,
                       class3 = dat$class3,
                       age_nn = dat$AGE2018[nn[[1]][,2]],
                       sp1_nn = dat$SP1[nn[[1]][,2]],
                       sp2_nn = dat$SP2[nn[[1]][,2]],
                       class5_nn = dat$class5[nn[[1]][,2]],
                       class3_nn = dat$class3[nn[[1]][,2]])
  }
  # calculate fit metrics for vars
  for(i in seq_along(vars)){
    if(i == 1){
      perform_df <- tibble(variable = vars[i],
                           metric = c('rrmsd (%)', 'rmbe (%)'),
                           value = c(rrmsd(pull(nn_tab, vars[i]),
                                           pull(nn_tab, str_c(vars[i], '_nn'))),
                                     rmbe(pull(nn_tab, vars[i]),
                                          pull(nn_tab, str_c(vars[i], '_nn')))))
    } else{
      perform_df %<>% add_row(variable = vars[i],
                              metric = c('rrmsd (%)', 'rmbe (%)'),
                              value = c(rrmsd(pull(nn_tab, vars[i]),
                                              pull(nn_tab, str_c(vars[i], '_nn'))),
                                        rmbe(pull(nn_tab, vars[i]),
                                             pull(nn_tab, str_c(vars[i], '_nn')))))
    }
  }
  # calculate metrics for age
  perform_df %<>% add_row(variable = 'age',
                          metric = c('rmsd (yrs)', 'mbe (yrs)', 'mae (yrs)'),
                          value = c(rmsd(nn_tab$age, nn_tab$age_nn),
                                    mbe(nn_tab$age, nn_tab$age_nn),
                                    mae(nn_tab$age, nn_tab$age_nn)))
  # calculate SP1 accuracy
  # create df of SP1
  sp1 <- data.frame(obs = nn_tab$sp1,
                    est = nn_tab$sp1_nn)
  # create column of match or not
  sp1$match <- sp1$obs == sp1$est
  # add total percent of matching SP1 to perform_df
  perform_df %<>% add_row(variable = 'leading species',
                          metric = 'accuracy (%)',
                          value = NROW(sp1[sp1$match == T,]) /
                            NROW(sp1) * 100)
  # calculate SP2 accuracy
  # create df of SP2
  sp2 <- data.frame(obs = nn_tab$sp2,
                    est = nn_tab$sp2_nn)
  # create column of match or not
  sp2$match <- sp2$obs == sp2$est
  # add total percent of matching SP2 to perform_df
  perform_df %<>% add_row(variable = 'second species',
                          metric = 'accuracy (%)',
                          value = NROW(sp2[sp2$match == T,]) /
                            NROW(sp2) * 100)
  # calculate class3 accuracy
  # create df of class3
  class3 <- data.frame(obs = nn_tab$class3,
                       est = nn_tab$class3_nn)
  # create column of match or not
  class3$match <- class3$obs == class3$est
  # add total percent of matching class3 to perform_df
  perform_df %<>% add_row(variable = 'three func group class',
                          metric = 'accuracy (%)',
                          value = NROW(class3[class3$match == T,]) /
                            NROW(class3) * 100)
  # calculate class5 accuracy
  # create df of class5
  class5 <- data.frame(obs = nn_tab$class5,
                       est = nn_tab$class5_nn)
  # create column of match or not
  class5$match <- class5$obs == class5$est
  # add total percent of matching class5 to perform_df
  perform_df %<>% add_row(variable = 'five func group class',
                          metric = 'accuracy (%)',
                          value = NROW(class5[class5$match == T,]) /
                            NROW(class5) * 100)
  # return df
  return(perform_df)
}
# create knn function to output imputed vs. observed table
run_knn_fri_table <- function(dat, vars, k) {
  # subset data
  dat_nn <- dat %>% select(all_of(vars))
  # scale for nn computation
  dat_nn_scaled <- dat_nn %>% scale
  # run nearest neighbor
  nn <- nn2(dat_nn_scaled, dat_nn_scaled, k = k + 1)
  # get nn indices
  nni <- nn[[1]][, 2:(k + 1)]
  # add vars to tibble
  # take mean/mode if k > 1
  if(k > 1){
    for(i in seq_along(vars)){
      if(i == 1){
        nn_tab <- tibble(!!vars[i] := dat_nn[,i],
                         !!str_c(vars[i], '_nn') := apply(nni, MARGIN = 1, FUN = function(x){
                           mean(dat_nn[x, i])
                         }))
      } else{
        nn_tab %<>% mutate(!!vars[i] := dat_nn[,i],
                           !!str_c(vars[i], '_nn') := apply(nni, MARGIN = 1, FUN = function(x){
                             mean(dat_nn[x, i])
                           }))
      }
    }
    # add target vars to tibble
    nn_tab %<>% mutate(age = dat$AGE2018,
                       sp1 = dat$SP1,
                       sp2 = dat$SP2,
                       class5 = dat$class5,
                       class3 = dat$class3,
                       age_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         mean(dat$AGE2018[x])
                       }),
                       sp1_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$SP1[x])
                       }),
                       sp2_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$SP2[x])
                       }),
                       class5_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$class5[x])
                       }),
                       class3_nn = apply(nni, MARGIN = 1, FUN = function(x){
                         getmode(dat$class3[x])
                       }))
  }
  # take direct nn if k == 1
  if(k == 1){
    for(i in seq_along(vars)){
      if(i == 1){
        nn_tab <- tibble(!!vars[i] := dat_nn[,i],
                         !!str_c(vars[i], '_nn') := dat_nn[nn[[1]][,2],i])
      } else{
        nn_tab %<>% mutate(!!vars[i] := dat_nn[,i],
                           !!str_c(vars[i], '_nn') := dat_nn[nn[[1]][,2],i])
      }
    }
    # add target vars to tibble
    nn_tab %<>% mutate(age = dat$AGE2018,
                       sp1 = dat$SP1,
                       sp2 = dat$SP2,
                       class5 = dat$class5,
                       class3 = dat$class3,
                       age_nn = dat$AGE2018[nn[[1]][,2]],
                       sp1_nn = dat$SP1[nn[[1]][,2]],
                       sp2_nn = dat$SP2[nn[[1]][,2]],
                       class5_nn = dat$class5[nn[[1]][,2]],
                       class3_nn = dat$class3[nn[[1]][,2]])
  }
  # return nn table
  return(nn_tab)
}
########################################################
### RUN KNN IMPUTATION USING OPTIMAL MODEL VARIABLES ###
########################################################
# load FRI screened polygons
load(str_c(out_dir, '/temp/dat_fri_scr.RData'))
# subset only the attributes we need from the screened FRI polygons
dat_fri_scr %<>% select(OPI_ID, AGE2018, agb,
                        gmvwl, pcum8, ba,
                        b6, x, y, SP1, SP2, class3, class5)
# remove any polygons with missing values
# in imputation X-variables
dat_fri_scr <- left_join(dat_fri_scr %>% select(OPI_ID, AGE2018, agb,
                                                gmvwl, pcum8, ba,
                                                b6, x, y) %>% na.omit,
                         dat_fri_scr)
# create vector of X-variables for imputation
vars <- c('agb', 'gmvwl',
          'pcum8', 'ba',
          'b6', 'x', 'y')
# run_knn_fri function to get performance results
perf <- run_knn_fri(dat_fri_scr, vars, k = 5)
# run_knn_fri_table function to get imputed vs. observed values
nn_tab <- run_knn_fri_table(dat_fri_scr, vars, k = 5)
6.2.2 Results: Imputation Over FRI
Summary Stats:
# round values
perf %<>% mutate(value = round(value, 2))
# factor variable and metric categories to order
perf %<>% mutate(variable = factor(variable, levels = c('age', 'leading species',
                                                        'second species', 'three func group class',
                                                        'five func group class', vars))) %>%
  mutate(metric = factor(metric, levels = c('rmsd (yrs)', 'mbe (yrs)', 'mae (yrs)',
                                            'accuracy (%)', 'rrmsd (%)', 'rmbe (%)')))
# cast df
perf_cast <- dcast(perf, variable ~ metric)
# remove x and y
perf_cast %<>% filter(!(variable %in% c('x', 'y')))
# set NA to blank
perf_cast[is.na(perf_cast)] <- ''
# display results
knitr::kable(perf_cast, caption = "Imputation Performance of FRI Forest Stand Polygons", label = NA)
variable | rmsd (yrs) | mbe (yrs) | mae (yrs) | accuracy (%) | rrmsd (%) | rmbe (%) |
---|---|---|---|---|---|---|
age | 20.08 | 0.17 | 14.75 | | | |
leading species | | | | 64.47 | | |
second species | | | | 31.75 | | |
three func group class | | | | 78.61 | | |
five func group class | | | | 80.78 | | |
agb | | | | | 2.50 | -0.02 |
gmvwl | | | | | 3.84 | -0.28 |
pcum8 | | | | | 1.24 | 0.07 |
ba | | | | | 2.24 | 0.05 |
b6 | | | | | 2.27 | 0.09 |
The mean bias error (MBE) of age is 0.17 years, indicating the imputed age estimates are not skewed toward younger or older values. The mean absolute error (MAE) of age is 14.75 years, which is the average absolute difference between observed and imputed values.
Accuracy of leading species classification is 64.47%, and a much lower 31.75% for second leading species. Three and five functional group classification have respective accuracies of 78.61% and 80.78%. Functional group classification accuracies are higher than in the RMF.
Relative root mean squared difference (RRMSD) of the imputation X-variables (agb, gmvwl, pcum8, ba, and b6) is below 4% for all attributes. These are low values, which demonstrate that the imputation algorithm is finding optimal matches within the database of available FRI polygons.
Relative mean bias error (RMBE) of the imputation X-variables is less than 1%, meaning that the nearest neighbor selections are not skewed toward positive or negative values of these attributes.
RRMSD and RMBE are not calculated for x and y because the coordinates do not represent a value scale.
We can also generate detailed confusion matrices of the imputed vs. observed species composition.
Confusion matrices:
# calculate 3 func group confusion matrix
# build accuracy table
accmat <- table("pred" = nn_tab$class3_nn, "ref" = nn_tab$class3)
# UA
ua <- diag(accmat) / rowSums(accmat) * 100
# PA
pa <- diag(accmat) / colSums(accmat) * 100
# OA
oa <- sum(diag(accmat)) / sum(accmat) * 100
# build confusion matrix
accmat_ext <- addmargins(accmat)
accmat_ext <- rbind(accmat_ext, "Users" = c(pa, NA))
accmat_ext <- cbind(accmat_ext, "Producers" = c(ua, NA, oa))
accmat_ext <- round(accmat_ext, 2)
dimnames(accmat_ext) <- list("Imputed" = colnames(accmat_ext),
                             "Observed" = rownames(accmat_ext))
class(accmat_ext) <- "table"
# display results
knitr::kable(accmat_ext %>% round, caption = "Confusion matrix of imputed vs. observed three functional group classification over FRI polygons. Rows are imputed values and columns are observed values.", label = NA)
Hardwood | Mixedwood | Softwood | Sum | Users | |
---|---|---|---|---|---|
Hardwood | 53011 | 5542 | 745 | 59298 | 89 |
Mixedwood | 4227 | 14830 | 5210 | 24267 | 61 |
Softwood | 1098 | 4781 | 11560 | 17439 | 66 |
Sum | 58336 | 25153 | 17515 | 101004 | NA |
Producers | 91 | 59 | 66 | NA | 79 |
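Reading the matrix: users accuracy (last column) is the share of imputed labels that agree with the observed label, while producers accuracy (last row) is the share of observed labels recovered by the imputation. For the Hardwood class above:
# users and producers accuracy for the Hardwood class, from the table above
53011 / 59298 * 100 # users accuracy, ~89%
53011 / 58336 * 100 # producers accuracy, ~91%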
# calculate 5 func group confusion matrix
# build accuracy table
accmat <- table("pred" = nn_tab$class5_nn, "ref" = nn_tab$class5)
# UA
ua <- diag(accmat) / rowSums(accmat) * 100
# PA
pa <- diag(accmat) / colSums(accmat) * 100
# OA
oa <- sum(diag(accmat)) / sum(accmat) * 100
# build confusion matrix
accmat_ext <- addmargins(accmat)
accmat_ext <- rbind(accmat_ext, "Users" = c(pa, NA))
accmat_ext <- cbind(accmat_ext, "Producers" = c(ua, NA, oa))
accmat_ext <- round(accmat_ext, 2)
dimnames(accmat_ext) <- list("Imputed" = colnames(accmat_ext),
                             "Observed" = rownames(accmat_ext))
class(accmat_ext) <- "table"
# display results
knitr::kable(accmat_ext %>% round, caption = "Confusion matrix of imputed vs. observed five functional group classification over FRI polygons. Rows are imputed values and columns are observed values.", label = NA)
Black Spruce Dominated | Hardwood | Jack Pine Dominated | Mixed Conifers | Mixedwood | Sum | Users | |
---|---|---|---|---|---|---|---|
Black Spruce Dominated | 549 | 61 | 3 | 348 | 217 | 1178 | 47 |
Hardwood | 44 | 52851 | 17 | 319 | 5613 | 58844 | 90 |
Jack Pine Dominated | 20 | 30 | 357 | 128 | 148 | 683 | 52 |
Mixed Conifers | 483 | 316 | 104 | 3607 | 2248 | 6758 | 53 |
Mixedwood | 336 | 5078 | 159 | 3739 | 24229 | 33541 | 72 |
Sum | 1432 | 58336 | 640 | 8141 | 32455 | 101004 | NA |
Producers | 38 | 91 | 56 | 44 | 75 | NA | 81 |
# calculate leading species confusion matrix
# create df of sp1
sp1 <- data.frame(obs = nn_tab$sp1 %>% as.factor,
                  est = nn_tab$sp1_nn %>% as.factor)
# make estimate levels match obs levels
levels(sp1$est) <- c(levels(sp1$est), levels(sp1$obs)[!(levels(sp1$obs) %in% levels(sp1$est))])
sp1$est <- factor(sp1$est, levels = levels(sp1$obs))
# create column of match or not
sp1$match <- sp1$obs == sp1$est
# build accuracy table
accmat <- table("pred" = sp1$est, "ref" = sp1$obs)
# UA
ua <- diag(accmat) / rowSums(accmat) * 100
# PA
pa <- diag(accmat) / colSums(accmat) * 100
# OA
oa <- sum(diag(accmat)) / sum(accmat) * 100
# build confusion matrix
accmat_ext <- addmargins(accmat)
accmat_ext <- rbind(accmat_ext, "Users" = c(pa, NA))
accmat_ext <- cbind(accmat_ext, "Producers" = c(ua, NA, oa))
accmat_ext <- round(accmat_ext, 2)
dimnames(accmat_ext) <- list("Imputed" = colnames(accmat_ext),
                             "Observed" = rownames(accmat_ext))
class(accmat_ext) <- "table"
# display results
kbl(accmat_ext %>% round, caption = "Confusion matrix of imputed vs. observed leading species over FRI polygons. Rows are imputed values and columns are observed values.", label = NA) %>%
  kable_paper() %>%
  scroll_box(width = "500px")
AB | AG | AW | BD | BE | BF | BW | BY | CB | CE | CW | EW | HE | LA | MH | MR | MS | OR | OW | PB | PJ | PL | PO | PR | PS | PT | PW | SB | SW | Sum | Users | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AB | 133 | 0 | 0 | 0 | 0 | 16 | 4 | 19 | 0 | 0 | 4 | 0 | 5 | 2 | 84 | 52 | 0 | 16 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 23 | 11 | 7 | 4 | 383 | 35 |
AG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN |
AW | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | 11 |
BD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 |
BE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 0 |
BF | 42 | 0 | 0 | 0 | 0 | 1886 | 258 | 99 | 0 | 1 | 125 | 0 | 238 | 86 | 206 | 202 | 0 | 27 | 0 | 1 | 15 | 6 | 3 | 50 | 60 | 494 | 310 | 578 | 443 | 5130 | 37 |
BW | 10 | 0 | 0 | 0 | 0 | 197 | 578 | 53 | 0 | 0 | 11 | 0 | 111 | 4 | 160 | 86 | 0 | 35 | 0 | 1 | 28 | 8 | 4 | 15 | 6 | 424 | 252 | 42 | 70 | 2095 | 28 |
BY | 19 | 0 | 0 | 0 | 1 | 69 | 44 | 337 | 0 | 0 | 6 | 0 | 52 | 0 | 454 | 63 | 0 | 12 | 0 | 2 | 0 | 1 | 2 | 2 | 2 | 53 | 12 | 19 | 44 | 1194 | 28 |
CB | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
CE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN |
CW | 8 | 0 | 0 | 0 | 0 | 56 | 10 | 2 | 0 | 0 | 69 | 0 | 19 | 13 | 12 | 13 | 0 | 4 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 21 | 16 | 74 | 21 | 341 | 20 |
EW | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN |
HE | 16 | 0 | 0 | 0 | 1 | 230 | 223 | 128 | 0 | 0 | 43 | 0 | 4177 | 5 | 446 | 122 | 2 | 107 | 1 | 1 | 2 | 6 | 2 | 40 | 6 | 281 | 609 | 114 | 121 | 6683 | 62 |
LA | 2 | 0 | 0 | 0 | 0 | 39 | 1 | 0 | 0 | 0 | 10 | 0 | 3 | 33 | 8 | 3 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 13 | 10 | 41 | 10 | 177 | 19 |
MH | 378 | 0 | 55 | 22 | 82 | 156 | 187 | 1566 | 11 | 0 | 9 | 0 | 613 | 8 | 34365 | 2281 | 93 | 2617 | 14 | 2 | 1 | 3 | 5 | 15 | 3 | 337 | 240 | 17 | 54 | 43134 | 80 |
MR | 108 | 1 | 8 | 2 | 2 | 125 | 72 | 62 | 0 | 1 | 12 | 1 | 134 | 5 | 920 | 2086 | 19 | 739 | 14 | 1 | 2 | 4 | 5 | 9 | 5 | 187 | 362 | 22 | 37 | 4945 | 42 |
MS | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | 10 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 14 |
OR | 70 | 0 | 8 | 2 | 7 | 16 | 22 | 38 | 0 | 0 | 5 | 0 | 100 | 2 | 1283 | 828 | 11 | 2698 | 25 | 2 | 2 | 6 | 0 | 1 | 1 | 48 | 307 | 7 | 2 | 5491 | 49 |
OW | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 6 | 0 |
PB | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 0 | 0 | 7 | 0 |
PJ | 1 | 0 | 0 | 0 | 0 | 10 | 33 | 1 | 0 | 0 | 2 | 0 | 6 | 20 | 7 | 11 | 0 | 4 | 0 | 0 | 567 | 1 | 0 | 7 | 2 | 61 | 263 | 39 | 7 | 1042 | 54 |
PL | 1 | 0 | 0 | 0 | 0 | 4 | 5 | 2 | 0 | 0 | 3 | 0 | 3 | 0 | 5 | 3 | 0 | 6 | 0 | 0 | 0 | 11 | 0 | 1 | 0 | 4 | 8 | 2 | 1 | 59 | 19 |
PO | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 10 | 1 | 0 | 11 | 3 | 1 | 2 | 42 | 24 |
PR | 1 | 0 | 0 | 0 | 0 | 13 | 6 | 2 | 0 | 0 | 1 | 0 | 14 | 1 | 11 | 9 | 0 | 1 | 0 | 0 | 6 | 0 | 3 | 351 | 29 | 51 | 67 | 4 | 17 | 587 | 60 |
PS | 0 | 0 | 0 | 0 | 0 | 15 | 4 | 1 | 0 | 0 | 0 | 0 | 7 | 1 | 1 | 5 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 16 | 23 | 11 | 10 | 1 | 4 | 102 | 23 |
PT | 57 | 0 | 0 | 0 | 0 | 500 | 585 | 112 | 0 | 0 | 39 | 0 | 263 | 15 | 389 | 272 | 0 | 76 | 0 | 7 | 48 | 36 | 21 | 96 | 37 | 2918 | 699 | 90 | 190 | 6450 | 45 |
PW | 44 | 0 | 2 | 0 | 0 | 492 | 549 | 39 | 0 | 0 | 64 | 0 | 802 | 49 | 277 | 654 | 1 | 601 | 3 | 9 | 307 | 35 | 4 | 276 | 19 | 1107 | 13245 | 132 | 172 | 18883 | 70 |
SB | 19 | 0 | 1 | 0 | 0 | 531 | 62 | 43 | 1 | 0 | 192 | 0 | 122 | 130 | 89 | 30 | 0 | 14 | 0 | 0 | 13 | 6 | 1 | 15 | 10 | 97 | 79 | 1357 | 183 | 2995 | 45 |
SW | 9 | 0 | 0 | 0 | 0 | 284 | 83 | 44 | 0 | 0 | 33 | 0 | 75 | 26 | 40 | 36 | 0 | 1 | 0 | 0 | 2 | 0 | 5 | 12 | 6 | 98 | 63 | 101 | 271 | 1189 | 23 |
Sum | 918 | 1 | 76 | 26 | 94 | 4639 | 2731 | 2550 | 12 | 2 | 629 | 1 | 6745 | 401 | 38799 | 6775 | 131 | 6966 | 58 | 27 | 996 | 125 | 66 | 913 | 211 | 6242 | 16569 | 2648 | 1653 | 101004 | NA |
Producers | 14 | 0 | 3 | 0 | 0 | 41 | 21 | 13 | 0 | 0 | 11 | 0 | 62 | 8 | 89 | 31 | 4 | 39 | 0 | 0 | 57 | 9 | 15 | 38 | 11 | 47 | 80 | 51 | 16 | NA | 64 |
6.3 Imputation from FRI to GRM
6.3.1 Run Algorithm
# load GRM screened polygons
load(str_c(out_dir, '/temp/dat_grm_scr.RData'))
# create data frames of grm and fri metrics used in imputation
dat_grm_imp <- dat_grm_scr %>% select(id, agb,
                                      gmvwl, pcum8, ba,
                                      b6, x, y) %>% na.omit
dat_fri_imp <- dat_fri_scr %>% select(agb,
                                      gmvwl, pcum8, ba,
                                      b6, x, y) %>% na.omit
# need to combine and scale all values together then separate again
dat_comb_scaled <- rbind(dat_grm_imp %>% select(-id),
                         dat_fri_imp) %>% scale
dat_grm_scaled <- dat_comb_scaled[1:NROW(dat_grm_imp),]
dat_fri_scaled <- dat_comb_scaled[(NROW(dat_grm_imp)+1):(NROW(dat_grm_imp)+NROW(dat_fri_imp)),]
# run nearest neighbor imputation k = 5
nn <- nn2(dat_fri_scaled, dat_grm_scaled, k = 5)
# get nn indices
nni <- nn[[1]]
# add imputed attributes into GRM imputation data frame
for(i in seq_along(vars)){
  dat_grm_imp %<>% add_column(
    !!str_c(vars[i], '_imp') := apply(
      nni, MARGIN = 1,
      FUN = function(x){
        mean(dat_fri_imp[x, vars[i]])
      }
    ))
}
# create vector of target variables
tar_vars <- c('AGE2018', 'SP1', 'SP2',
              'class3', 'class5')
# add age and species variables to GRM data frame
# (mean of neighbors for age, mode of neighbors for species classes)
for(i in seq_along(tar_vars)){
  if(tar_vars[i] == 'AGE2018'){
    dat_grm_imp %<>% add_column(
      !!tar_vars[i] := apply(
        nni, MARGIN = 1,
        FUN = function(x){
          mean(dat_fri_scr[x, tar_vars[i]])
        }
      ))
  } else{
    dat_grm_imp %<>% add_column(
      !!tar_vars[i] := apply(
        nni, MARGIN = 1,
        FUN = function(x){
          getmode(dat_fri_scr[x, tar_vars[i]])
        }
      ))
  }
}
# update colnames
dat_grm_imp %<>% rename(age = AGE2018)
# add values back into main GRM data frame
dat_grm <- left_join(dat_grm, dat_grm_imp)
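Note the direction of the neighbor search above: nn2(dat_fri_scaled, dat_grm_scaled, k = 5) uses the FRI polygons as the reference set and the GRM polygons as queries, so row j of nni holds the indices of the five FRI donors for GRM polygon j. A quick way to inspect a single match (illustrative):
# inspect the five FRI donors chosen for the first GRM polygon
dat_fri_imp[nni[1, ], c('agb', 'pcum8', 'ba')]
dat_grm_imp[1, c('agb', 'pcum8', 'ba')] # X-variables should be similar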
6.3.2 Results: Imputation X-Variable Performance
# calculate performance across imputation x-variables
for(i in seq_along(vars)){
  if(i == 1){
    perf <- tibble(variable = vars[i],
                   metric = c('rrmsd (%)', 'rmbe (%)'),
                   value = c(rrmsd(dat_grm_imp[, vars[i]],
                                   dat_grm_imp[, str_c(vars[i], '_imp')]),
                             rmbe(dat_grm_imp[, vars[i]],
                                  dat_grm_imp[, str_c(vars[i], '_imp')])))
  } else{
    perf %<>% add_row(variable = vars[i],
                      metric = c('rrmsd (%)', 'rmbe (%)'),
                      value = c(rrmsd(dat_grm_imp[, vars[i]],
                                      dat_grm_imp[, str_c(vars[i], '_imp')]),
                                rmbe(dat_grm_imp[, vars[i]],
                                     dat_grm_imp[, str_c(vars[i], '_imp')])))
  }
}
# round to two decimal places
perf %<>% mutate(value = round(value, 2))
# factor so table displays nicely
perf %<>% mutate(variable = factor(variable, levels = vars)) %>%
  mutate(metric = factor(metric, levels = c('rrmsd (%)', 'rmbe (%)')))
# cast df
perf_cast <- dcast(perf, variable ~ metric)
# remove x and y
perf_cast %<>% filter(!(variable %in% c('x', 'y')))
# display results of imputation performance
knitr::kable(perf_cast, caption = "Imputation X-Variable Performance between FRI and GRM Forest Stand Polygons", label = NA)
variable | rrmsd (%) | rmbe (%) |
---|---|---|
agb | 3.31 | -0.06 |
gmvwl | 4.10 | -0.72 |
pcum8 | 1.47 | -0.30 |
ba | 2.68 | -0.08 |
b6 | 1.82 | -0.21 |
The RRMSD results are all below 5%, and the RMBE does not show bias toward negative or positive differences. These results are comparable to those found when conducting imputation over the FRI polygons only.
6.3.3 Results: Distribution of Age
Spatial comparison:
# load GRM polygons
poly_grm <- vect(grm)
# add new data frame to polygons
values(poly_grm) <- dat_grm
# save grm polygon output
writeVector(poly_grm, str_c(out_dir, '/grm_imp.shp'), overwrite = T)
# load FRI polygons
poly_fri <- vect(fri)
# combine dat_fri with polygon attributes
dat_fri_poly <- left_join(as.data.frame(poly_fri), dat_fri)
# we should only compare the screened FRI polygons so set
# other values to NA
dat_fri_poly$AGE2018[!(dat_fri_poly$OPI_ID %in% dat_fri_scr$OPI_ID)] <- NA
dat_fri_poly$class3[!(dat_fri_poly$OPI_ID %in% dat_fri_scr$OPI_ID)] <- NA
dat_fri_poly$class5[!(dat_fri_poly$OPI_ID %in% dat_fri_scr$OPI_ID)] <- NA
# re-input attributes into FRI polygons
values(poly_fri) <- dat_fri_poly
rm(dat_fri_poly)
# save fri polygon output with key values only in screened polygons
writeVector(poly_fri, str_c(out_dir, '/fri_scr.shp'), overwrite = T)
# create sf objects to plot age
grm_sf <- st_as_sf(poly_grm)
fri_sf <- st_as_sf(poly_fri)
# cut age into bins
grm_sf %<>% mutate(age_cut = cut(age, breaks = c(seq(0, 130, 10), 250)))
fri_sf %<>% mutate(age_cut = cut(AGE2018, breaks = c(seq(0, 130, 10), 250)))
# plot age
p1 <- ggplot(grm_sf) +
  geom_sf(mapping = aes(fill = age_cut), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = viridis(14), name = 'Age', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Imputed Age of GRM \nSegmented Forest Stands') +
  theme(plot.title = element_text(hjust = 0.5))
p2 <- ggplot(fri_sf) +
  geom_sf(mapping = aes(fill = age_cut), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = viridis(14), name = 'Age', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Age of FRI \nForest Stands') +
  theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1, p2, ncol = 1)
Imputed age values show a similar spatial distribution to observed age values at a broad scale.
Density plots:
# density plots of age
p1 <- ggplot(dat_grm, aes(x = age)) +
  geom_density(fill = 'grey') +
  geom_vline(aes(xintercept = median(age, na.rm = T)),
             linetype = "dashed",
             size = 0.6) +
  xlim(c(0, 200)) +
  ylim(c(0, 0.025)) +
  theme_bw() +
  xlab('Age') +
  ylab('Density') +
  ggtitle('Imputed Age of GRM Segmented Forest Stands') +
  theme(text = element_text(size = 25),
        plot.title = element_text(size = 30))
p2 <- ggplot(as.data.frame(poly_fri), aes(x = AGE2018)) +
  geom_density(fill = 'grey') +
  geom_vline(aes(xintercept = median(AGE2018, na.rm = T)),
             linetype = "dashed",
             size = 0.6) +
  xlim(c(0, 200)) +
  ylim(c(0, 0.025)) +
  theme_bw() +
  xlab('Age') +
  ylab('Density') +
  ggtitle('Age of FRI Forest Stands') +
  theme(text = element_text(size = 25),
        plot.title = element_text(size = 30))
grid.arrange(p1, p2, ncol = 1)
The distribution of imputed age in GRM polygons closely matches that of observed age in FRI polygons. The median age values (dashed lines) are similar.
6.3.4 Results: Distribution of Species
Spatial patterns of three functional group classification:
# plot three func group classification
p1 <- ggplot(grm_sf) +
  geom_sf(mapping = aes(fill = class3), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = c('#228833', '#aa3377', '#ccbb44'),
                    name = 'Species Class', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Imputed Three Functional \nGroup Classification (GRM)') +
  theme(plot.title = element_text(hjust = 0.5))
p2 <- ggplot(fri_sf) +
  geom_sf(mapping = aes(fill = class3), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = c('#228833', '#aa3377', '#ccbb44'),
                    name = 'Species Class', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Three Functional \nGroup Classification (FRI)') +
  theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1, p2, ncol = 1)
We can observe a similar distribution of species classes.
Spatial patterns of five functional group classification:
# plot five func group classification
p1 <- ggplot(grm_sf) +
  geom_sf(mapping = aes(fill = class5), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = c('#ccbb44', '#228833', '#4477aa',
                               '#ee6677', '#aa3377'),
                    name = 'Species Class', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Imputed Five Functional \nGroup Classification (GRM)') +
  theme(plot.title = element_text(hjust = 0.5))
p2 <- ggplot(fri_sf) +
  geom_sf(mapping = aes(fill = class5), linewidth = 0.001) +
  coord_sf() +
  scale_fill_manual(values = c('#ccbb44', '#228833', '#4477aa',
                               '#ee6677', '#aa3377'),
                    name = 'Species Class', na.translate = F) +
  theme_void(base_size = 30) +
  ggtitle('Five Functional \nGroup Classification (FRI)') +
  theme(plot.title = element_text(hjust = 0.5))
grid.arrange(p1, p2, ncol = 1)
Overall distribution of three functional group classification:
# create data frame for GRM 3 classes
dat_grm_c3 <- dat_grm %>%
  tabyl(class3) %>%
  filter(is.na(class3) == F) %>%
  arrange(desc(class3)) %>%
  mutate(prop = n / sum(.$n)*100) %>%
  mutate(ypos = cumsum(prop) - 0.5*prop) %>%
  mutate(lbl = round(prop))
# create data frame for FRI 3 classes
dat_fri_c3 <- poly_fri %>% as.data.frame %>%
  tabyl(class3) %>%
  filter(is.na(class3) == F) %>%
  arrange(desc(class3)) %>%
  mutate(prop = n / sum(.$n)*100) %>%
  mutate(ypos = cumsum(prop) - 0.5*prop) %>%
  mutate(lbl = round(prop))
# plot
p1 <- ggplot(dat_grm_c3, aes(x = "", y = prop, fill = class3)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  theme_void() +
  geom_text(aes(y = ypos, label = str_c(lbl, "%")), size = 15) +
  theme(legend.title = element_text(size = 30),
        legend.text = element_text(size = 20),
        legend.key.width = unit(2, 'cm'),
        plot.title = element_text(size = 30)) +
  scale_fill_manual(values = c('#228833', '#aa3377', '#ccbb44')) +
  labs(fill = "") +
  ggtitle("Imputed Three Functional \nGroup Classification (GRM)")
p2 <- ggplot(dat_fri_c3, aes(x = "", y = prop, fill = class3)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  theme_void() +
  geom_text(aes(y = ypos, label = str_c(lbl, "%")), size = 15) +
  theme(legend.title = element_text(size = 30),
        legend.text = element_text(size = 20),
        legend.key.width = unit(2, 'cm'),
        plot.title = element_text(size = 30)) +
  scale_fill_manual(values = c('#228833', '#aa3377', '#ccbb44')) +
  labs(fill = "") +
  ggtitle("Three Functional Group \nClassification (FRI)")
grid.arrange(p1, p2, ncol = 2)
The proportion of hardwood is identical across datasets. The GRM data have slightly more softwood and slightly less mixedwood.
Overall distribution of five functional group classification:
# distribution of 5 species classes
# create data frame for GRM 5 classes
dat_grm_c5 <- dat_grm %>%
  tabyl(class5) %>%
  filter(is.na(class5) == F) %>%
  arrange(desc(class5)) %>%
  mutate(prop = n / sum(.$n)*100) %>%
  mutate(ypos = cumsum(prop) - 0.5*prop) %>%
  mutate(lbl = round(prop))
# create data frame for FRI 5 classes
dat_fri_c5 <- poly_fri %>% as.data.frame %>%
  tabyl(class5) %>%
  filter(is.na(class5) == F) %>%
  arrange(desc(class5)) %>%
  mutate(prop = n / sum(.$n)*100) %>%
  mutate(ypos = cumsum(prop) - 0.5*prop) %>%
  mutate(lbl = round(prop))
# plot
p1 <- ggplot(dat_grm_c5, aes(x = "", y = prop, fill = class5)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  theme_void() +
  geom_text(aes(y = ypos, label = str_c(lbl, "%")), size = 15) +
  theme(legend.title = element_text(size = 30),
        legend.text = element_text(size = 20),
        legend.key.width = unit(2, 'cm'),
        plot.title = element_text(size = 30)) +
  scale_fill_manual(values = c('#ccbb44', '#228833', '#4477aa',
                               '#ee6677', '#aa3377')) +
  labs(fill = "") +
  ggtitle("Imputed Five Functional \nGroup Classification (GRM)")
p2 <- ggplot(dat_fri_c5, aes(x = "", y = prop, fill = class5)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  theme_void() +
  geom_text(aes(y = ypos, label = str_c(lbl, "%")), size = 15) +
  theme(legend.title = element_text(size = 30),
        legend.text = element_text(size = 20),
        legend.key.width = unit(2, 'cm'),
        plot.title = element_text(size = 30)) +
  scale_fill_manual(values = c('#ccbb44', '#228833', '#4477aa',
                               '#ee6677', '#aa3377')) +
  labs(fill = "") +
  ggtitle("Five Functional Group \nClassification (FRI)")
grid.arrange(p1, p2, ncol = 2)
The distributions are again very similar. Note that the five functional group classification developed for the RMF is not a natural fit for the FSF, given the negligible amounts of black spruce and jack pine here.