<- link2GI::linkOTB(searchLocation = "~/apps/OTB-9.1.0-Linux/")
otb Sys.setenv(OTB_APPLICATION_PATH = file.path(dirname(as.character(otb$pathOTB)), "lib/otb/applications"))
Sys.setenv(PATH = paste(otb$pathOTB, Sys.getenv("PATH"), sep = ":"))
Tree Species Classification Cleaning with OTB and Terra
Purpose of Tree Data for ENVI-met Modeling This workflow prepares classified tree species data as a basis for generating individual tree objects for use in ENVI-met’s 3DPLANT module. Each tree location is linked to a simplified vertical LAD profile and assigned to a species class (e.g. Fagus sylvatica, Quercus robur, Pseudotsuga menziesii), which defines its interaction with ENVI-met’s radiation and vegetation modules.
The underlying classification raster originates from official, state-level aerial RGB orthophotos with a spatial resolution of 0.3 m. These orthophotos provide sufficient detail to allow object-based species classification at the level of individual tree crowns.
Species prediction was performed using a leave-location-out forward feature selection approach implemented via the CAST package in R. This ensures that classification results generalize across spatially distinct regions by avoiding overfitting to local spectral conditions.
Before assigning vegetation objects to the ENVI-met model domain, species maps are despeckled, aggregated, and contextually corrected to remove isolated or misclassified tree crowns (e.g. Douglas-fir pixels in beech-dominated stands). This ensures that each synthetic ENVI-met tree is placed in a semantically and structurally consistent vegetation context.
1. Setup and Environment
Manual Setup of OTB Environment for use with link2GI
The R package link2GI
provides wrapper functions to connect R with external geospatial software like Orfeo Toolbox (OTB), GRASS GIS, and QGIS. The function linkOTB()
is used to locate OTB binaries and configure the R session to allow calling OTB applications via command-line interface (CLI) from R.
However, in many modern setups—especially on Linux or in manually installed environments (e.g., extracted zip files)—the required environment variables are not set globally, and linkOTB()
alone is not sufficient. This typically leads to errors like:
- “Application not found”
- “No XML application descriptors”
- “Could not find CLI tools”
To fix this, two critical environment variables need to be explicitly set after calling linkOTB()
:
OTB_APPLICATION_PATH
: This must point to the directorylib/otb/applications
, where all XML definitions of the OTB applications are stored. These XML files describe how to call each OTB tool from the command line.PATH
: This must include the directory where OTB binaries likeotbcli_BandMath
are stored (typicallybin/
). Without this, system calls from R to OTB will fail.
Example for Linux:
Example for Windows:
<- link2GI::linkOTB(searchLocation = "C:/OTB-9.1.0-Win64/")
otb Sys.setenv(OTB_APPLICATION_PATH = "C:/OTB-9.1.0-Win64/lib/otb/applications")
Sys.setenv(PATH = paste("C:/OTB-9.1.0-Win64/bin", Sys.getenv("PATH"), sep = ";"))
Note:
- On Windows, use forward slashes
/
in the path. - The PATH separator is
;
on Windows and:
on Unix-based systems.
This workaround is often necessary in portable, containerized, or research setups where full system integration (e.g., PATH exports, registry entries) is not available or not desired. It ensures that link2GI
can still function as intended by emulating the expected environment internally within R.
# Load libraries
library(terra)
library(RColorBrewer)
library(link2GI)
library(tools)
library(mapview)
library(dplyr)
# Project root and OTB environment
<- rprojroot::find_rstudio_root_file()
root_folder <- link2GI::linkOTB(searchLocation = "~/apps/OTB-9.1.0-Linux/")
otb Sys.setenv(OTB_APPLICATION_PATH = file.path(dirname(as.character(otb$pathOTB)), "lib/otb/applications"))
Sys.setenv(PATH = paste(otb$pathOTB, Sys.getenv("PATH"), sep = ":"))
2. Parameters and Class Legend
<- 1
target_res <- "5-25_MOF_rgb"
fn <- 25832
epsg <- raster::extent(477500, 478218, 5631730, 5632500)
sapflow_ext
<- data.frame(
ts ID = 1:12,
value = c("agriculture", "alder", "ash", "beech", "douglas_fir", "larch",
"oak", "pastures", "roads", "settlements", "spruce", "water")
)
3. Rationale: Why Despeckle First?
🔍 Why do we despeckle at original resolution before aggregation and contextual filtering?
- Preserve spatial detail: High-frequency noise (e.g., misclassified single pixels) must be removed before they get averaged into larger grid cells.
- Avoid error propagation: Aggregating first would carry speckle artifacts into the coarser grid.
- Enable ecologically meaningful correction: Focal filtering (e.g., Douglas-fir to Oak) should be applied on ~1 m resolution where “dominance” of classes has meaning.
- Step order summary:
ClassificationMapRegularization
: Clean noise at 0.2 maggregate()
: Smooth to 1 m (e.g., crown scale)focal()
: Replace ecologically implausible patches
4. Load and Preprocess Species Classification
<- readRDS("../data/aerial/sfprediction_ffs_5-25_MOF_rgb.rds")
sapflow_species ::writeRaster(sapflow_species, "../data/aerial/prediction_ffs.tif", overwrite = TRUE)
raster<- raster::crop(sapflow_species, sapflow_ext)
sapflow_species ::writeRaster(sapflow_species, "../data/aerial/prediction_ffs_cut.tif", overwrite = TRUE) raster
5. Majority Filtering (OTB Despeckle)
<- parseOTBFunction("ClassificationMapRegularization", otb)
cmr $io.in <- "../data/aerial/prediction_ffs.tif"
cmr$io.out <- "../data/aerial/majority_out.tif"
cmr$ip.radius <- "1"
cmr$progress <- "true"
cmr<- runOTB(cmr, gili = otb$pathOTB, quiet = FALSE, retRaster = TRUE) filter_treespecies
6. Aggregate to 1 m Resolution
<- rast("../data/aerial/majority_out.tif")
r <- res(r)[1]
cur_res <- round(target_res / cur_res)
fact if (target_res <= cur_res) stop("Target resolution is lower than input resolution.")
<- aggregate(r, fact = fact, fun = median, na.rm = TRUE)
r_agg <- sprintf("../data/aerial/%s_%sm.tif", tools::file_path_sans_ext(basename("../data/aerial/aggregate.tif")), target_res)
outfile writeRaster(r_agg, outfile, overwrite = TRUE)
7. Contextual Correction (Douglas → Beech/Oak)
<- function(rast_input,
replace_douglas_in_buche_eiche window_size = 5,
douglas_value = 5,
target_values = c(4, 7),
target_res = 1.0) {
if (!inherits(rast_input, "SpatRaster")) stop("Input must be SpatRaster")
if (window_size %% 2 == 0) stop("window_size must be odd")
<- matrix(1, nrow = window_size, ncol = window_size)
w <- focal(rast_input, w = w, fun = modal, na.policy = "omit", na.rm = TRUE, progress = "text")
r_mode <- rast_input == douglas_value
is_douglas <- r_mode %in% target_values
is_oak_beech_mode <- is_douglas & is_oak_beech_mode
replace_mask <- rast_input
r_new <- r_mode[replace_mask]
r_new[replace_mask] writeRaster(r_new, sprintf("../data/aerial/%s_%sm.tif", "agg_cleand", target_res), overwrite = TRUE)
return(r_new)
}<- replace_douglas_in_buche_eiche(r_agg, window_size = 5) species_cleaned
Complete Code
#------------------------------------------------------------------------------
# Script Type: Processing script
# Script Name: 30_filter_classified_species_map.R
# Author: Chris Reudenbach, creuden@gmail.com
#
# Description:
# - Loads a classified raster map of tree species
# - Applies spatial smoothing using OTB ClassificationMapRegularization
# - Aggregates to 1 m resolution using median filtering
# - Performs contextual correction: replaces isolated Douglas-fir pixels
# with Beech or Oak if those are dominant in the local neighborhood
#
# Input:
# - RDS and GeoTIFF classification of tree species (0.2 m resolution)
#
# Output:
# - Cleaned and aggregated species raster (1 m resolution)
#
# Dependencies:
# - OTB 9.1+ with PATH and OTB_APPLICATION_PATH correctly set
#
# Copyright: Chris Reudenbach 2021, GPL (>= 3)
# Git: https://github.com/gisma-courses/microclimate.git
#
# Commentary:
# This workflow separates two crucial but distinct steps in map cleaning:
# 1. **Noise reduction (smoothing):** Applied via OTB's ClassificationMapRegularization.
# - It performs a fast majority-based smoothing using a local moving window (e.g., 3x3).
# - This step removes small speckles or misclassified pixels in homogeneous areas.
# - It is computationally efficient due to OTB's C++-based implementation.
#
# 2. **Semantic filtering:** Performed in R via a contextual reclassification function.
# - Specifically targets ecologically unlikely or isolated Douglas-fir pixels.
# - These are replaced with surrounding Beech or Oak pixels if they dominate locally.
# - Allows flexible, rule-based filtering that OTB cannot natively perform.
#
# ➤ Both steps are technically possible in R using terra::focal(), but:
# - Smoothing with `focal()` is **much slower** on large rasters (single-threaded).
# - OTB is highly recommended for performance.
#
# ➤ The R-based semantic filtering step is **required** if logical replacement
# rules (like Douglas-fir substitution) are needed. This goes beyond statistical smoothing.
#------------------------------------------------------------------------------
# === Libraries ===
library(terra) # raster handling
terra 1.8.60
library(RColorBrewer) # color palettes
library(link2GI) # OTB integration
library(rprojroot)
library(tools) # file name tools
library(mapview) # interactive maps
library(dplyr) # data manipulation
Attaching package: 'dplyr'
The following objects are masked from 'package:terra':
intersect, union
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# === Environment and paths ===
<- find_rstudio_root_file()
root_folder
# Set up OTB environment
<- link2GI::linkOTB(searchLocation = "~/apps/OTB-9.1.0-Linux/")
otb Sys.setenv(OTB_APPLICATION_PATH = file.path(dirname(as.character(otb$pathOTB)), "lib/otb/applications"))
Sys.setenv(PATH = paste(otb$pathOTB, Sys.getenv("PATH"), sep = ":"))
# === Parameters ===
<- 1 # desired resolution in meters
target_res <- 2 # (not used yet)
min_tree_height <- "5-25_MOF_rgb" # image stem
fn <- 25832 # UTM32N
epsg <- raster::extent(477500, 478218, 5631730, 5632500) # area of interest
sapflow_ext
# === Class ID legend ===
<- data.frame(
ts ID = 1:12,
value = c(
"agriculture",
"alder",
"ash",
"beech",
"douglas_fir",
"larch",
"oak",
"pastures",
"roads",
"settlements",
"spruce",
"water"
)
)
#------------------------------------------------------------------------------
# FUNCTION: Replace isolated Douglas-fir with Beech or Oak if dominant around
#------------------------------------------------------------------------------
<- function(rast_input,
replace_douglas_in_buche_eiche window_size = 5,
douglas_value = 5,
target_values = c(4, 7),
target_res = 1.0) {
if (window_size %% 2 == 0)
stop("window_size must be odd")
# Focal window matrix (square)
<- matrix(1, nrow = window_size, ncol = window_size)
w
# Run OTB ClassificationMapRegularization to compute local mode
<- parseOTBFunction("ClassificationMapRegularization", otb)
cmr $io.in <- sprintf("../data/aerial/%s_%sm.tif",
cmr::file_path_sans_ext(basename("../data/aerial/aggregate.tif")),
tools
target_res)$io.out <- sprintf("../data/aerial/%s_%sm.tif",
cmr::file_path_sans_ext(basename("../data/aerial/aggregate_mode.tif")),
tools
window_size)$ip.radius <- as.character((window_size - 1) / 2) # for 5x5 window: radius = (5 - 1)/2 = 2
cmr$progress <- "true"
cmr
runOTB(cmr, gili = otb$pathOTB, quiet = FALSE)
# Identify Douglas-fir pixels and surrounding Beech/Oak dominance
= rast(cmr$io.out)
r_mode = rast(cmr$io.in)
rast_input <- rast_input == douglas_value
is_douglas <- r_mode %in% target_values
is_oak_beech_mode <- is_douglas & is_oak_beech_mode
replace_mask
# Replace Douglas-fir where Beech or Oak dominate
<- rast_input
r_new <- r_mode[replace_mask]
r_new[replace_mask]
# Construct output path
<- paste0("../data/aerial/",
outname "agg_cleand_",
as.character(target_res),
"m.tif")
writeRaster(r_new, outname,overwrite = TRUE)
return(r_new)
}
#------------------------------------------------------------------------------
# STEP 1: Read tree species classification from RDS
#------------------------------------------------------------------------------
<- readRDS("../data/aerial/sfprediction_ffs_5-25_MOF_rgb.rds")
sapflow_species
# Write to GeoTIFF for further processing
::writeRaster(
raster
sapflow_species,"../data/aerial/prediction_ffs.tif",
progress = "text",
overwrite = TRUE
)
# Crop to sapflow test area
<- raster::crop(sapflow_species, sapflow_ext)
sapflow_species ::writeRaster(
raster
sapflow_species,"../data/aerial/prediction_ffs_cut.tif",
progress = "text",
overwrite = TRUE
)
#------------------------------------------------------------------------------
# STEP 2: Run OTB ClassificationMapRegularization (majority filter)
#------------------------------------------------------------------------------
<- parseOTBFunction("ClassificationMapRegularization", otb)
cmr $io.in <- "../data/aerial/prediction_ffs.tif"
cmr$io.out <- "../data/aerial/majority_out.tif"
cmr$progress <- "true"
cmr$ip.radius <- "1"
cmr
<- runOTB(cmr,
filter_treespecies gili = otb$pathOTB,
quiet = FALSE,
retRaster = TRUE)
otbcli_ClassificationMapRegularization -io.in ../data/aerial/prediction_ffs.tif -io.out /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/majority_out.tif -ip.radius 1 -ip.suvbool false -ip.nodatalabel 0 -ip.undecidedlabel 0 -ip.onlyisolatedpixels false -ip.isolatedthreshold 1 -ram 256 -progress true
[1] "2025-07-30 12:30:36 (INFO) ClassificationMapRegularization: Default RAM limit for OTB is 256 MB"
[1] "2025-07-30 12:30:36 (INFO) ClassificationMapRegularization: GDAL maximum cache size is 4814 MB"
[1] "2025-07-30 12:30:36 (INFO) ClassificationMapRegularization: OTB will use at most 32 threads"
[1] "2025-07-30 12:30:36 (INFO): Loading metadata from official product"
[1] "2025-07-30 12:30:36 (INFO): Estimated memory for full processing: 1052.34MB (avail.: 256 MB), optimal image partitioning: 5 blocks"
[1] "2025-07-30 12:30:36 (INFO): File /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/majority_out.tif will be written in 6 blocks of 14798x2044 pixels"
[1] "Writing /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/majority_out.tif...: 100% [**************************************************] (4s)"
#------------------------------------------------------------------------------
# STEP 3: Aggregate to 1 m resolution using median
#------------------------------------------------------------------------------
<- rast("../data/aerial/majority_out.tif")
r <- res(r)[1]
cur_res <- round(target_res / cur_res)
fact
if (target_res <= cur_res)
stop("Zielauflösung ist kleiner als aktuelle.")
<- aggregate(r,
r_agg fact = fact,
fun = median,
na.rm = TRUE)
|---------|---------|---------|---------|
=========================================
# Build automatic filename
<- sprintf("../data/aerial/%s_%sm.tif",
outfile ::file_path_sans_ext(basename("../data/aerial/aggregate.tif")),
tools
target_res)
# Save aggregated raster
writeRaster(r_agg, outfile, overwrite = TRUE)
#------------------------------------------------------------------------------
# STEP 4: Clean Douglas-fir patches contextually
#------------------------------------------------------------------------------
<- replace_douglas_in_buche_eiche(window_size = 9) species_cleaned
otbcli_ClassificationMapRegularization -io.in ../data/aerial/aggregate_1m.tif -io.out /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/aggregate_mode_9m.tif -ip.radius 4 -ip.suvbool false -ip.nodatalabel 0 -ip.undecidedlabel 0 -ip.onlyisolatedpixels false -ip.isolatedthreshold 1 -ram 256 -progress true
[1] "2025-07-30 12:30:43 (INFO) ClassificationMapRegularization: Default RAM limit for OTB is 256 MB"
[1] "2025-07-30 12:30:43 (INFO) ClassificationMapRegularization: GDAL maximum cache size is 4814 MB"
[1] "2025-07-30 12:30:43 (INFO) ClassificationMapRegularization: OTB will use at most 32 threads"
[1] "2025-07-30 12:30:43 (INFO): Loading metadata from official product"
[1] "2025-07-30 12:30:43 (INFO): Estimated memory for full processing: 43.842MB (avail.: 256 MB), optimal image partitioning: 1 blocks"
[1] "2025-07-30 12:30:43 (INFO): File /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/aggregate_mode_9m.tif will be written in 1 blocks of 2960x2453 pixels"
[1] "Writing /home/creu/edu/gisma-courses/tls-tree-climate/data/aerial/aggregate_mode_9m.tif...: 100% [**************************************************] (0s)"
#------------------------------------------------------------------------------
# STEP 5: Visualize intermediate steps (interactive)
#------------------------------------------------------------------------------
library(mapview)
library(leafsync)
library(htmlwidgets)
library(terra)
library(RColorBrewer)
# Define common parameters
<- brewer.pal(12, "Paired")
palette <- list(lng = 8.68443, lat = 50.84089, zoom = 18)
zoom_center
# -- Map 1: Raw species classification (0.2 m)
<- mapview(
m1 ::crop(sapflow_species, sapflow_ext),
terracol.regions = palette,
at = ts$ID,
layer.name = "Species 0.2m"
)
Warning in rasterCheckSize(x, maxpixels = maxpixels): maximum number of pixels for Raster* viewing is 5e+05 ;
the supplied Raster* has 13821500
... decreasing Raster* resolution to 5e+05 pixels
to view full resolution set 'maxpixels = 13821500 '
# -- Map 2: OTB 3×3 modal smoothing
<- mapview(
m2 ::crop(filter_treespecies, sapflow_ext),
terracol.regions = palette,
at = ts$ID,
layer.name = "3x3 modal_filt"
)
Number of pixels is above 5e+05.Only about 5e+05 pixels will be shown.
You can increase the value of `maxpixels` to 13821500 to avoid this.
# -- Map 3: Aggregated to 1 m resolution
<- mapview(
m3 ::crop(r_agg, sapflow_ext),
terracol.regions = palette,
at = ts$ID,
layer.name = "Aggregated 1m"
)
Number of pixels is above 5e+05.Only about 5e+05 pixels will be shown.
You can increase the value of `maxpixels` to 552860 to avoid this.
Warning: Found more colors (12) than zcol values (11)!
Trimming colors to match number of zcol values.
# -- Map 4: Douglas-fir replaced by contextual rules
<- mapview(
m4 ::crop(species_cleaned, sapflow_ext),
terracol.regions = palette,
fgb = TRUE,
layer.name = "Douglas out 1m"
)
Number of pixels is above 5e+05.Only about 5e+05 pixels will be shown.
You can increase the value of `maxpixels` to 552860 to avoid this.
Warning: Found more colors (12) than zcol values (11)!
Trimming colors to match number of zcol values.
# Convert to leaflet and apply zoom center
<- m1@map %>% leaflet::setView(zoom_center$lng, zoom_center$lat, zoom_center$zoom)
lm1 <- m2@map %>% leaflet::setView(zoom_center$lng, zoom_center$lat, zoom_center$zoom)
lm2 <- m3@map %>% leaflet::setView(zoom_center$lng, zoom_center$lat, zoom_center$zoom)
lm3 <- m4@map %>% leaflet::setView(zoom_center$lng, zoom_center$lat, zoom_center$zoom)
lm4
# Synchronize maps side-by-side
<- sync(lm1, lm2, lm3, lm4)
out
out