#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ==============================================================================
# author :Ghislain Vieilledent
# email :ghislain.vieilledent@cirad.fr, ghislainv@gmail.com
# web :https://ecology.ghislainv.fr
# python_version :>=3
# license :GPLv3
# ==============================================================================
# Third party imports
import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal
import pandas as pd
# Local application imports
from .misc import progress_bar, make_square
# validation
[docs]
def validation(fcc_file, time_interval,
riskmap_file, tab_file_defrate,
csize=300,
no_quantity_error=False,
tab_file_pred="pred_obs.csv",
fig_file_pred="pred_obs.png",
figsize=(6.4, 4.8),
dpi=100, verbose=True):
"""Validation of the deforestation risk map.
This function computes the observed and predicted deforestion (in
ha) for the validation time period. Deforestion is computed for
spatial grid cells of a maximum size of 10km. Deforestation rate
estimates obtained with the ``defrate_per_cat`` function are used
to compute the predicted deforestation in each grid cell. The
function creates both a ``.csv`` file with the validation data and a
plot comparing predictions vs. observations. The function returns
the weighted Root Mean Squared Error (wRMSE, in hectares)
associated with the deforestation predictions.
:param fcc_file: Input raster file of forest cover change at three
dates (123). 1: first period deforestation, 2: second period
deforestation, 3: remaining forest at the end of the second
period. No data value must be 0 (zero).
:param time_interval: Time interval (in years) for forest cover
change observations during the **validation** period.
:param riskmap_file: Input raster file with categories of
spatial deforestation risk. This file is typically obtained
with function ``defor_cat()``.
:param tab_file_defrate: Path to the ``.csv`` input file with
estimates of deforestation rates per category of deforestation
risk. This file is typically obtained with function
``defrate_per_cat()``.
:param csize: Spatial cell size in number of pixels. Must
correspond to a distance < 10 km. Default to 300 corresponding
to 9 km for a 30 m resolution raster.
:param no_quantity_error: Correct the deforestation rates to avoid
a "quantity" error on deforestation due to differences in
total deforestation between first and second periods. This
point is being discussed to improve the JNR methodology.
:param tab_file_pred: Path to the ``.csv`` output file with validation
data.
:param fig_file_pred: Path to the ``.png`` output file for the
predictions vs. observations plot.
:param figsize: Figure size.
:param dpi: Resolution for output image.
:param verbose: Logical. Whether to print messages or not. Default
to ``True``.
:return: A dictionary. With ``wRMSE``: weighted Root Mean Squared
Error (in hectares) for the deforestation predictions,
``ncell``: the number of grid cells with forest cover > 0 at
the beginning of the validation period, ``csize``: the cell size
in number of pixels, ``csize_km``: the cell size in
kilometers.
"""
# ==============================================================
# Input data
# ==============================================================
# Get fcc raster data
fcc_ds = gdal.Open(fcc_file)
fcc_band = fcc_ds.GetRasterBand(1)
# Get defor_cat raster data
defor_cat_ds = gdal.Open(riskmap_file)
defor_cat_band = defor_cat_ds.GetRasterBand(1)
# Get defrate per cat
defrate_per_cat = pd.read_csv(tab_file_defrate)
cat = defrate_per_cat["cat"].values
# Pixel area (in unit square, eg. meter square)
gt = fcc_ds.GetGeoTransform()
pix_area = gt[1] * (-gt[5])
# Make square
squareinfo = make_square(fcc_file, csize)
nsquare = squareinfo[0]
nsquare_x = squareinfo[1]
x = squareinfo[3]
y = squareinfo[4]
nx = squareinfo[5]
ny = squareinfo[6]
# Cell size in km
csize_km = round(csize * gt[1] / 1000, 1)
# Create a table to save the results
data = {"cell": list(range(nsquare)), "nfor_obs": 0,
"ndefor_obs": 0, "ndefor_pred": 0}
df = pd.DataFrame(data)
# ==============================================================
# Loop on grid cells
# ==============================================================
# Loop on squares
for s in range(nsquare):
# Progress bar
if verbose:
progress_bar(nsquare, s + 1)
# Position in 1D-arrays
px = s % nsquare_x
py = s // nsquare_x
# Observed forest cover and deforestation for validation period
fcc_data = fcc_band.ReadAsArray(x[px], y[py], nx[px], ny[py])
df.loc[s, "nfor_obs"] = np.sum(fcc_data > 1)
df.loc[s, "ndefor_obs"] = np.sum(fcc_data == 2)
# Predicted deforestation for validation period
defor_cat_data = defor_cat_band.ReadAsArray(
x[px], y[py], nx[px], ny[py])
defor_cat = pd.Categorical(defor_cat_data.flatten(), categories=cat)
defor_cat_count = defor_cat.value_counts().values
# Deforestation rate on validation period
defrate_per_cat_period = (
1 - (1 - defrate_per_cat["rate"].values) ** time_interval)
# Predicted deforestation (area)
df.loc[s, "ndefor_pred"] = np.nansum(defor_cat_count *
defrate_per_cat_period)
# Note: np.nansum is used here as some cat might not exist and
# have nan for defrate (eg. in the case of Equal Interval).
# Dereference drivers
del fcc_ds, defor_cat_ds
# ==============================================================
# wRMSE and plot
# ==============================================================
# Select cells with forest cover > 0
df = df[df["nfor_obs"] > 0]
ncell = df.shape[0]
if ncell < 1000:
msg = ("Number of cells with forest cover > 0 ha is < 1000. "
"Please decrease the spatial cell size 'csize' to get "
"more cells.")
raise ValueError(msg)
# Correction for no_quantity_error (correction factor cf)
if no_quantity_error:
cf = df["ndefor_obs"].sum() / df["ndefor_pred"].sum()
df["ndefor_pred"] = cf * df["ndefor_pred"]
# Compute areas in ha
df["nfor_obs_ha"] = df["nfor_obs"] * pix_area / 10000
df["ndefor_obs_ha"] = df["ndefor_obs"] * pix_area / 10000
df["ndefor_pred_ha"] = df["ndefor_pred"] * pix_area / 10000
# Export the table of results
df.to_csv(tab_file_pred, sep=",", header=True,
index=False, index_label=False)
# Compute wRMSE
w = df["nfor_obs_ha"] / df["nfor_obs_ha"].sum()
error_pred = df["ndefor_pred_ha"] - df["ndefor_obs_ha"]
squared_error = (error_pred) ** 2
wRMSE = round(np.sqrt(sum(squared_error * w)), 1)
# Plot title
title = ("Predicted vs. observed deforestation (ha) "
"in " + str(csize_km) + " x " + str(csize_km) +
" km grid cells.")
# Points or identity line
p = [df["ndefor_obs_ha"].min(), df["ndefor_obs_ha"].max()]
# Plot predictions vs. observations
fig = plt.figure(figsize=figsize, dpi=dpi)
plt.subplot(111)
plt.scatter(df["ndefor_obs_ha"], df["ndefor_pred_ha"],
color=None, marker="o", edgecolor="k")
plt.plot(p, p, "r-")
plt.title(title)
if no_quantity_error:
plt.suptitle("Predictions have been corrected to avoid "
"\"quantity\" error.")
plt.xlabel("Observed deforestation (ha)")
plt.ylabel("Predicted deforestation (ha)")
# Text wRMSE and ncell
t = "wRMSE = " + str(wRMSE) + " ha\n n = " + str(ncell)
x_text = df["ndefor_obs_ha"].max()
y_text = 0
plt.text(x_text, y_text, t, ha="right", va="bottom")
fig.savefig(fig_file_pred)
plt.close(fig)
# Results
return {'wRMSE': wRMSE, 'ncell': ncell,
'csize': csize, 'csize_km': csize_km}
# Test
# import os
# os.chdir("../docsrc/notebooks")
# fcc_file = "data/fcc123_GLP.tif"
# time_interval = 10
# riskmap_file = "outputs_steps/riskmap.tif"
# tab_file_defrate = "outputs_steps/defrate_per_cat.csv"
# csize = 40
# tab_file_pred = "outputs_steps/pred_obs.csv"
# fig_file_pred = "outputs_steps/pred_obs.png"
# figsize = (6.4, 4.8)
# dpi = 100
# verbose = True
# validation(fcc_file, time_interval, riskmap_file, tab_file_defrate,
# csize, tab_file_pred, fig_file_pred)
# End