Source code for riskmapjnr.validation

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# ==============================================================================
# author          :Ghislain Vieilledent
# email           :ghislain.vieilledent@cirad.fr, ghislainv@gmail.com
# web             :https://ecology.ghislainv.fr
# python_version  :>=3
# license         :GPLv3
# ==============================================================================


# Third party imports
import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal
import pandas as pd

# Local application imports
from .misc import progress_bar, make_square


# validation
[docs] def validation(fcc_file, time_interval, riskmap_file, tab_file_defrate, csize=300, no_quantity_error=False, tab_file_pred="pred_obs.csv", fig_file_pred="pred_obs.png", figsize=(6.4, 4.8), dpi=100, verbose=True): """Validation of the deforestation risk map. This function computes the observed and predicted deforestion (in ha) for the validation time period. Deforestion is computed for spatial grid cells of a maximum size of 10km. Deforestation rate estimates obtained with the ``defrate_per_cat`` function are used to compute the predicted deforestation in each grid cell. The function creates both a ``.csv`` file with the validation data and a plot comparing predictions vs. observations. The function returns the weighted Root Mean Squared Error (wRMSE, in hectares) associated with the deforestation predictions. :param fcc_file: Input raster file of forest cover change at three dates (123). 1: first period deforestation, 2: second period deforestation, 3: remaining forest at the end of the second period. No data value must be 0 (zero). :param time_interval: Time interval (in years) for forest cover change observations during the **validation** period. :param riskmap_file: Input raster file with categories of spatial deforestation risk. This file is typically obtained with function ``defor_cat()``. :param tab_file_defrate: Path to the ``.csv`` input file with estimates of deforestation rates per category of deforestation risk. This file is typically obtained with function ``defrate_per_cat()``. :param csize: Spatial cell size in number of pixels. Must correspond to a distance < 10 km. Default to 300 corresponding to 9 km for a 30 m resolution raster. :param no_quantity_error: Correct the deforestation rates to avoid a "quantity" error on deforestation due to differences in total deforestation between first and second periods. This point is being discussed to improve the JNR methodology. :param tab_file_pred: Path to the ``.csv`` output file with validation data. :param fig_file_pred: Path to the ``.png`` output file for the predictions vs. observations plot. :param figsize: Figure size. :param dpi: Resolution for output image. :param verbose: Logical. Whether to print messages or not. Default to ``True``. :return: A dictionary. With ``wRMSE``: weighted Root Mean Squared Error (in hectares) for the deforestation predictions, ``ncell``: the number of grid cells with forest cover > 0 at the beginning of the validation period, ``csize``: the cell size in number of pixels, ``csize_km``: the cell size in kilometers. """ # ============================================================== # Input data # ============================================================== # Get fcc raster data fcc_ds = gdal.Open(fcc_file) fcc_band = fcc_ds.GetRasterBand(1) # Get defor_cat raster data defor_cat_ds = gdal.Open(riskmap_file) defor_cat_band = defor_cat_ds.GetRasterBand(1) # Get defrate per cat defrate_per_cat = pd.read_csv(tab_file_defrate) cat = defrate_per_cat["cat"].values # Pixel area (in unit square, eg. meter square) gt = fcc_ds.GetGeoTransform() pix_area = gt[1] * (-gt[5]) # Make square squareinfo = make_square(fcc_file, csize) nsquare = squareinfo[0] nsquare_x = squareinfo[1] x = squareinfo[3] y = squareinfo[4] nx = squareinfo[5] ny = squareinfo[6] # Cell size in km csize_km = round(csize * gt[1] / 1000, 1) # Create a table to save the results data = {"cell": list(range(nsquare)), "nfor_obs": 0, "ndefor_obs": 0, "ndefor_pred": 0} df = pd.DataFrame(data) # ============================================================== # Loop on grid cells # ============================================================== # Loop on squares for s in range(nsquare): # Progress bar if verbose: progress_bar(nsquare, s + 1) # Position in 1D-arrays px = s % nsquare_x py = s // nsquare_x # Observed forest cover and deforestation for validation period fcc_data = fcc_band.ReadAsArray(x[px], y[py], nx[px], ny[py]) df.loc[s, "nfor_obs"] = np.sum(fcc_data > 1) df.loc[s, "ndefor_obs"] = np.sum(fcc_data == 2) # Predicted deforestation for validation period defor_cat_data = defor_cat_band.ReadAsArray( x[px], y[py], nx[px], ny[py]) defor_cat = pd.Categorical(defor_cat_data.flatten(), categories=cat) defor_cat_count = defor_cat.value_counts().values # Deforestation rate on validation period defrate_per_cat_period = ( 1 - (1 - defrate_per_cat["rate"].values) ** time_interval) # Predicted deforestation (area) df.loc[s, "ndefor_pred"] = np.nansum(defor_cat_count * defrate_per_cat_period) # Note: np.nansum is used here as some cat might not exist and # have nan for defrate (eg. in the case of Equal Interval). # Dereference drivers del fcc_ds, defor_cat_ds # ============================================================== # wRMSE and plot # ============================================================== # Select cells with forest cover > 0 df = df[df["nfor_obs"] > 0] ncell = df.shape[0] if ncell < 1000: msg = ("Number of cells with forest cover > 0 ha is < 1000. " "Please decrease the spatial cell size 'csize' to get " "more cells.") raise ValueError(msg) # Correction for no_quantity_error (correction factor cf) if no_quantity_error: cf = df["ndefor_obs"].sum() / df["ndefor_pred"].sum() df["ndefor_pred"] = cf * df["ndefor_pred"] # Compute areas in ha df["nfor_obs_ha"] = df["nfor_obs"] * pix_area / 10000 df["ndefor_obs_ha"] = df["ndefor_obs"] * pix_area / 10000 df["ndefor_pred_ha"] = df["ndefor_pred"] * pix_area / 10000 # Export the table of results df.to_csv(tab_file_pred, sep=",", header=True, index=False, index_label=False) # Compute wRMSE w = df["nfor_obs_ha"] / df["nfor_obs_ha"].sum() error_pred = df["ndefor_pred_ha"] - df["ndefor_obs_ha"] squared_error = (error_pred) ** 2 wRMSE = round(np.sqrt(sum(squared_error * w)), 1) # Plot title title = ("Predicted vs. observed deforestation (ha) " "in " + str(csize_km) + " x " + str(csize_km) + " km grid cells.") # Points or identity line p = [df["ndefor_obs_ha"].min(), df["ndefor_obs_ha"].max()] # Plot predictions vs. observations fig = plt.figure(figsize=figsize, dpi=dpi) plt.subplot(111) plt.scatter(df["ndefor_obs_ha"], df["ndefor_pred_ha"], color=None, marker="o", edgecolor="k") plt.plot(p, p, "r-") plt.title(title) if no_quantity_error: plt.suptitle("Predictions have been corrected to avoid " "\"quantity\" error.") plt.xlabel("Observed deforestation (ha)") plt.ylabel("Predicted deforestation (ha)") # Text wRMSE and ncell t = "wRMSE = " + str(wRMSE) + " ha\n n = " + str(ncell) x_text = df["ndefor_obs_ha"].max() y_text = 0 plt.text(x_text, y_text, t, ha="right", va="bottom") fig.savefig(fig_file_pred) plt.close(fig) # Results return {'wRMSE': wRMSE, 'ncell': ncell, 'csize': csize, 'csize_km': csize_km}
# Test # import os # os.chdir("../docsrc/notebooks") # fcc_file = "data/fcc123_GLP.tif" # time_interval = 10 # riskmap_file = "outputs_steps/riskmap.tif" # tab_file_defrate = "outputs_steps/defrate_per_cat.csv" # csize = 40 # tab_file_pred = "outputs_steps/pred_obs.csv" # fig_file_pred = "outputs_steps/pred_obs.png" # figsize = (6.4, 4.8) # dpi = 100 # verbose = True # validation(fcc_file, time_interval, riskmap_file, tab_file_defrate, # csize, tab_file_pred, fig_file_pred) # End