Source code for dendromatics.individualize

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

from .voxel.voxel import *

# -----------------------------------------------------------------------------
# compute_axes
# -----------------------------------------------------------------------------


[docs] def compute_axes( voxelated_cloud, clust_stripe, stripe_lower_limit, stripe_upper_limit, h_range, min_points, d_max, X_field, Y_field, Z_field, Z0_field, tree_id_field, progress_hook=None, ): """Function used inside individualize_trees during tree individualization process. It identifies tree axes. It expects a voxelated version of the point cloud and a filtered (based on the verticality clustering process) stripe as input, so that it only contains (hopefully) stems. Those stems are isolated and enumerated, and then, their axes are identified using PCA (PCA1 direction). This allows to group points based on their distance to those axes, thus assigning each point to a tree. It requires a height-normalized cloud in order to function properly. Parameters ---------- voxelated_cloud : numpy.ndarray The voxelated point cloud. It is expected to have X, Y, Z and Z0 fields. clust_stripe : numpy.ndarray The point cloud containing the clusterized stripe (the stems) from verticality_clustering. stripe_lower_limit : float Lower (vertical) limit of the stripe (units is meters). Defaults to 0.7. stripe_upper_limit : float Upper (vertical) limit of the stripe (units is meters). Defaults to 3.5. h_range : float Only stems where points extend vertically throughout a range as tall as defined by h_range are considered. min_points : int Minimum number of points in a cluster for it to be considered as a potential stem d_max : float Points that are closer than d_max to an axis are assigned to that axis. X_field : int Index at which (x) coordinates are stored. Y_field : int Index at which (y) coordinates are stored. Z_field : int Index at which (z) coordinates are stored. Z0_field : int Index at which (z0) coordinates are stored. tree_id_field : int Index at which cluster ID is stored. progress_hook : callable, optional A hook that take two int, the first is the current number of iteration and the second is the targetted number iteration. Defaults to None. Returns ------- detected_trees : numpy.ndarray Matrix with as many rows as trees, containing a description of each individualized tree. It stores the following values: tree ID, PCA1 X value, PCA1 Y value, PCA1 Z value, stem centroid X value, stem centroid Y value, stem centroid Z value, height difference of stem centroid (z - z0), axis vertical deviation. dist_to_axis : numpy.ndarray Matrix containing the distance from each point to the closest axis. tree_id_vector : numpy.ndarray Vector containing the tree IDs. """ # Empty vectors that will store final outputs: - distance from each point to # closest axis - ID of the corresponding tree (the tree that the point belongs to). dist_to_axis = ( np.zeros((np.size(voxelated_cloud, 0))) + 100000 ) # distance to the closest axis tree_id_vector = ( np.zeros((np.size(voxelated_cloud, 0))) + 100000 ) # tree ID of closest axis # Set of all possible trees (trunks at this stage) and number of points associated to each: unique_values, n = np.unique(clust_stripe[:, tree_id_field], return_counts=True) # Filtering of possible trees that do not contain enough points to be considered. filt_unique_values = unique_values[n > min_points] # Final number of trees n_values = np.size(filt_unique_values) # Empty array to be filled with several descriptors of the trees. In the following order: # tree ID | PCA1 X value | PCA1 Y value | PCA1 Z value | trunk centroid X value # | trunk centroid Y value | trunk centroid Z value | height difference | # It has as many rows as trees are. detected_trees = np.zeros((np.size(filt_unique_values, 0), 9)) # Auxiliar index used to display progress information. ind = 0 # Height range (actual value, not the %) that points should extend throughout h_range_value = (stripe_upper_limit - stripe_lower_limit) * h_range # First loop: It goes over each tree (still stems) except for the first entry, # which maps to noise (this entry is generated by DBSCAN during clustering). if progress_hook is not None: progress_hook(0, n_values) for i in filt_unique_values: # Isolation of stems: stem_i only contains points associated to 1 tree. stem_i = clust_stripe[clust_stripe[:, tree_id_field] == i][ :, [X_field, Y_field, Z_field] ] # Z and Z0 mean heights of points in a given tree z_z0 = np.average( clust_stripe[clust_stripe[:, tree_id_field] == i][:, [Z_field, Z0_field]], axis=0, ) # Difference between Z and Z0 mean heights diff_z_z0 = z_z0[0] - z_z0[1] # Second loop: only stems where points extend vertically throughout its # whole range are considered. if np.ptp(stem_i[:, Z_field]) > (h_range_value): # PCA and centroid computation. pca_out = PCA(n_components=3) pca_out.fit(stem_i) centroid = np.mean(stem_i, 0) # Values are stored in tree vector detected_trees[ind, 0] = i # tree ID detected_trees[ind, 1:4] = pca_out.components_[ 0, : ] # PCA1 X value | PCA1 Y value | PCA1 Z value detected_trees[ ind, 4:7 ] = centroid # stem centroid X value | stem centroid Y value | stem centroid Z value detected_trees[ind, 7] = diff_z_z0 # Height difference detected_trees[ind, 8] = np.abs( np.arctan( np.sqrt(detected_trees[ind, 1] ** 2 + detected_trees[ind, 2] ** 2) / detected_trees[ind, 3] ) * 180 / np.pi ) ind = ind + 1 if progress_hook is not None: progress_hook(ind, n_values) # Coordinate transformation from original to PCA. Done for EVERY # point of the original cloud from the PCA of a SINGLE stem. cloud_pca_coords = pca_out.transform( voxelated_cloud[:, [X_field, Y_field, Z_field]] ) # Distance from every point in the new coordinate system to the axes. # It is directly computed from the cuadratic component of PC2 and PC3 axis_dist = np.hypot(cloud_pca_coords[:, 1], cloud_pca_coords[:, 2]) # Points that are closer than d_max to an axis are assigned to that axis. # Also, if a point is closer to an axis than it was to previous axes, accounting for points # that were previously assigned to some other axis in previous iterations, it is assigned # to the new, closer axis. Distance to the axis is stored as well. valid_points = (axis_dist < d_max) & ((axis_dist - dist_to_axis) < 0) tree_id_vector[valid_points] = i dist_to_axis[valid_points] = axis_dist[valid_points] # This deletes the trailing rows that only contains zeros detected_trees = detected_trees[~np.all(detected_trees == 0, axis=1)] return (detected_trees, dist_to_axis, tree_id_vector)
# ----------------------------------------------------------------------------- # compute_heights # -----------------------------------------------------------------------------
[docs] def compute_heights( voxelated_cloud, detected_trees, dist_to_axis, tree_id_vector, d, max_dev, resolution_heights, n_digits, X_field, Y_field, Z_field, ): """Function used inside individualize_trees during tree individualization process. It measures tree heights. The function creates a large-resolution voxel cloud to and filters voxels containing few points. This has the purpose to discard any outlier point that might be over the trees, to then identify the highest point within the remaining voxels. It requires a height-normalized cloud in order to function properly. Parameters ---------- voxelated_cloud : numpy.ndarray The voxelated point cloud. It is expected to have X, Y, Z and Z0 fields. detected_trees : numpy.ndarray See compute_axes. dist_to_axis : numpy.ndarray See compute_axes. tree_id_vector : numpynd.array See compute_axes. d : float Points within this distance from tree axis will be considered as potential points to define tree height. max_dev : float Maximum degree of vertical deviation of a tree axis to consider its tree height measurement as valid. resolution_heights : float (x, y, z) voxel resolution. n_digits : int Number of digits dedicated to each coordinate ((x), (y) or (z)) during the generation of each point code. X_field : int Index at which (x) coordinates are stored. Y_field : int Index at which (y) coordinates are stored. Z_field : int Index at which (z) coordinates are stored. Returns ------- tree_heights: numpy.ndarray Matrix containing (x, y, z) coordinates of each tree's highest point, as well as its normalized height and a binary field stating if the axis was deviated (1) or if it was not (0). """ # The cloud is re-voxelated to a larger resolution to then be clusterized. # Small clusters containing 1-2 voxels will be discarded to eliminate # outliers points that could interfere in height measurement. large_voxels_cloud, large_vox_to_cloud_ind, _ = voxelate( voxelated_cloud, resolution_heights, resolution_heights, n_digits, X_field, Y_field, Z_field, with_n_points=False, ) # eps for DBSCAN eps_heights = resolution_heights * 1.9 # Large-resolution voxelated cloud is clusterized clustering = DBSCAN(eps=eps_heights, min_samples=2).fit(large_voxels_cloud) # Cluster labels are attached to the fine-resolution voxelated cloud voxelated_cloud = np.append( voxelated_cloud, np.expand_dims(clustering.labels_[large_vox_to_cloud_ind], axis=1), axis=1, ) # Tree IDS are attached to the fine-resolution voxelated cloud too voxelated_cloud = np.append( voxelated_cloud, np.expand_dims(tree_id_vector, axis=1), axis=1 ) # Eliminating all points too far away from axes voxelated_cloud = voxelated_cloud[dist_to_axis < d, :] # Set of all cluster labels and their cardinality: cluster_id = {1,...,K}, # K = 'number of clusters'. cluster_id, K = np.unique(clustering.labels_, return_counts=True) # Filtering of labels associated only to clusters that contain a minimum number of points. large_clusters = cluster_id[K > 3] # Discarding points that do not belong to any cluster large_clusters = large_clusters[large_clusters != -1] # Eliminating all points that belong to clusters with less than 2 points (large voxels) voxelated_cloud = voxelated_cloud[np.isin(voxelated_cloud[:, -2], large_clusters)] # Only pick the actual trees: n_trees = detected_trees.shape[0] tree_heights = np.zeros((n_trees, 5)) for i in range(n_trees): # Last row of tree_vector # Be aware this finds the highest voxel (fine-resolution), not the highest point. valid_id = detected_trees[i, 0] single_tree = voxelated_cloud[ voxelated_cloud[:, -1] == valid_id, 0:3 ] # Just the (x, y, z) coordinates which_z_max = np.argmax(single_tree[:, 2]) # The highest (z) value highest_point = single_tree[which_z_max, :] # The highest point tree_heights[i, 0:3] = highest_point tree_heights[i, 3] = highest_point[2] - detected_trees[i, 7] # (z0) # If tree is deviated from vertical, 1, else, 0. if detected_trees[i, -1] > [max_dev]: tree_heights[i, -1] = 0 else: tree_heights[i, -1] = 1 return tree_heights
# ----------------------------------------------------------------------------- # individualize_trees # -----------------------------------------------------------------------------
[docs] def individualize_trees( cloud, clust_stripe, resolution_z=0.035, resolution_xy=0.035, stripe_lower_limit=0.7, stripe_upper_limit=3.5, h_range=1.2, d_max=1.5, min_points=20, d=15, max_dev=25, resolution_heights=0.3, n_digits=5, X_field=0, Y_field=1, Z_field=2, Z0_field=3, tree_id_field=-1, progress_hook=None, ): """This function expects a filtered (based on the clustering process) stripe as input, so that it only contains (hopefully) stems. Those stems are voxelated and enumerated, and then, their axes are identified using PCA (PCA1 direction). This allows to group points based on their distance to those axes, thus assigning each point to a tree. This last step is applied to the WHOLE original cloud. It also measures tree heights. It requires a height-normalized cloud in order to function properly. Parameters ---------- cloud : numpy.ndarray The point cloud. It is expected to have X, Y, Z and Z0 fields. clust_stripe : numpy.ndarray. The point cloud containing the clusterized stripe from verticality_clustering. It is expected to have X, Y, Z0 and cluster ID fields. resolution_z : float (x, y) voxel resolution in meters. Defaults to 0.035. resolution_xy : float (z) voxel resolution in meters. Defaults to 0.035. stripe_lower_limit : float Lower (vertical) limit of the stripe (units is meters). Defaults to 0.7. stripe_upper_limit : float Upper (vertical) limit of the stripe (units is meters). Defaults to 3.5. h_range : float Only stems where points extend vertically throughout a range as tall as defined by h_range are considered (units is meters). Defaults to 1.2. d_max : float Points that are closer than d_max to an axis are assigned to that axis (units is meters). Defaults to 1.5. min_points : int Minimum number of points in a cluster for it to be considered as a potential stem. Defaults to 20. d : float Points within this distance from tree axis will be considered as potential points to define tree height (units is meters). Defaults to 15. max_dev : float Maximum degree of vertical deviation of a tree axis to consider its tree height measurement as valid (units is sexagesimal degrees). Defaults to 25. n_digits : int Number of digits dedicated to each coordinate ((x), (y) or (z)) during the generation of each point code. Defaults to 5. resolution_heights : float (x, y, z) voxel resolution in meters used during height computation. Defaults to 0.3 X_field : int Index at which (x) coordinate is stored. Defaults to 0. Y_field : int Index at which (y) coordinate is stored. Defaults to 1. Z_field : int Index at which (z) coordinate is stored. Defaults to 2. Z0_field : int Index at which (z0) coordinate is stored. Defaults to 3. tree_id_field : int Index at which cluster ID is stored. Defaults to -1. progress_hook : callable, optional A hook that take two int, the first is the current number of iteration and the second is the targetted number iteration. Defaults to None. Returns ------- assigned_cloud : numpy.ndarray Point cloud containing individualized trees. It consists of (x), (y), (z) and (z0) coordinates, a 5th column containing tree ID that each point belongs to and a 6th column containing point distance to closest axis. detected_trees : numpy.ndarray Matrix with as many rows as trees, containing a description of each individualized tree. It stores tree ID, PCA1 X value, PCA1 Y value, PCA1 Z value, stem centroid X value, stem centroid Y value, stem centroid Z value, height difference of stem centroid (z - z0), axis vertical deviation. tree_heights : numpy.ndarray Matrix containing the heights of individualized trees. It consists of (x), (y), (z) and (z0) coordinates of the highest point of the tree and a 5th column containing a binary indicator: 0 - tree was too deviated from vertical, and height may not be accurate, or 1 - tree was not too deviated from vertical, thus height may be trusted. """ # Whole original cloud voxelization voxelated_cloud, vox_to_cloud_ind, _ = voxelate( cloud, resolution_z, resolution_xy, n_digits, X_field, Y_field, Z_field, with_n_points=False, ) # Call to compute_axes detected_trees, dist_to_axis, tree_id_vector = compute_axes( voxelated_cloud, clust_stripe, stripe_lower_limit, stripe_upper_limit, h_range, min_points, d_max, X_field, Y_field, Z_field, Z0_field, tree_id_field, progress_hook, ) # Call to compute_heights tree_heights = compute_heights( voxelated_cloud, detected_trees, dist_to_axis, tree_id_vector, d, max_dev, resolution_heights, n_digits, X_field, Y_field, Z_field, ) # Two new fields are added to the original cloud: - tree ID (id of closest axis) # - distance to that axis assigned_cloud = np.append( cloud, tree_id_vector[vox_to_cloud_ind, np.newaxis], axis=1 ) assigned_cloud = np.append( assigned_cloud, dist_to_axis[vox_to_cloud_ind, np.newaxis], axis=1 ) # Output: - Assigned cloud (X, Y, Z, Z0, tree_id, dist_to_axis) - tree vector return assigned_cloud, detected_trees, tree_heights