Source code for luna.pathology.common.build_geojson

# Takes a numpy array containing annotations and splits it into label masks. For each label mask, the annotations
# are vectorized into a set of polygons. These polygons are then converted into the geoJSON format and written to file.
from typing import List, Dict
from skimage import measure
import numpy as np
import pandas as pd
import json
import ast
import copy
import signal
import shapely 
from shapely.geometry import Polygon, MultiPolygon, shape, mapping

from dask.distributed import secede, rejoin

# max amount of time for a geojson to be generated. if generation surpasses this limit, it is likely that the annotation file is
# too large or that there are annotation artifacts present in the slide. currently set to a 30 minute timeout
# 1800 seconds = 30 minutes; passed to signal.alarm() while contours are built
TIMEOUT_SECONDS = 1800

# Key under which the default labelset is stored in the labelsets dictionary
DEFAULT_LABELSET_NAME = 'DEFAULT_LABELS'
# Base template for geoJSON file. Always deep-copied before features are
# appended, so the template itself stays empty.
geojson_base = {
    "type": "FeatureCollection",
    "features": []
}

def build_geojson_from_pointclick_json(labelsets: dict, labelset: str, sv_json: List[dict]) -> list:
    """Build geoJSON from a slideviewer point-click JSON.

    Extracts point annotations from a slideviewer JSON object and converts
    them to a standardized geoJSON format. Entries whose class number is not
    part of the selected labelset are skipped.

    Args:
        labelsets (dict | str): label sets, either as a dictionary or as the
            string representation of one
            (e.g. {labelset: {label_number: label_name}})
        labelset (str): the name of the labelset, e.g. default_labels
        sv_json (list[dict]): annotations from slideviewer in the form of a
            list of dictionaries with 'x', 'y' and 'class' keys

    Returns:
        list: a list of geoJSON point feature dictionaries
    """
    # The label configuration is commonly stored as the repr of a dict; parse
    # it safely in that case. An already-parsed mapping is used as-is
    # (ast.literal_eval would reject it).
    if isinstance(labelsets, str):
        labelsets = ast.literal_eval(labelsets)
    mappings = labelsets[labelset]

    output_geojson = []
    for entry in sv_json:
        x = int(entry['x'])
        y = int(entry['y'])
        class_num = int(entry['class'])

        # ignore points labelled with a class outside of this labelset
        if class_num not in mappings:
            continue

        output_geojson.append({
            "type": "Feature",
            "id": "PathAnnotationObject",
            "geometry": {"type": "Point", "coordinates": [x, y]},
            "properties": {"classification": {"name": mappings[class_num]}},
        })

    return output_geojson
def find_parents(polygons: list) -> list:
    """Determine the parent/child relationships between polygons.

    For every polygon, the list is scanned in order for the first other
    polygon that contains it. The index of that polygon is recorded as the
    parent; -1 is recorded when no polygon contains it. For example,
    result[0] == 2 means that polygon 0's parent is polygon 2.

    Args:
        polygons (list): a list of shapely polygon objects

    Returns:
        list: one parent index (or -1) per input polygon, in input order
    """
    def first_container(inner):
        # Candidates that compare equal to the polygon itself are skipped
        # before the containment test is attempted.
        return next(
            (
                position
                for position, candidate in enumerate(polygons)
                if not inner == candidate and candidate.contains(inner)
            ),
            -1,
        )

    parent_nums = [first_container(polygon) for polygon in polygons]
    print(parent_nums)
    return parent_nums
# TODO test performance with/without polygon-tolerance. approximate_polygons(polygon_tolerance) might just be a slow and unnecessary step.
# adapted from: https://github.com/ijmbarr/image-processing-with-numpy/blob/master/image-processing-with-numpy.ipynb
def add_contours_for_label(annotation_geojson: Dict[str, any], annotation: np.ndarray, label_num: int, mappings: dict, contour_level: float) -> Dict[str, any]:
    """Create geoJSON feature dictionaries for one label.

    Finds the contours of the label's mask, builds a polygon per contour and
    appends the resulting geoJSON features to ``annotation_geojson``.
    Contours that are not contained in any other contour become top-level
    polygon features; contained contours are added to their top-level
    ancestor as additional coordinate rings (holes).

    Args:
        annotation_geojson (dict[str, any]): geoJSON result to populate
            (modified in place and returned)
        annotation (np.ndarray): npy array of bitmap
        label_num (int): the integer corresponding to the annotated label
        mappings (dict): label map for the specified label set
        contour_level (float): value along which to find contours in the array

    Returns:
        dict[str, any]: geoJSON with label contours

    Raises:
        KeyError: if ``mappings`` has no entry for ``label_num``, or if the
            parent relationships never reach a top-level polygon
    """
    if label_num not in annotation:
        print("No label " + str(label_num) + " found")
        return annotation_geojson

    print("Building contours for label " + str(label_num))

    num_pixels = np.count_nonzero(annotation == label_num)
    print("num_pixels with label", num_pixels)

    mask = np.where(annotation == label_num, 1, 0).astype(np.int8)
    contours = measure.find_contours(mask, level=contour_level)
    print("num_contours", len(contours))

    polygons = [Polygon(np.squeeze(c)) for c in contours]
    parent_nums = find_parents(polygons)

    # switch the two coordinates of every point (otherwise the output is
    # flipped) and truncate them to integers
    rings = [
        [[int(point[1]), int(point[0])] for point in contour.tolist()]
        for contour in contours
    ]

    # Pass 1: every contour without a parent is a top-level polygon feature.
    # Building these first means a hole can always find its parent, whatever
    # order the contours were returned in.
    polygon_by_index_number = {}
    for index, parent in enumerate(parent_nums):
        if parent != -1:
            continue
        polygon_by_index_number[index] = {
            "type": "Feature",
            "properties": {
                "label_num": int(label_num),
                "label_name": mappings[label_num],
            },
            "geometry": {"type": "Polygon", "coordinates": [rings[index]]},
        }

    # Pass 2: attach every contained contour to its top-level ancestor.
    for index, parent in enumerate(parent_nums):
        if parent == -1:
            continue
        # The recorded parent may itself be a hole; climb until a top-level
        # polygon is reached. The hop limit guards against cyclic parents.
        root = parent
        hops = 0
        while parent_nums[root] != -1 and hops < len(parent_nums):
            root = parent_nums[root]
            hops += 1
        # append as hole to the top-level polygon
        polygon_by_index_number[root]['geometry']['coordinates'].append(rings[index])

    # add parent polygon feature dicts to running annotation geojson object
    annotation_geojson['features'].extend(polygon_by_index_number.values())

    return annotation_geojson
def handler(signum: int, frame: object) -> None:
    """Signal handler that aborts geojson generation.

    Installed for SIGALRM; it turns the alarm into a TimeoutError so the
    caller can report which annotation took too long.

    Args:
        signum (int): number of the signal that was received (unused)
        frame (object): stack frame that was interrupted, or None (unused)

    Raises:
        TimeoutError: always
    """
    raise TimeoutError("Geojson generation timed out.")
def build_labelset_specific_geojson(default_annotation_geojson:Dict[str, any], labelset:dict) -> Dict[str, any]:
    """Build a geoJSON restricted to one labelset.

    Instead of working with a large geoJSON object, the polygons that
    correspond to the labels of a specific labelset are copied into a smaller
    object and renamed according to that labelset.

    Args:
        default_annotation_geojson (dict[str, any]): geoJSON annotation whose
            features carry a 'label_num' property
        labelset (dict): label set dictionary mapping label number to name

    Returns:
        dict[str, any]: geoJSON with only polygons from the provided labelset,
            or None when no feature belongs to the labelset
    """
    annotation_geojson = copy.deepcopy(geojson_base)
    selected_features = annotation_geojson['features']

    for feature in default_annotation_geojson['features']:
        # the label number is fixed; only the label name may differ per labelset
        label_num = feature['properties']['label_num']
        if label_num not in labelset:
            continue

        # copy so the default geoJSON is left untouched, then rename
        relabelled_feature = copy.deepcopy(feature)
        relabelled_feature['properties']['label_name'] = labelset[label_num]
        selected_features.append(relabelled_feature)

    # no polygons containing labels in labelset
    if not selected_features:
        return None

    return annotation_geojson
def build_all_geojsons_from_default(default_annotation_geojson: Dict[str, any], all_labelsets: Dict[str, dict], contour_level: float) -> Dict[str, str]:
    """Build geoJSON strings for a set of labelsets.

    Wraps build_labelset_specific_geojson with logic to generate annotations
    for multiple labelsets. The default labelset reuses the input geoJSON
    unchanged; every other labelset gets a filtered and renamed copy.
    Labelsets that yield no geoJSON are left out of the result.

    Args:
        default_annotation_geojson (dict[str, any]): input geoJSON built with
            the default labelset
        all_labelsets (dict[str, dict]): labelset name mapped to its
            label dictionary
        contour_level (float): value along which to find contours
            (currently unused by this function)

    Returns:
        dict[str, str]: labelset name mapped to the corresponding geoJSON
            serialized as a JSON string
    """
    labelset_name_to_labelset_specific_geojson = {}

    for labelset_name, labelset in all_labelsets.items():
        if labelset_name == DEFAULT_LABELSET_NAME:
            annotation_geojson = default_annotation_geojson
        else:
            # use default labelset geojson to build labelset specific geojson
            annotation_geojson = build_labelset_specific_geojson(default_annotation_geojson, labelset)

        # only add if geojson not none (built correctly and contains >= 1 polygon)
        if annotation_geojson:
            labelset_name_to_labelset_specific_geojson[labelset_name] = json.dumps(annotation_geojson)

    return labelset_name_to_labelset_specific_geojson
def build_default_geojson_from_annotation(annotation_npy_filepath:str, all_labelsets:dict, contour_level:float):
    """Build the geoJSON for a numpy annotation using the default label set.

    Args:
        annotation_npy_filepath (str): path to the numpy annotation file
        all_labelsets (dict): a dictionary of label sets; the entry stored
            under DEFAULT_LABELSET_NAME is used
        contour_level (float): value along which to find contours

    Returns:
        dict[str, any]: the default geoJSON annotation, or None when the
            bitmap has no annotated pixels or no feature could be built
    """
    annotation = np.load(annotation_npy_filepath)
    default_annotation_geojson = copy.deepcopy(geojson_base)

    # signal logic doesn't work in dask distributed setup, so no timeout
    # alarm is armed here
    default_labelset = all_labelsets[DEFAULT_LABELSET_NAME]

    has_annotated_pixels = (annotation > 0).any()
    if not has_annotated_pixels:
        print(f"No annotated pixels detected in bitmap loaded from {annotation_npy_filepath}")
        return None

    # vectorize every label of the default labelset
    for label_num in default_labelset:
        default_annotation_geojson = add_contours_for_label(
            default_annotation_geojson,
            annotation,
            label_num,
            default_labelset,
            float(contour_level),
        )

    # empty geojson created, return nan and delete from geojson table
    if not default_annotation_geojson['features']:
        print(f"Something went wrong with building default geojson from {annotation_npy_filepath}, quitting")
        return None

    return default_annotation_geojson
def build_geojson_from_annotation(df: pd.DataFrame) -> pd.DataFrame:
    """Build geoJSON for all annotation labels in the specified labelset.

    Reads the label configuration, npy file path, labelset name and contour
    level from the first row of ``df``, vectorizes every label of the
    labelset and stores the serialized geoJSON in a "geojson" column.
    Generation is aborted with a TimeoutError after TIMEOUT_SECONDS.

    Args:
        df (pandas.DataFrame): input regional annotation table with
            label_config, npy_filepath, labelset and contour_level columns

    Returns:
        pandas.DataFrame: dataframe with the geojson field populated; the
            dataframe is returned unchanged when no feature was built

    Raises:
        TimeoutError: if building the contours exceeds TIMEOUT_SECONDS
    """
    labelsets = df.label_config.values[0]
    annotation_npy_filepath = df.npy_filepath.values[0]
    labelset = df.labelset.values[0]
    contour_level = df.contour_level.values[0]

    # the label configuration is stored as the string repr of a dict
    labelsets = ast.literal_eval(labelsets)
    mappings = labelsets[labelset]

    print("\nBuilding GeoJSON annotation from npy file:", annotation_npy_filepath)

    annotation = np.load(annotation_npy_filepath)
    annotation_geojson = copy.deepcopy(geojson_base)

    signal.signal(signal.SIGALRM, handler)
    signal.alarm(TIMEOUT_SECONDS)

    try:
        for label_num in mappings:
            annotation_geojson = add_contours_for_label(annotation_geojson, annotation, label_num, mappings, float(contour_level))
    except TimeoutError:
        print("Timeout Error occured while building geojson from slide", annotation_npy_filepath)
        raise
    finally:
        # Always disable the alarm, including when some other exception
        # escapes the loop. Otherwise the pending alarm would fire later and
        # raise TimeoutError in unrelated code.
        signal.alarm(0)

    # empty geojson created, return nan and delete from geojson table
    if len(annotation_geojson['features']) == 0:
        return df

    df["geojson"] = json.dumps(annotation_geojson)
    return df
def concatenate_regional_geojsons(geojson_list: List[str]) -> Dict[str, any]:
    """Concatenate regional annotations.

    Concatenates geojsons if there is more than one annotation for the
    labelset. The first geoJSON is used as the base and the features of all
    following geoJSONs are appended to it in order.

    Args:
        geojson_list (list[str]): list of geoJSON strings

    Returns:
        dict[str, any]: a single concatenated geoJSON

    Raises:
        IndexError: if ``geojson_list`` is empty
    """
    # create json from str representations
    parsed_geojsons = [json.loads(geojson) for geojson in geojson_list]

    concat_geojson = parsed_geojsons[0]
    if len(parsed_geojsons) == 1:
        return concat_geojson

    # report once, not once per appended geojson
    print(f"Concatenating {len(parsed_geojsons)} geojsons")
    for json_dict in parsed_geojsons[1:]:
        concat_geojson['features'].extend(json_dict['features'])

    return concat_geojson