Source code for luna.pathology.cli.dsa.dsa_viz

import click
from decimal import Decimal
import pandas as pd
import json, geojson, ijson
import yaml
import copy
import time, os
from PIL import Image
import re
import numpy as np

from luna.pathology.cli.dsa.utils import get_color, get_continuous_color, \
    vectorize_np_array_bitmask_by_pixel_value

# Base DSA jsons
base_dsa_polygon_element = {"fillColor": "rgba(0, 0, 0, 0)", "lineColor": "rgb(0, 0, 0)","lineWidth": 2,"type": "polyline","closed": True, "points": [], "label": {"value": ""}}
base_dsa_point_element = {"fillColor": "rgba(0, 0, 0, 0)", "lineColor": "rgb(0, 0, 0)","lineWidth": 2,"type": "point", "center": [], "label": {"value": ""}}
base_dsa_annotation  = {"description": "", "elements": [], "name": ""}

# Qupath 20x mag factor
QUPATH_MAG_FACTOR = 0.5011
image_id_regex = "(.*).svs"

[docs]def check_filepaths_valid(filepaths): """Checks if all paths exist. Args: filepaths (list): file paths Returns: bool: True if all file paths exist, False otherwise """ all_files_found = True for filepath in filepaths: if not os.path.exists(filepath): print("ERROR: filepath in config: ", filepath, 'does not exist') all_files_found = False return all_files_found
[docs]def save_dsa_annotation(base_annotation, elements, annotation_name, output_folder, image_filename): """Helper function to save annotation elements to a json file. Args: base_annotation (dict): base annotation structure for DSA elements (list): list of annotation elements annotation_name (string): annotation name for HistomicsUI output_folder (string): path to a directory to save the annotation file image_filename (string): name of the image in DSA e.g. 123.svs Returns: string: annotation file path. None if error in writing the file. """ dsa_annotation = copy.deepcopy(base_annotation) dsa_annotation["elements"] = elements dsa_annotation["name"] = annotation_name image_id = re.search(image_id_regex, image_filename).group(1) annotation_name_replaced = annotation_name.replace(" ","_") os.makedirs(output_folder, exist_ok=True) outfile_name = os.path.join(output_folder, f"{annotation_name_replaced}_{image_id}.json") try: with open(outfile_name, 'w') as outfile: json.dump(dsa_annotation, outfile) return outfile_name except Exception as e: print("ERROR: write permissions needs to be enabled for: ", os.path.dirname(outfile_name)) return None
@click.command() @click.option("-d", "--data_config", help="path to your data config file that includes input/output parameters.", required=True, type=click.Path(exists=True)) @click.option("-s", "--source_type", help="string describing data source that is to be transformed into a dsa json. \ supports stardist-polygon, stardist-cell, regional-polygon, qupath-polygon, bitmask-polygon, heatmap", required=True) def cli(data_config, source_type): """DSA annotation builder data_config - yaml file with input, output, and method parameters. The method parameters differs based on the `source_type`. Please refer to the example configurations files. Some common parameters are: - input: path to results to be visualized. This could be a TSV, bitmasks, GeoJson files. Please see the supported source types for more details. - output_folder: directory where the DSA compatible annotation json file will be saved - image_filename: name of the image file in DSA e.g. 123.svs - annotation_name: name of the annotation to be displayed in DSA source_type - string describing data source that is to be transformed into a dsa json. supports stardist-polygon, stardist-cell, regional-polygon, qupath-polygon, bitmask-polygon, heatmap """ functions = {"stardist-polygon": stardist_polygon, "stardist-cell": stardist_cell, "regional-polygon": regional_polygon, "qupath-polygon": qupath_polygon, "bitmask-polygon": bitmask_polygon, "heatmap": heatmap} if source_type not in functions: print(f"ERROR: source_type {source_type} is not supported") else: annotation_filepath = functions[source_type](data_config) print(f"Annotation written to {annotation_filepath}")
[docs]def stardist_polygon(data_config): """Build DSA annotation json from stardist geojson classification results Args: data_config (string): path to your data config file that includes input/output parameters. input (string): path to stardist geojson classification results output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA line_colors (map, optional): user-provided line color map with {feature name:rgb values} fill_colors (map, optional): user-provided fill color map with {feature name:rgba values} Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) if not check_filepaths_valid([data['input']]): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() # can't handle NaNs for vectors, do this to replace all NaNs # TODO: find better fix # for now: https://stackoverflow.com/questions/17140886/how-to-search-and-replace-text-in-a-file with open(data["input"], 'r') as input_file: filedata = input_file.read() newdata = filedata.replace("NaN","-1") elements = [] for cell in ijson.items(newdata, "item"): label_name = cell['properties']['classification']['name'] coord_list = list(cell['geometry']['coordinates'][0]) # uneven nested list when iterative parsing of json --> make sure to get the list of coords # this can come as mixed types as well, so type checking needed while isinstance(coord_list, list) and isinstance(coord_list[0], list) and not isinstance(coord_list[0][0], (int,float,Decimal)): coord_list = coord_list[0] coords = [ [float(coord[0]), float(coord[1]), 0] for coord in coord_list] element = copy.deepcopy(base_dsa_polygon_element) element["label"]["value"] = label_name line_color, fill_color = get_color(label_name, data.get("line_colors", {}), data.get("fill_colors", {})) element["fillColor"] = fill_color element["lineColor"] = line_color element["points"] = coords elements.append(element) print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, data["annotation_name"], data["output_folder"], data["image_filename"]) return annotatation_filepath
[docs]def stardist_cell(data_config): """Build DSA annotation json from TSV classification data generated by stardist Processes a cell classification data generated by Qupath/stardist and adds the center coordinates of the cells as annotation elements. Args: data_config (string): path to your data config file that includes input/output parameters. input (string): path to TSV classification data generated by stardist output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA line_colors (map, optional): line color map with {feature name:rgb values} fill_colors (map, optional): fill color map with {feature name:rgba values} Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) if not check_filepaths_valid([data['input']]): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() # qupath_stardist_cell_tsv can be quite large to load all columns into memory (contains many feature columns), # so only load baisc columns that are needed for now cols_to_load = ["Name", "Class", "ROI", "Centroid X µm", "Centroid Y µm", "Parent"] df = pd.read_csv(data["input"], sep ="\t", usecols=cols_to_load) # do some preprocessing on the tsv -- e.g. stardist sometimes finds cells in glass df = df[df["Parent"] != "Glass"] # populate json elements elements = [] for idx, row in df.iterrows(): elements_entry = copy.deepcopy(base_dsa_point_element) # x,y coordinates from stardist are in microns so divide by QUPATH_MAG_FACTOR = 0.5011 (exact 20x mag factor used by qupath specifically) x = row["Centroid X µm"]/QUPATH_MAG_FACTOR y = row["Centroid Y µm"]/QUPATH_MAG_FACTOR # Get cell label and add to element label_name = row["Class"] elements_entry["label"]["value"] = label_name # get color and add to element line_color, fill_color = get_color(label_name, data.get("line_colors", {}), data.get("fill_colors", {})) elements_entry["fillColor"] = fill_color elements_entry["lineColor"] = line_color # add centroid coordinate of cell to element center = [x,y,0] elements_entry["center"] = center elements.append(elements_entry) print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, data["annotation_name"], data["output_folder"], data["image_filename"]) return annotatation_filepath
[docs]def regional_polygon(data_config): """Build DSA annotation json from regional annotation geojson Args: data_config (string): path to your data config file that includes input/output parameters. input (string): path to regional annotation geojson output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA line_colors (map, optional): line color map with {feature name:rgb values} fill_colors (map, optional): fill color map with {feature name:rgba values} Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) if not check_filepaths_valid([data['input']]): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() with open(data["input"]) as regional_file: regional_annotation = geojson.loads(geojson.load(regional_file)) elements = [] for annot in regional_annotation['features']: # get label name and add to element element = copy.deepcopy(base_dsa_polygon_element) label_name = annot.properties['label_name'] element["label"]["value"] = label_name # get label specific color and add to element line_color, fill_color = get_color(label_name, data.get("line_colors", {}), data.get("fill_colors", {})) element["fillColor"] = fill_color element["lineColor"] = line_color # add coordinates coords = annot['geometry']['coordinates'] # if coordinates have extra nesting, set coordinates to 2d array. coords_arr = np.array(coords) if coords_arr.ndim == 3 and coords_arr.shape[0] == 1: coords = np.squeeze(coords_arr).tolist() for c in coords: c.append(0) element["points"] = coords elements.append(element) print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, data["annotation_name"], data["output_folder"], data["image_filename"]) return annotatation_filepath
[docs]def qupath_polygon(data_config): """Build DSA annotation json from Qupath polygon geojson Args: data_config (string): path to your data config file that includes input/output parameters. input (string): path to Qupath polygon geojson output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA classes_to_include (list): list of classification labels to visualize e.g. ["Tumor", "Stroma", ...] line_colors (map, optional): line color map with {feature name:rgb values} fill_colors (map, optional): fill color map with {feature name:rgba values} Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) if not check_filepaths_valid([data['input']]): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() with open(data["input"]) as regional_file: pixel_clf_polygons = geojson.load(regional_file) elements = [] for polygon in pixel_clf_polygons: props = polygon.properties if 'classification' not in props: continue label_name = polygon.properties['classification']['name'] if label_name in data['classes_to_include']: element = copy.deepcopy(base_dsa_polygon_element) element["label"]["value"] = label_name # get label specific color and add to element line_color, fill_color = get_color(label_name, data.get("line_colors", {}), data.get("fill_colors", {})) element["fillColor"] = fill_color element["lineColor"] = line_color coords = polygon['geometry']['coordinates'] # uneven nesting of connected components for coord in coords: if isinstance(coord[0], list) and isinstance(coord[0][0], (int,float)): for c in coord: c.append(0) element["points"] = coord elements.append(element) else: for i in range(len(coord)): connected_component_coords = coord[i] connected_component_element = copy.deepcopy(element) for c in connected_component_coords: c.append(0) connected_component_element["points"] = connected_component_coords elements.append(connected_component_element) print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, data["annotation_name"], data["output_folder"], data["image_filename"]) return annotatation_filepath
[docs]def bitmask_polygon(data_config): """Build DSA annotation json from bitmask PNGs Vectorizes and simplifies contours from the bitmask. Args: data_config (string): path to your data config file that includes input/output parameters. input (map): map of {label:path_to_bitmask_png} output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA line_colors (map, optional): line color map with {feature name:rgb values} fill_colors (map, optional): fill color map with {feature name:rgba values} Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) bitmask_filepaths = list(data['input'].values()) if not check_filepaths_valid(bitmask_filepaths): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() elements = [] for bitmask_label, bitmask_filepath in data["input"].items(): Image.MAX_IMAGE_PIXELS = 5000000000 annotation = Image.open(bitmask_filepath) bitmask_np = np.array(annotation) simplified_contours = vectorize_np_array_bitmask_by_pixel_value(bitmask_np) for n, contour in enumerate(simplified_contours): element = copy.deepcopy(base_dsa_polygon_element) label_name = bitmask_label element["label"]["value"] = label_name # get label specific color and add to element line_color, fill_color = get_color(label_name, data.get("line_colors", {}), data.get("fill_colors", {})) element["fillColor"] = fill_color element["lineColor"] = line_color coords = contour.tolist() for c in coords: c.append(0) element["points"] = coords elements.append(element) print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, data["annotation_name"], data["output_folder"], data["image_filename"]) return annotatation_filepath
[docs]def heatmap(data_config): """Generate heatmap based on the tile scores Creates a heatmap for the given column, using the color palette `viridis` to set a fill value - the color ranges from purple to yellow, for scores from 0 to 1. Args: data_config (string): path to your data config file that includes input/output parameters. input (string): path to CSV with tile scores output_folder (string): directory where the DSA compatible annotation json file will be saved image_filename (string): name of the image file in DSA e.g. 123.svs annotation_name (string): name of the annotation to be displayed in DSA column (string): column to visualize e.g. tile_score tile_size (int): size of tiles scale_factor (int, optional): scale to match the image on DSA. By default, 1. Returns: string: annotation file path. None if error in writing the file. """ with open(data_config, 'r') as config_yaml: data = yaml.safe_load(config_yaml) if not check_filepaths_valid([data['input']]): return print("Building annotation for image: {}".format(data['image_filename'])) start = time.time() df = pd.read_csv(data["input"]) scaled_tile_size = int(data["tile_size"]) * int(data.get("scale_factor", 1)) elements = [] for _, row in df.iterrows(): element = copy.deepcopy(base_dsa_polygon_element) label_value = row[data["column"]] element["label"]["value"] = str(label_value) # get label specific color and add to elements line_color, fill_color = get_continuous_color(label_value) element["fillColor"] = fill_color element["lineColor"] = line_color # convert coordinate string to tuple using eval x,y = eval(row["coordinates"]) pixel_x = x * scaled_tile_size pixel_y = y * scaled_tile_size coords = [[pixel_x, pixel_y], [pixel_x+scaled_tile_size, pixel_y], [pixel_x+scaled_tile_size, pixel_y+scaled_tile_size], [pixel_x, pixel_y+scaled_tile_size],[pixel_x, pixel_y]] for c in coords: c.append(0) element["points"] = coords elements.append(element) annotation_name = data["column"] + "_" + data["annotation_name"] print("Time to build annotation", time.time() - start) annotatation_filepath = save_dsa_annotation(base_dsa_annotation, elements, annotation_name, data["output_folder"], data["image_filename"]) return annotatation_filepath
if __name__ == '__main__': cli()