""" Higher-level transformation functions """
import pandas as pd
import scipy.stats
import numpy as np
from pathlib import Path
from luna.pathology.spatial.stats import *
[docs]def generate_k_function_statistics(cell_paths, method_data, main_index=None):
"""
Compute K-function spatial statistics on given cell-data
Args:
cell_paths (str or list[str]): paths to a single or multiple FOV regions
method_data (dict): Configuration:
"index": (str, optional) Column containting the patient/desired ID, if available (overrides main_index)
"phenotype1" : {
"name" : (str) Column name to query
'value' : (str) Phenotype string to match (e.g. CD68)
},
"phenotype2" : {
"name" : (str) Column name to query
'value' : (str) Phenotype string to match (e.g. panCK)
},
"count" : (bool) Flag to compute counting stats.
"radius" : (float) Radius cutoff
"intensity" : (str, optional) Column containing intensity information
"distance" : (bool) Flag to compute intensity-distance stats.
Returns:
pd.DataFrame: spatial statistics aggregated over FOVs
"""
if type(cell_paths)==str:
cell_paths = [cell_paths]
print (cell_paths)
agg_k_data = {}
pheno1_col = method_data["phenotype1"]["name"]
pheno1_val = method_data["phenotype1"]["value"]
pheno2_col = method_data["phenotype2"]["name"]
pheno2_val = method_data["phenotype2"]["value"]
index_col = method_data.get("index", None)
radius = method_data["radius"]
count = method_data["count"]
distance = method_data["distance"]
intensity_col = method_data.get("intensity", None)
indices = set()
for cell_path in cell_paths:
if Path(cell_path).suffix == ".parquet":
df = pd.read_parquet( cell_path )
elif Path(cell_path).suffix == ".csv":
df = pd.read_csv( cell_path )
else:
raise RuntimeError(f"Invalid input data type {cell_path}")
# Look up the index for this slice
if index_col:
index = df[method_data['index']].iloc[0]
indices.add(index)
# Create the data arrays
pheno1 = df[df[pheno1_col] == pheno1_val]
pheno2 = df[df[pheno2_col] == pheno2_val]
p1XY = np.array(pheno1[["Centroid X µm","Centroid Y µm"]])
p2XY = np.array(pheno2[["Centroid X µm","Centroid Y µm"]])
if intensity_col:
I = np.array(pheno2[intensity_col])
else:
I = []
if distance:
raise RuntimeError("Can't compute intensity-distance function without intensity information")
if p1XY.size == 0:
print(f"WARNING: List of phenotype 1 cells ({pheno1_val}) is empty for {index}")
if p2XY.size == 0:
print(f"WARNING: List of phenotype 2 cells ({pheno2_val}) is empty for {index}")
# Compute the K function
print (f"Running... {cell_path}")
fov_k_data = Kfunction(p1XY, p2XY, radius, ls=True, count=count, intensity=I, distance=distance)
for key in fov_k_data:
if key in agg_k_data:
np.append(agg_k_data[key],fov_k_data[key])
else:
agg_k_data[key] = fov_k_data[key]
data_out = {}
for kfunct in agg_k_data.keys():
arr = agg_k_data[kfunct]
if len(arr)==0: arr=[0]
data_out.update(
{
f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_mean": np.mean(arr),
f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_variance": np.var(arr),
f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_skew": scipy.stats.skew(arr),
f"For_{pheno1_val}_Find_{pheno2_val}_at{radius}_{kfunct}_{intensity_col}_kurtosis": scipy.stats.kurtosis(arr)
}
)
df_slice_out = pd.DataFrame(data_out, index=[0]).astype(np.float64)
if main_index is None:
if not len(indices)==1:
raise RuntimeError (f"Multiple cell maps with different indices! Found: {indices}")
main_index = indices.pop()
df_slice_out['main_index'] = main_index
df_slice_out = df_slice_out.set_index('main_index')
print (df_slice_out)
return df_slice_out