Source code for luna.pathology.common.slideviewer_client

'''
Created on January 31, 2021

@author: pashaa@mskcc.org

Functions for downloading annotations from SlideViewer
'''
import logging
import os
import shutil
import zipfile
from typing import Any, Dict, Optional

import requests

# Module-level logger named after this module; used by the functions below.
logger = logging.getLogger(__name__)

def get_slide_id(full_filename: str) -> str:
    """Extract the slide id from a slideviewer full file name.

    The full_filename in the slideviewer csv is of the format
    year;HOBS_ID;slide_id.svs, for example:
    2013;HobS13-283072057510;1435197.svs

    Args:
        full_filename (str): full filename of slide

    Returns:
        str: the last ';'-separated field of full_filename with a trailing
        '.svs' extension removed (the numeric slide id for names in the
        format above)
    """
    extension = ".svs"
    slide_name = full_filename.split(";")[-1]
    # Strip the extension only when it is a suffix; a blanket replace() would
    # also delete '.svs' occurring in the middle of the name.
    if slide_name.endswith(extension):
        slide_name = slide_name[:-len(extension)]
    return slide_name


def fetch_slide_ids(url: str, project_id: int, dest_dir: str, csv_file: str = None) -> list:
    """Fetch the slide ids of a slideviewer project.

    The project csv is either downloaded from the slideviewer server (when
    csv_file is not given or does not exist) or copied from csv_file. In both
    cases it ends up in dest_dir as 'project_<project_id>.csv' and is then
    parsed for slide file names.

    Args:
        url (str or None): slideviewer url. url may be None if csv_file is specified.
        project_id (int): slideviewer project id from which to fetch slide ids
        dest_dir (str): directory where csv file should be downloaded
        csv_file (str): slideviewer csv file may be provided to override the need
            to download the file

    Returns:
        list: list of [slideviewer_path, slide_id, sv_project_id], one entry
        per non-blank line after the csv header

    Raises:
        ValueError: if the csv has to be downloaded but url is None
        requests.HTTPError: if the server answers the csv download with an
            error status
    """
    # number of leading lines in the slideviewer csv that precede the slide names
    header_line_count = 4

    new_csv_file = os.path.join(dest_dir, 'project_' + str(project_id) + '.csv')

    if not csv_file or not os.path.exists(csv_file):
        # no usable csv given: download the whole slide set using the project id
        if url is None:
            raise ValueError(
                "url must be provided when csv_file is missing or does not exist")

        res = requests.get(url + 'exportProjectCSV?pid={pid}'.format(pid=project_id))
        # do not store (and later parse) an error page as if it were the csv
        res.raise_for_status()

        with open(new_csv_file, "wb") as slideoutfile:
            slideoutfile.write(res.content)

    elif os.path.abspath(csv_file) != os.path.abspath(new_csv_file):
        # copy given csv_file to dest directory; skipped when it already is
        # the destination file, because shutil.copy refuses to copy a file
        # onto itself
        shutil.copy(csv_file, new_csv_file)

    # read whole slide image file names contained in the project in slide viewer
    slides = []
    with open(new_csv_file) as slideoutfile:
        for line_number, line in enumerate(slideoutfile, start=1):
            if line_number <= header_line_count:
                continue

            full_filename = line.strip()
            if not full_filename:
                # blank (e.g. trailing) lines carry no slide
                continue

            slides.append([full_filename, get_slide_id(full_filename), project_id])

    return slides


def download_zip(url: str, dest_path: str, chunk_size: int = 128) -> bool:
    """Download zip file

    Streams the response of the specified URL into the specified file path.
    see https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url

    The destination file is opened for writing before any data is inspected,
    so it exists on disk even when False is returned.

    Args:
        url (str): slideviewer url to download zip from
        dest_path (str): file path where zipfile should be saved
        chunk_size (int): size in bytes of chunks to batch out during download

    Returns:
        bool: False if slideviewer answered with its 'Label image not found.'
        message, True once all chunks have been written to dest_path
    """
    # message from slideviewer; it is only recognised when it arrives as one
    # whole chunk, i.e. when chunk_size is at least as long as the message
    not_found_message = b'Label image not found.'

    # the context manager releases the streamed connection on every exit
    # path, including the early return below
    with requests.get(url, stream=True) as response:
        with open(dest_path, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk == not_found_message:
                    return False
                fd.write(chunk)
    return True

def unzip(zipfile_path: str) -> Optional[zipfile.ZipFile]:
    """Open a zip file for reading.

    Args:
        zipfile_path (str): path of zipfile to unzip

    Returns:
        zipfile.ZipFile opened on zipfile_path, or None if the file is not a
        valid zip file (the error is logged)
    """
    logger.info("Unzipping %s", zipfile_path)
    try:
        return zipfile.ZipFile(zipfile_path)  # returns read file pointer
    except zipfile.BadZipFile:
        # logger.exception records the traceback along with the message
        logger.exception('Dumping invalid Zipfile %s:', zipfile_path)
        return None


def download_sv_point_annotation(url: str) -> Optional[Dict[str, Any]]:
    """Download slideviewer point annotation

    Calls slideviewer API with the given url and decodes the response as json.

    Args:
        url (str): slide viewer api to call

    Returns:
        dict[str, any]: json response, or None if the request or the json
        decoding failed, or if the response is an empty list
    """
    try:
        response = requests.get(url)
        data = response.json()
    except Exception:
        # best-effort call: any failure is logged with its traceback and
        # reported to the caller as None; %s formatting keeps the handler
        # itself from failing when url is not a string
        logger.exception("General exception raised while trying %s", url)
        return None

    logger.info("Found data = %s", data)

    # an empty json list is treated as "no annotation"
    if isinstance(data, list) and not data:
        logger.warning("Label annotation file does not exist for slide and user.")
        return None

    return data
Luna documentation

Source code for luna.pathology.common.slideviewer_client