Source code for cloudreg.scripts.stitching

# local imports
from .util import (
    S3Url,
    upload_file_to_s3,
)

# generate xml_import and terastitcher commands
from configparser import ConfigParser
import math
import argparse
from psutil import virtual_memory
import joblib
import boto3
import os
import subprocess
import shlex
from glob import glob
from tqdm import tqdm

parastitcher_path = f"{os.path.dirname(os.path.realpath(__file__))}/parastitcher.py"
paraconverter_path = f"{os.path.dirname(os.path.realpath(__file__))}/paraconverter.py"
python_path = "python"

STITCH_ONLY, COMPUTE_ONLY, ALL_STEPS = range(3)


[docs]def write_import_xml(fname_importxml, scanned_matrix, metadata):
    """Write xml_import file for Terastitcher based on COLM metadata

    Args:
        fname_importxml (str): Path to wheer xml_import.xml should be stored
        scanned_matrix (list of lists): List of locations that have been imaged by the microscope
        metadata (dict): Metadata assocated with this COLM experiment
    """
    img_regex = ".*.tiff"
    eofl = "\r\n"
    with open(fname_importxml, "w") as fp:
        fp.writelines(
            [
                f'<?xml version="1.0" encoding="UTF-8" ?>{eofl}',
                f'<!DOCTYPE TeraStitcher SYSTEM "TeraStitcher.DTD">{eofl}',
                f'<TeraStitcher volume_format="TiledXY|2Dseries">{eofl}',
                f"\t<stacks_dir value=\"{metadata['stack_dir']}\" />{eofl}",
                f'\t<ref_sys ref1="1" ref2="2" ref3="3" />{eofl}',
                f"\t<voxel_dims V=\"{metadata['voxel_size'][1]}\" H=\"{metadata['voxel_size'][0]}\" D=\"{metadata['voxel_size'][2]}\" />{eofl}",
                f"\t<origin V=\"{metadata['origin'][1]}\" H=\"{metadata['origin'][0]}\" D=\"{metadata['origin'][2]}\" />{eofl}",
                f"\t<mechanical_displacements V=\"{metadata['mechanical_displacements'][1]}\" H=\"{metadata['mechanical_displacements'][0]}\" />{eofl}",
                f"\t<dimensions stack_rows=\"{metadata['grid_size_Y']}\" stack_columns=\"{metadata['grid_size_X']}\" stack_slices=\"{metadata['num_slices']}\" />{eofl}",
                f"\t<STACKS>{eofl}",
            ]
        )
        # print(metadata['grid_size_Y'])
        # print(metadata['grid_size_X'])
        for j in range(metadata["grid_size_Y"]):
            for i in range(metadata["grid_size_X"]):
                abs_X_ef = i * metadata["abs_X"]
                abs_Y_ef = j * metadata["abs_Y"]
                folder_num = i + j * metadata["grid_size_X"]
                dir_name = f"LOC{folder_num:03}"
                if scanned_matrix[j][i] == "1":
                    loc_string = f"\t\t<Stack N_CHANS=\"1\" N_BYTESxCHAN=\"2\" ROW=\"{j}\" COL=\"{i}\" ABS_V=\"{abs_Y_ef}\" ABS_H=\"{abs_X_ef}\" ABS_D=\"0\" STITCHABLE=\"no\" DIR_NAME=\"{dir_name}\" Z_RANGES=\"[0,{metadata['num_slices']})\" IMG_REGEX=\"{img_regex}\">{eofl}"
                else:
                    loc_string = f'\t\t<Stack N_CHANS="1" N_BYTESxCHAN="2" ROW="{j}" COL="{i}" ABS_V="{abs_Y_ef}" ABS_H="{abs_X_ef}" ABS_D="0" STITCHABLE="no" DIR_NAME="" Z_RANGES="" IMG_REGEX="{img_regex}">{eofl}'
                fp.writelines(
                    [
                        loc_string,
                        f"\t\t\t<NORTH_displacements />{eofl}",
                        f"\t\t\t<EAST_displacements />{eofl}",
                        f"\t\t\t<SOUTH_displacements />{eofl}",
                        f"\t\t\t<WEST_displacements />{eofl}",
                        f"\t\t</Stack>{eofl}",
                    ]
                )
        fp.writelines([f"\t</STACKS>{eofl}", f"</TeraStitcher>{eofl}"])


[docs]def write_terastitcher_commands(fname_ts, metadata, stitched_dir, do_steps):
    """Generate Terastitcher commands from metadata

    Args:
        fname_ts (str): Path to bash file to store Terastitcher commands
        metadata (dict): Metadata information about experiment
        stitched_dir (str): Path to where stitched data will be stored
        do_steps (int): Indicator of which steps to run

    Returns:
        list of str: List of Terastitcher commands to run
    """
    eofl = "\n"
    subvoldim = 60
    # subvoldim = max(metadata['num_slices']//num_processes,20)
    mem = virtual_memory()
    num_cpus = joblib.cpu_count()
    num_processes = min(
        math.floor(
            mem.total
            / (
                (metadata["num_pix"] ** 2)
                * 4
                * (min(metadata["grid_size_X"], metadata["grid_size_Y"]) + 1)
                * subvoldim
            )
        )
        + 1,
        num_cpus,
    )
    depth = 5
    num_proc_merge = min(
        math.floor(mem.total / (metadata["height"] * metadata["width"] * 2 * depth)),
        num_cpus,
    )
    print(f"num processes to use for stitching is: {num_processes}")
    # step1 = f"terastitcher --test --projin={metadata['stack_dir']}/xml_import.xml --imout_depth=16 --sparse_data{eofl}"
    step2 = f"mpirun -n {num_processes} {python_path} {parastitcher_path} -2 --projin=\"{metadata['stack_dir']}/xml_import.xml\" --projout=\"{metadata['stack_dir']}/xml_displcomp.xml\" --sV={metadata['sV']} --sH={metadata['sH']} --sD={metadata['sD']} --subvoldim={subvoldim} --sparse_data --exectimes --exectimesfile=\"{metadata['stack_dir']}/t_displcomp\"{eofl}"
    step3 = f"terastitcher --displproj --projin=\"{metadata['stack_dir']}/xml_displcomp.xml\" --projout=\"{metadata['stack_dir']}/xml_displproj.xml\" --sparse_data{eofl}"
    step4 = f"terastitcher --displthres --projin=\"{metadata['stack_dir']}/xml_displproj.xml\" --projout=\"{metadata['stack_dir']}/xml_displthres.xml\" --threshold=0.3 --sparse_data{eofl}"
    step5 = f"terastitcher --placetiles --projin=\"{metadata['stack_dir']}/xml_displthres.xml\"{eofl}"
    step6 = f"mpirun -n {num_proc_merge} {python_path} {paraconverter_path} -s=\"{metadata['stack_dir']}/xml_merging.xml\" -d=\"{stitched_dir}\" --sfmt=\"TIFF (unstitched, 3D)\" --dfmt=\"TIFF (series, 2D)\" --height={metadata['height']} --width={metadata['width']} --depth={depth}{eofl}"
    ts_commands = []

    if do_steps == STITCH_ONLY:
        # do step 5 in case xml_merging doesnt exist
        ts_commands.extend([step5, step6])
    elif do_steps == COMPUTE_ONLY:
        # do step 5 twice to fix terastitcher issue where placetiles cant find tiffs in folder        
        ts_commands.extend([step2, step3, step4, step5, step5])
    else:
        # do step 5 twice to fix terastitcher issue
        ts_commands.extend([step2, step3, step4, step5, step5, step6])

    with open(fname_ts, "w") as fp:
        fp.writelines(ts_commands)

    return ts_commands


[docs]def get_metadata(path_to_config):
    """Get metadata from COLM config file.

    Args:
        path_to_config (str): Path to Experiment.ini file (COLM config file)

    Returns:
        dict: Metadata information.
    """
    metadata = {}

    config = ConfigParser()
    config.read(path_to_config)

    metadata["grid_size_X"] = int(
        config["North Scan Region"]["Num Horizontal"].strip('"')
    )
    metadata["grid_size_Y"] = int(
        config["North Scan Region"]["Num Vertical"].strip('"')
    )
    metadata["z_step"] = int(
        float(config["North Scan Region"]["Stack Step (mm)"].strip('"')) * 1000
    )

    metadata["num_slices"] = int(
        config["Experiment Settings"]["Num in stack (Top Left Corner)"].strip('"')
    )
    metadata["num_pix"] = int(config["Experiment Settings"]["X Resolution"].strip('"'))
    metadata["num_ch"] = int(
        config["Experiment Settings"]["Num Enabled Channels"].strip('"')
    )

    metadata["overlap_X"] = (
        float(
            config["North Scan Region Stats"]["Actual Horizontal Overlap (%)"].strip(
                '"'
            )
        )
        / 100
    )
    metadata["overlap_Y"] = (
        float(
            config["North Scan Region Stats"]["Actual Vertical Overlap (%)"].strip('"')
        )
        / 100
    )

    mag_idx = config["Objectives"]["North"].find("x") - 2
    metadata["mag"] = int(config["Objectives"]["North"][mag_idx : mag_idx + 2])

    metadata["num_pix"] = int(config["Experiment Settings"]["X Resolution"].strip('"'))
    metadata["num_ch"] = int(
        config["Experiment Settings"]["Num Enabled Channels"].strip('"')
    )
    metadata["scale_factor"] = 2048 / metadata["num_pix"]
    metadata["origin"] = (0, 0, 0)
    scale_factor = metadata["scale_factor"]
    if metadata["mag"] == 4:
        metadata["voxel_size"] = (
            1.46 * scale_factor,
            1.46 * scale_factor,
            metadata["z_step"],
        )
        # terastitcher parameters
        # X,Y,Z search radius in voxels to compute tile displacement
        metadata["sH"] = math.ceil(60 / scale_factor)
        metadata["sV"] = math.ceil(60 / scale_factor)
        metadata["sD"] = math.ceil(20 / scale_factor)

    elif metadata["mag"] == 10:
        metadata["voxel_size"] = (
            0.585 * scale_factor,
            0.585 * scale_factor,
            metadata["z_step"],
        )
        # terastitcher parameters
        # X,Y,Z search radius in voxels to compute tile displacement
        metadata["sH"] = 100
        metadata["sV"] = 60
        metadata["sD"] = 5
    elif metadata["mag"] == 25:
        metadata["voxel_size"] = (
            0.234 * scale_factor,
            0.234 * scale_factor,
            metadata["z_step"],
        )
        # terastitcher parameters
        # X,Y,Z search radius in voxels to compute tile displacement
        metadata["sH"] = math.ceil(60 / scale_factor)
        metadata["sV"] = math.ceil(60 / scale_factor)
        metadata["sD"] = math.ceil(20 / scale_factor)
    else:
        raise ("The only magnifications supported are 4,  10, or 25")
    metadata["mechanical_displacements"] = (
        math.floor(
            metadata["num_pix"]
            * (1 - metadata["overlap_X"])
            * metadata["voxel_size"][0]
        ),
        math.floor(
            metadata["num_pix"]
            * (1 - metadata["overlap_Y"])
            * metadata["voxel_size"][1]
        ),
    )
    metadata["abs_X"] = math.floor(metadata["num_pix"] * (1 - metadata["overlap_X"]))
    metadata["abs_Y"] = math.floor(metadata["num_pix"] * (1 - metadata["overlap_Y"]))
    metadata["width"] = math.ceil(
        metadata["abs_X"] * metadata["grid_size_X"]
        + metadata["num_pix"] * metadata["overlap_X"]
    )
    metadata["height"] = math.ceil(
        metadata["abs_Y"] * metadata["grid_size_Y"]
        + metadata["num_pix"] * metadata["overlap_Y"]
    )
    print(f"overlap_X: {metadata['overlap_X']}")
    print(f"overlap_Y: {metadata['overlap_Y']}")
    print(f"abs_X: {metadata['abs_X']}")
    print(f"abs_Y: {metadata['abs_Y']}")
    print(f"width: {metadata['width']}")
    print(f"height: {metadata['height']}")
    return metadata


[docs]def get_scanned_cells(fname_scanned_cells):
    """Read Scanned Cells.txt file from COLM into list

    Args:
        fname_scanned_cells (str): Path to scanned cells file.

    Returns:
        list of lists: Indicates whether or not a given location has been imaged on the COLM
    """
    # read scanned matrix file
    scanned_matrix = []
    with open(fname_scanned_cells, "r") as fp:
        for line in fp.readlines():
            x = line.strip().split(",")
            scanned_matrix.append(x)
    return scanned_matrix


[docs]def generate_stitching_commands(
    stitched_dir, stack_dir, metadata_s3_bucket, metadata_s3_path, do_steps=ALL_STEPS
):
    """Generate Terastitcher stitching commands given COLM metadata files.

    Args:
        stitched_dir (str): Path to store stitched data at.
        stack_dir (str): Path to unstiched raw data.
        metadata_s3_bucket (str): Name of S3 bucket in which metdata is located.
        metadata_s3_path (str): Specific path to metadata files in the bucket
        do_steps (int, optional): Represents which Terastitcher steps to run. Defaults to ALL_STEPS (2).

    Returns:
        tuple (dict, list of str): Metadata and list of Terastitcher commands
    """

    # download COLM metadata files
    # if they don't exist locally
    scanned_cells_path = f"{stack_dir}/Scanned Cells.txt"
    config_file_path = f"{stack_dir}/Experiment.ini"
    if not os.path.exists(scanned_cells_path) or not os.path.exists(config_file_path):
        s3 = boto3.resource("s3")
        s3.Object(
            metadata_s3_bucket, f"{metadata_s3_path}/Scanned Cells.txt"
        ).download_file(scanned_cells_path)
        s3.Object(
            metadata_s3_bucket, f"{metadata_s3_path}/Experiment.ini"
        ).download_file(config_file_path)

    # get metadata
    metadata = get_metadata(config_file_path)
    metadata["stack_dir"] = stack_dir

    # load scanned cells to indicate which locations contain data
    scanned_matrix = get_scanned_cells(scanned_cells_path)

    # write xml_import file for terastitcher
    fname_importxml = f"{stack_dir}/xml_import.xml"
    write_import_xml(fname_importxml, scanned_matrix, metadata)

    fname_ts = f"{stack_dir}/terastitcher_commands.sh"
    ts_commands = write_terastitcher_commands(
        fname_ts, metadata, stitched_dir, do_steps
    )

    return metadata, ts_commands


[docs]def run_terastitcher(
    raw_data_path,
    stitched_data_path,
    input_s3_path,
    log_s3_path=None,
    stitch_only=False,
    compute_only=False,
):
    """Run Terastitcher commands to fully stitch raw data.

    Args:
        raw_data_path (str): Path to raw data (VW0 folder for COLM data)
        stitched_data_path (str): Path to where stitched data will be stored
        input_s3_path (str): S3 Path to where raw data and metadata live
        log_s3_path (str, optional): S3 path to store intermediates and XML files for Terastitcher. Defaults to None.
        stitch_only (bool, optional): Do stitching only if True. Defaults to False.
        compute_only (bool, optional): Compute alignments only if True. Defaults to False.

    Returns:
        dict: Metadata associated with this sample from Experiment.ini file (COLM data)
    """

    input_s3_url = S3Url(input_s3_path.strip("/"))

    if stitch_only:
        do_steps = STITCH_ONLY
    elif compute_only:
        do_steps = COMPUTE_ONLY
    else:
        do_steps = ALL_STEPS

    metadata, commands = generate_stitching_commands(
        stitched_data_path,
        raw_data_path,
        input_s3_url.bucket,
        input_s3_url.key,
        do_steps,
    )

    # run the Terastitcher commands
    mdata = f"{raw_data_path}/mdata.bin"
    for i in commands:
        print(i)
        subprocess.run(shlex.split(i))
        if os.path.exists(mdata):
            os.remove(mdata)

    # # upload xml results to log_s3_path if not None
    # # and if not stitch_only
    if log_s3_path:
        log_s3_url = S3Url(log_s3_path.strip("/"))
        files_to_save = glob(f"{raw_data_path}/*.xml")
        for i in tqdm(files_to_save, desc="saving xml files to S3"):
            out_path = i.split("/")[-1]
            upload_file_to_s3(i, log_s3_url.bucket, f"{log_s3_url.key}/{out_path}")

    return metadata


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        "Create xml_import.xml file and terastitcher_commands.sh from Experiment.ini file"
    )
    parser.add_argument(
        "--stitched_dir",
        help="Directory to  store stitched tifs.",
        type=str,
        default="/home/ubuntu/ssd2/stitched_data",
    )
    parser.add_argument(
        "--stack_dir",
        help="Path to VW0 directory with tiles stored in LOC* folders.",
        type=str,
        default="/home/ubuntu/ssd1/VW0",
    )
    parser.add_argument(
        "--config_file",
        help="Path to Experiment.ini file",
        type=str,
        default="/home/ubuntu/ssd1/Experiment.ini",
    )
    parser.add_argument(
        "--scanned_cells",
        help="Path to Scanned Cells.txt file",
        type=str,
        default="/home/ubuntu/ssd1/Scanned Cells.txt",
    )
    parser.add_argument(
        "--stitch_only",
        help="If true, only run the stitching commands from existing xml_merging.xml file",
        type=bool,
        default=False,
    )

    args = parser.parse_args()

    generate_stitching_commands(
        args.stitched_dir,
        args.stack_dir,
        args.config_file,
        args.scanned_cells,
        args.stitch_only,
    )