Source code for cloudreg.scripts.colm_pipeline

from .download_raw_data import download_raw_data
from .correct_raw_data import correct_raw_data
from .create_precomputed_volume import create_precomputed_volume
from .correct_stitched_data import correct_stitched_data
from .stitching import run_terastitcher
from .util import (
    S3Url,
    download_terastitcher_files,
    tqdm_joblib,
)
from .visualization import create_viz_link

import numpy as np
from glob import glob
from tqdm import tqdm
import argparse
import os
from joblib import Parallel, delayed
import shutil


def colm_pipeline(
    input_s3_path,
    output_s3_path,
    channel_of_interest,
    autofluorescence_channel,
    raw_data_path,
    stitched_data_path,
    log_s3_path=None,
):
    """Run the COLM pipeline, including vignetting correction, stitching,
    illumination correction, and upload to S3 in a Neuroglancer-compatible
    (precomputed) format.

    Args:
        input_s3_path (str): S3 path to raw COLM data. Should be of the form s3://<bucket>/<experiment>
        output_s3_path (str): S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>
        channel_of_interest (int): Channel number to operate on. Should be a single integer.
        autofluorescence_channel (int): Autofluorescence channel number. Should be a single integer.
        raw_data_path (str): Local path where corrected raw data will be stored.
        stitched_data_path (str): Local path where stitched slices will be stored.
        log_s3_path (str, optional): S3 path at which pipeline intermediates can be stored, including the bias correction tile and XML files from TeraStitcher. Defaults to None.
    """

    # get the metadata file paths specific to COLM
    input_s3_url = S3Url(input_s3_path.strip("/"))
    output_s3_url = S3Url(output_s3_path.strip("/"))

    # download raw data onto local SSD
    vw0_path = f"{input_s3_url.url}/VW0/"
    download_raw_data(vw0_path, channel_of_interest, raw_data_path)

    # compute stitching alignments only if needed:
    # reuse stitching files if they already exist at the log path
    if (
        not download_terastitcher_files(log_s3_path, raw_data_path)
        and channel_of_interest == 0
    ):
        metadata = run_terastitcher(
            raw_data_path,
            stitched_data_path,
            input_s3_path,
            log_s3_path=log_s3_path,
            compute_only=True,
        )

    # bias correct all tiles and
    # save the bias correction tile to log_s3_path
    correct_raw_data(raw_data_path, channel_of_interest, log_s3_path=log_s3_path)

    # now stitch the data with the alignments computed above
    metadata = run_terastitcher(
        raw_data_path,
        stitched_data_path,
        input_s3_path,
        log_s3_path=log_s3_path,
        stitch_only=True,
    )

    # downsample and upload stitched data to S3
    stitched_path = glob(f"{stitched_data_path}/RES*")[0]
    create_precomputed_volume(
        stitched_path, np.array(metadata["voxel_size"]), output_s3_path
    )

    # correct whole-brain bias;
    # to avoid replicating data (higher S3 cost),
    # overwrite the original precomputed volume with the corrected data
    correct_stitched_data(output_s3_path, output_s3_path)

    # print viz link to console,
    # visualizing the data at 5 microns
    viz_link = create_viz_link(
        [output_s3_path], output_resolution=np.array([5] * 3) / 1e6
    )
    print("###################")
    print(f"VIZ LINK: {viz_link}")
    print("###################")
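
# A minimal usage sketch for a single channel (the bucket name and local
# paths below are hypothetical placeholders, not values from this module;
# keyword names match the signature above):
#
#   colm_pipeline(
#       input_s3_path="s3://my-bucket/my-experiment",
#       output_s3_path="s3://my-bucket/precomputed/CHN00",
#       channel_of_interest=0,
#       autofluorescence_channel=0,
#       raw_data_path="/home/ubuntu/ssd1",
#       stitched_data_path="/home/ubuntu/ssd2",
#       log_s3_path="s3://my-bucket/logs",
#   )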
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        "Run COLM pipeline including bias correction, stitching, and upload to S3"
    )
    parser.add_argument(
        "input_s3_path",
        help="S3 path to input COLM data. Should be of the form s3://<bucket>/<experiment>",
        type=str,
    )
    parser.add_argument(
        "output_s3_path",
        help="S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>. The data will be saved at s3://<bucket>/<path_to_precomputed>/CHN0<channel>",
        type=str,
    )
    # parser.add_argument('channel_of_interest', help='Channel of interest in experiment', type=int)
    parser.add_argument(
        "num_channels", help="Number of channels in experiment", type=int
    )
    parser.add_argument(
        "autofluorescence_channel", help="Autofluorescence channel number.", type=int
    )
    parser.add_argument(
        "--raw_data_path",
        help="Local path where corrected raw data will be stored.",
        type=str,
        default=os.path.expanduser("~/ssd1"),
    )
    parser.add_argument(
        "--stitched_data_path",
        help="Local path where stitched slices will be stored.",
        type=str,
        default=os.path.expanduser("~/ssd2"),
    )
    parser.add_argument(
        "--log_s3_path",
        help="S3 path at which pipeline intermediates can be stored, including the bias correction tile.",
        type=str,
        default=None,
    )
    args = parser.parse_args()

    # run the pipeline for every channel in the experiment
    for i in range(args.num_channels):
        output_s3_path = args.output_s3_path.strip("/")
        colm_pipeline(
            args.input_s3_path,
            f"{output_s3_path}/CHN0{i}",
            i,
            args.autofluorescence_channel,
            args.raw_data_path,
            args.stitched_data_path,
            args.log_s3_path,
        )
        if i < args.num_channels - 1:
            # delete all TIFF files in raw_data_path
            directories_to_remove = glob(f"{args.raw_data_path}/LOC*")
            directories_to_remove.extend(glob(f"{args.stitched_data_path}/RES*"))
            with tqdm_joblib(
                tqdm(
                    desc=f"Delete files from CHN0{i}", total=len(directories_to_remove)
                )
            ) as progress_bar:
                Parallel(n_jobs=-1)(
                    delayed(shutil.rmtree)(f) for f in directories_to_remove
                )
            # make sure to delete mdata.bin from TeraStitcher
            if os.path.exists(f"{args.raw_data_path}/mdata.bin"):
                os.remove(f"{args.raw_data_path}/mdata.bin")
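
# Example command-line invocation, assuming the cloudreg package is importable
# (bucket and experiment names are hypothetical). This processes a 2-channel
# experiment with channel 0 as the autofluorescence channel:
#
#   python -m cloudreg.scripts.colm_pipeline \
#       s3://my-bucket/my-experiment \
#       s3://my-bucket/precomputed \
#       2 0 \
#       --log_s3_path s3://my-bucket/logs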