Source code for cloudreg.scripts.create_precomputed_volume

# local imports
from .util import tqdm_joblib, calc_hierarchy_levels

import math
from cloudvolume import CloudVolume
import numpy as np
import joblib
from joblib import Parallel, delayed
from glob import glob
import argparse
import PIL
from PIL import Image
from psutil import virtual_memory
from tqdm import tqdm
import tinybrain

PIL.Image.MAX_IMAGE_PIXELS = None


def create_cloud_volume(
    precomputed_path,
    img_size,
    voxel_size,
    num_mips,
    chunk_size,
    parallel=False,
    layer_type="image",
    dtype="uint16",
):
    """Create Neuroglancer precomputed volume on S3

    Args:
        precomputed_path (str): S3 path to location where precomputed layer will be stored
        img_size (list of int): Size of the image (in 3D) to be uploaded
        voxel_size (list of int): Voxel size in nanometers
        num_mips (int): Number of 2x downsampling levels in X and Y
        chunk_size (list of int): Size of each chunk stored on S3, e.g. [128, 128, 1]
        parallel (bool, optional): Whether or not the returned CloudVolume object will use parallel threads. Defaults to False.
        layer_type (str, optional): Neuroglancer layer type. Can be "image" or "segmentation". Defaults to "image".
        dtype (str, optional): Datatype of precomputed volume. Defaults to "uint16".

    Returns:
        cloudvolume.CloudVolume: CloudVolume object associated with this precomputed volume
    """
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,
        data_type=dtype,  # channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # Pick a convenient size for your underlying chunk representation.
        # Powers of two are recommended; it doesn't need to cover the image exactly.
        chunk_size=chunk_size,  # units are voxels
        volume_size=img_size,  # e.g. a cubic millimeter dataset
    )
    vol = CloudVolume(precomputed_path, info=info, parallel=parallel)
    # add a 2x-downsampled (in X and Y) scale for each mip level
    for i in range(num_mips):
        vol.add_scale((2 ** i, 2 ** i, 1), chunk_size=chunk_size)
    vol.commit_info()
    return vol
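
# Illustrative sketch (not part of the pipeline): create_cloud_volume could be
# exercised against a local "file://" destination instead of S3. The path and
# sizes below are hypothetical placeholders.
#
#   vol = create_cloud_volume(
#       "file:///tmp/example_precomputed",  # hypothetical local target
#       img_size=[2048, 2048, 100],         # X, Y, Z in voxels
#       voxel_size=[1000, 1000, 1000],      # nanometers
#       num_mips=4,
#       chunk_size=[128, 128, 1],
#   )
#   print(vol.layer_cloudpath)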
def get_image_dims(files):
    """Get X,Y,Z dimensions of images based on list of files

    Args:
        files (list of str): Path to 2D tif series

    Returns:
        list of int: X,Y,Z size of image in files
    """
    # get X,Y size of image by loading the first slice
    img = np.squeeze(np.array(Image.open(files[0]))).T
    # get Z size from the number of files in the directory
    z_size = len(files)
    x_size, y_size = img.shape
    return [x_size, y_size, z_size]
def process(z, file_path, layer_path, num_mips):
    """Upload a single slice to S3 as precomputed

    Args:
        z (int): Z slice number to upload
        file_path (str): Path to z-th slice
        layer_path (str): S3 path to store data at
        num_mips (int): Number of 2x2 downsampling levels in X,Y
    """
    # one CloudVolume handle per mip level
    vols = [
        CloudVolume(layer_path, mip=i, parallel=False, fill_missing=False)
        for i in range(num_mips)
    ]
    # load the slice, transpose to X,Y, and add a trailing Z axis
    array = np.squeeze(np.array(Image.open(file_path))).T[..., None]
    # build the 2x2 average-pooled image pyramid for the downsampled mips
    img_pyramid = tinybrain.accelerated.average_pooling_2x2(array, num_mips)
    vols[0][:, :, z] = array
    for i in range(num_mips - 1):
        vols[i + 1][:, :, z] = img_pyramid[i]
    return
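
# Illustrative sketch (hypothetical paths): process uploads one Z slice into an
# already-created precomputed layer, writing the full-resolution slice at mip 0
# and its 2x2 average-pooled versions at the higher mips.
#
#   process(
#       z=0,
#       file_path="/data/brain/slice_0000.tif",          # hypothetical slice
#       layer_path="s3://my-bucket/experiment/channel",  # hypothetical layer
#       num_mips=4,
#   )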
def create_precomputed_volume(
    input_path, voxel_size, precomputed_path, num_procs=None, extension="tif"
):
    """Create precomputed volume on S3 from 2D TIF series

    Args:
        input_path (str): Local path to 2D TIF series
        voxel_size (np.ndarray): Voxel size of image in X,Y,Z in microns
        precomputed_path (str): S3 path where precomputed volume will be stored
        num_procs (int, optional): Number of processes to use in parallel. Defaults to None, in which case it is chosen based on available memory and CPU count.
        extension (str, optional): Extension for image files. Defaults to "tif".
    """
    files_slices = list(
        enumerate(np.sort(glob(f"{input_path}/*.{extension}")).tolist())
    )
    zs = [i[0] for i in files_slices]
    files = np.array([i[1] for i in files_slices])

    img_size = get_image_dims(files)
    # compute num_mips from data size
    chunk_size = [128, 128, 1]
    num_mips = calc_hierarchy_levels(img_size, lowest_res=chunk_size[0])
    # convert voxel size from um to nm
    vol = create_cloud_volume(
        precomputed_path,
        img_size,
        voxel_size * 1000,
        num_mips,
        chunk_size,
        parallel=False,
    )

    if num_procs is None:
        # number of processes to use, based on available memory and CPU count
        num_procs = min(
            math.floor(virtual_memory().total / (img_size[0] * img_size[1] * 8)),
            joblib.cpu_count(),
        )

    try:
        with tqdm_joblib(
            tqdm(desc="Creating precomputed volume", total=len(files))
        ) as progress_bar:
            Parallel(num_procs, timeout=3600, verbose=10)(
                delayed(process)(z, f, vol.layer_cloudpath, num_mips)
                for z, f in zip(zs, files)
            )
    except Exception as e:
        print(e)
        print("timed out on a slice. moving on to the next step of the pipeline")
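
# Illustrative sketch (hypothetical paths): calling the full conversion from
# Python rather than the CLI. voxel_size is given in microns and converted to
# nanometers internally.
#
#   create_precomputed_volume(
#       "/data/brain/ch0",                # directory of 2D TIFs (hypothetical)
#       np.array([1.8, 1.8, 2.0]),        # X, Y, Z voxel size in microns
#       "s3://my-bucket/experiment/ch0",  # hypothetical S3 destination
#       num_procs=8,
#   )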
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert local volume into precomputed volume on S3."
    )
    parser.add_argument(
        "input_path",
        help="Path to directory containing stitched tiles named sequentially.",
    )
    parser.add_argument(
        "voxel_size",
        help="Voxel size in microns of image in 3D in X, Y, Z order.",
        nargs="+",
        type=float,
    )
    parser.add_argument(
        "precomputed_path",
        help="Path to location on S3 where precomputed volume should be stored. Example: s3://<bucket>/<experiment>/<channel>",
    )
    parser.add_argument(
        "--num_procs",
        help="Number of processes to use in parallel. Uploads may exceed the S3 request rate, so you may want to reduce the number of cores.",
        default=None,
        type=int,
    )
    args = parser.parse_args()

    create_precomputed_volume(
        args.input_path,
        np.array(args.voxel_size),
        args.precomputed_path,
        args.num_procs,
    )
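
# Example invocation (hypothetical bucket and paths; assumes the cloudreg
# package is importable so the module can be run with -m):
#
#   python -m cloudreg.scripts.create_precomputed_volume \
#       /data/brain/ch0 1.8 1.8 2.0 s3://my-bucket/experiment/ch0 --num_procs 8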