Source code for cloudreg.scripts.run_colm_pipeline_ec2

from .util import start_ec2_instance, run_command_on_server
import argparse
import boto3


def run_colm_pipeline(
    ssh_key_path,
    instance_id,
    input_s3_path,
    output_s3_path,
    num_channels,
    autofluorescence_channel,
    log_s3_path=None,
    instance_type="r5d.24xlarge",
):
    """Run COLM pipeline on EC2 instance

    Args:
        ssh_key_path (str): Local path to ssh key needed for this server
        instance_id (str): ID of the EC2 instance to run pipeline on
        input_s3_path (str): S3 path to raw data
        output_s3_path (str): S3 path to store precomputed volume. Volume is stored at output_s3_path/channel for each channel.
        num_channels (int): Number of channels in this volume
        autofluorescence_channel (int): Autofluorescence channel number
        log_s3_path (str, optional): S3 path to store intermediates including vignetting correction and Terastitcher files. Defaults to None.
        instance_type (str, optional): AWS EC2 instance type. Defaults to "r5d.24xlarge".
    """
    # get EC2 resource and start the instance with the requested instance type
    ec2 = boto3.resource("ec2")
    public_ip_address = start_ec2_instance(instance_id, instance_type)

    # update the CloudReg code and Docker image on the instance
    update_command = "mkdir -p ~/ssd1 ~/ssd2; git clone https://github.com/neurodata/CloudReg.git; cd CloudReg; git pull; docker pull neurodata/cloudreg;"
    print("updating CloudReg code on EC2 instance...")
    errors_update = run_command_on_server(
        update_command, ssh_key_path, public_ip_address
    )

    # command to mount the instance's local SSDs
    command1 = "sudo bash CloudReg/cloudreg/scripts/mount_combined_ssds.sh"

    # COLM pipeline command
    # command2 = f'time /home/ubuntu/colm_pipeline_env/bin/python CloudReg/scripts/colm_pipeline.py {input_s3_path} {output_s3_path} {num_channels} {autofluorescence_channel} --log_s3_path {log_s3_path}'
    command2 = f"cd CloudReg/; time docker-compose run -v ~/ssd1:/root/ssd1 -v ~/ssd2:/root/ssd2 cloudreg {input_s3_path} {output_s3_path} {num_channels} {autofluorescence_channel} --log_s3_path {log_s3_path}"
    print(command2)

    # run both commands on the instance over ssh and print any errors
    errors1 = run_command_on_server(command1, ssh_key_path, public_ip_address)
    print(errors1)
    errors2 = run_command_on_server(command2, ssh_key_path, public_ip_address)
    print(errors2)

    # shut down the instance once the pipeline has finished
    ec2.meta.client.stop_instances(InstanceIds=[instance_id])
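
# A minimal usage sketch for calling run_colm_pipeline directly from Python.
# The ssh key path, instance ID, and S3 paths below are illustrative
# placeholders, not values shipped with CloudReg; substitute your own resources.
#
# from cloudreg.scripts.run_colm_pipeline_ec2 import run_colm_pipeline
#
# run_colm_pipeline(
#     ssh_key_path="~/.ssh/cloudreg-key.pem",           # local ssh identity file
#     instance_id="i-0123456789abcdef0",                # existing EC2 instance
#     input_s3_path="s3://my-bucket/my-experiment",     # raw COLM data
#     output_s3_path="s3://my-bucket/my-experiment-precomputed",
#     num_channels=3,
#     autofluorescence_channel=0,
#     log_s3_path="s3://my-bucket/my-experiment-logs",  # optional intermediates
#     instance_type="r5d.24xlarge",
# )
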
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        "Run COLM pipeline on remote EC2 instance with given input parameters"
    )
    parser.add_argument(
        "-ssh_key_path", help="path to identity file used to ssh into given instance"
    )
    parser.add_argument(
        "-instance_id", help="EC2 Instance ID of instance to run COLM pipeline on."
    )
    parser.add_argument(
        "-input_s3_path",
        help="S3 path to input COLM data. Should be of the form s3://<bucket>/<experiment>",
        type=str,
    )
    parser.add_argument(
        "-output_s3_path",
        help="S3 path to store precomputed volume. Precomputed volumes for each channel will be stored under this path. Should be of the form s3://<bucket>/<path_to_precomputed>. The data will be saved at s3://<bucket>/<path_to_precomputed>/CHN0<channel>",
        type=str,
    )
    # parser.add_argument('channel_of_interest', help='Channel of interest in experiment', type=int)
    parser.add_argument(
        "-num_channels", help="Number of channels in experiment", type=int
    )
    parser.add_argument(
        "-autofluorescence_channel", help="Autofluorescence channel number.", type=int
    )
    parser.add_argument(
        "-log_s3_path",
        help="S3 path at which pipeline intermediates can be stored, including the bias correction tile.",
        type=str,
        default="",
    )
    parser.add_argument(
        "--instance_type",
        help="EC2 instance type to run pipeline on. Minimum: r5d.16xlarge",
        type=str,
        default="r5d.16xlarge",
    )
    args = parser.parse_args()

    run_colm_pipeline(
        args.ssh_key_path,
        args.instance_id,
        args.input_s3_path,
        args.output_s3_path,
        args.num_channels,
        args.autofluorescence_channel,
        args.log_s3_path,
        instance_type=args.instance_type,
    )
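
# Example command-line invocation (illustrative values only; assumes the
# cloudreg package is importable, e.g. the command is run from the repository
# root, since the module uses relative imports and must be run with -m):
#
#   python -m cloudreg.scripts.run_colm_pipeline_ec2 \
#       -ssh_key_path ~/.ssh/cloudreg-key.pem \
#       -instance_id i-0123456789abcdef0 \
#       -input_s3_path s3://my-bucket/my-experiment \
#       -output_s3_path s3://my-bucket/my-experiment-precomputed \
#       -num_channels 3 \
#       -autofluorescence_channel 0 \
#       -log_s3_path s3://my-bucket/my-experiment-logs \
#       --instance_type r5d.16xlarge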