Dockerized Kallisto Server Pipeline
From Array Suite Wiki
This GUI-enabled pipeline script lets you run Kallisto quant on your OmicSoft Server architecture and import the results into a set of OmicData files. The pipeline performs the following steps:
- Run Kallisto quant on each pair of input files
- Merge all Kallisto output files (abundance.tsv) into a pair of matrices, using a Dockerized Python script "Anisto.py" written by OmicSoft
- Kallisto outputs each sample's quantification as a separate file, so merging the files before import is recommended; this makes it simple to bring the results into an OmicSoft Studio project.
- Import the TPM output file as an OmicData object
- Import the Counts output file as an OmicData object
Kallisto_Server_Pipeline.pscript
<Info>
Label=Kallisto pipeline on Server with Python Merge
Description=Run Kallisto on Server files using anisto_performance.py to merge
Category=Pipeline\External Tool

<Input>
//Pipeline overview:
//  1. KallistoQuant       - runs "kallisto quant" in a Docker container for each input file pair
//  2. KallistoMergePython - runs Anisto.py in a Docker container over the per-sample *.tsv files
//  3. ImportMicroArray    - imports merged_result.tpm as the OmicData object KallistoTPM
//  4. ImportMicroArray    - imports merged_result.count as the OmicData object KallistoCounts

//Folder that receives the per-sample abundance files and the merged matrices
@OutputFolderName@=
~@OutputFolderName@Type=FilePath
~@OutputFolderName@=Output folder

//Display
ExternalScriptInputType=Files
ExternalScriptMenuText=Kallisto Pipeline Server - python merge
ExternalScriptMenuStructure=Pipeline\External Tools\Alignment
ExternalScriptFileFilter=FASTQ|*.fastq|FASTQ.GZ|*.gz

//Kallisto Index file location
@KallistoIndexLocation@=
~@KallistoIndexLocation@Type=FileName
~@KallistoIndexLocation@=Location of precomputed index, if on cloud, be sure to select cloud index
~@KallistoIndexLocation@Filter=Kallisto Index Files|*.idx;

//Passed to "kallisto quant" as the -b argument
@Bootstrap@=0
~@Bootstrap@=Number of Bootstrap cycles to run in Kallisto

//Options
@ParallelJobNumber@=1
~@ParallelJobNumber@=Parallel jobs number
~@ParallelJobNumber@Levels=1,2,4,8,16,32,64,128

//Used both as the RunEScript /ThreadNumberPerJob option and as the kallisto -t argument
@ThreadNumberPerJob@=4
~@ThreadNumberPerJob@=Number of threads for each job
~@ThreadNumberPerJob@Levels=1,2,3,4,5,6,7,8

//NOTE(review): the quant command always passes %FilePath1% and %FilePath2%;
//confirm the command line is valid when Mode is Single or Multiple.
@Mode@=Paired
~@Mode@=Input FASTQ Files
~@Mode@Levels=Single,Paired,Multiple
~@Mode@ExclusiveLevels=True

@ErrorOnStdErr@=False
~@ErrorOnStdErr@=Error on standard error
~@ErrorOnStdErr@Levels=True,False
~@ErrorOnStdErr@ExclusiveLevels=True

@ErrorOnMissingOutput@=True
~@ErrorOnMissingOutput@=Error on missing output
~@ErrorOnMissingOutput@Levels=True,False
~@ErrorOnMissingOutput@ExclusiveLevels=True

<Script>
//Step 1: quantify each input pair with Kallisto (image omicdocker/kallisto:testing).
//Each job writes to <output folder>/<PairName>; its abundance.tsv is mapped (=>) to
//<output folder>/<PairName>_abundance.tsv so that all samples end up in one folder.
//NOTE(review): @InstanceType@ is not declared in <Input>; presumably it is supplied
//by the server/cloud environment - confirm.
Begin RunEScript;
Resources "$$@KallistoIndexLocation@";
Files "@FileNames@";
EScriptName KallistoQuant;
Command kallisto quant -i "%Resource1%" -t @ThreadNumberPerJob@ -o "%OutputFolder%" -b @Bootstrap@ %FilePath1% %FilePath2% 2>&1;
Options /ParallelJobNumber=@ParallelJobNumber@ /ThreadNumberPerJob=@ThreadNumberPerJob@ /Mode=@Mode@ /InstanceType=@InstanceType@ /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/kallisto:testing" /OutputFolder="$$@OutputFolderName@/%PairName%";
Output "$$@OutputFolderName@/%PairName%/abundance.tsv => $$@OutputFolderName@/%PairName%_abundance.tsv" /Type=tsv;
End;

//Step 2: merge the per-sample *.tsv files found directly in the output folder
//(non-recursive search) with Anisto.py (image omicdocker/pandas:latest), prefix "merged".
///ErrorOnMissingOutput is wired to @ErrorOnMissingOutput@ here; it was previously wired to
//@ErrorOnStdErr@, which ignored the user's "Error on missing output" choice for this step.
//NOTE(review): the <Info> Description mentions anisto_performance.py while the command
//runs Anisto.py - confirm which script name the image actually provides.
Begin RunEScript /RunOnServer=True;
SearchFiles "$$@OutputFolderName@" /Pattern=*.tsv /Recursive=False;
EScriptName KallistoMergePython;
Command python3 Anisto.py -i "%FileDirectory%" -o "%FileDirectory%" -p "merged";
Options /Mode=Multiple /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/pandas:latest" /OutputFolder="$$@OutputFolderName@";
End;

//Step 3: import the merged TPM matrix as the OmicData object KallistoTPM.
//NOTE(review): the two import steps reference @OutputFolderName@ without the "$$" prefix
//used by the RunEScript steps - confirm this is intentional.
Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
File "@OutputFolderName@/merged_result.tpm";
Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /SheetName= /Output="KallistoTPM";
Output KallistoTPM;
End;

//Step 4: import the merged counts matrix as the OmicData object KallistoCounts.
Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
File "@OutputFolderName@/merged_result.count";
Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /SheetName= /Output="KallistoCounts";
Output KallistoCounts;
End;