Dockerized Kallisto Server Pipeline

From Array Suite Wiki

Jump to: navigation, search

This pipeline script is GUI-enabled and allows users to run Kallisto quant on their OmicSoft Server architecture and import the results into a set of OmicData files. The pipeline performs the following steps:

  1. Run Kallisto quant on each pair of input files
  2. Merge all Kallisto output files (abundance.tsv) into a pair of matrices, using a Dockerized Python script "Anisto.py" written by OmicSoft
    1. Kallisto writes each sample's quantification to a separate file. Merging these files before importing makes it simpler to bring the results into an OmicSoft Studio project.
  3. Import the TPM output file as an OmicData object
  4. Import the Counts output file as an OmicData object

Kallisto_Server_Pipeline.pscript

<Info>
Label=Kallisto pipeline on Server with Python Merge
Description=Run Kallisto on Server files using Anisto.py to merge
Category=Pipeline\External Tool

<Input>
// Folder used as the root for all outputs: per-sample Kallisto results are written to
// <OutputFolderName>/%PairName%, and the merged matrices are read back from this folder.
@OutputFolderName@=
~@OutputFolderName@Type=FilePath
~@OutputFolderName@=Output folder

//Display
// GUI entry for this script and the input-file filter. Note that the second filter
// entry is *.gz, so it matches any gzip file, not only *.fastq.gz.
ExternalScriptInputType=Files
ExternalScriptMenuText=Kallisto Pipeline Server - python merge
ExternalScriptMenuStructure=Pipeline\External Tools\Alignment
ExternalScriptFileFilter=FASTQ|*.fastq|FASTQ.GZ|*.gz

//Kallisto Index file location
// Listed under Resources of the KallistoQuant step; the command passes it to
// `kallisto quant -i` via %Resource1%.
@KallistoIndexLocation@=
~@KallistoIndexLocation@Type=FileName
~@KallistoIndexLocation@=Location of precomputed index, if on cloud, be sure to select cloud index
~@KallistoIndexLocation@Filter=Kallisto Index Files|*.idx;

// Passed to `kallisto quant -b`; defaults to 0.
@Bootstrap@=0
~@Bootstrap@=Number of Bootstrap cycles to run in Kallisto

//Options
// Forwarded as /ParallelJobNumber to the KallistoQuant step.
@ParallelJobNumber@=1
~@ParallelJobNumber@=Parallel jobs number
~@ParallelJobNumber@Levels=1,2,4,8,16,32,64,128

// Used twice in the KallistoQuant step: as `kallisto quant -t` and as /ThreadNumberPerJob.
@ThreadNumberPerJob@=4
~@ThreadNumberPerJob@=Number of threads for each job
~@ThreadNumberPerJob@Levels=1,2,3,4,5,6,7,8

// Forwarded as /Mode to the KallistoQuant step.
// NOTE(review): the quant command always passes %FilePath1% and %FilePath2% —
// confirm that Single and Multiple behave as intended.
@Mode@=Paired
~@Mode@=Input FASTQ Files
~@Mode@Levels=Single,Paired,Multiple
~@Mode@ExclusiveLevels=True

// Forwarded as /ErrorOnStdErr to both RunEScript steps. The quant command redirects
// stderr to stdout (2>&1).
@ErrorOnStdErr@=False
~@ErrorOnStdErr@=Error on standard error
~@ErrorOnStdErr@Levels=True,False
~@ErrorOnStdErr@ExclusiveLevels=True

// Forwarded as /ErrorOnMissingOutput to the KallistoQuant step.
@ErrorOnMissingOutput@=True
~@ErrorOnMissingOutput@=Error on missing output
~@ErrorOnMissingOutput@Levels=True,False
~@ErrorOnMissingOutput@ExclusiveLevels=True

<Script>

// Step 1: run `kallisto quant` in the omicdocker/kallisto:testing Docker image on the
// selected input files (@FileNames@), with the index from @KallistoIndexLocation@
// supplied as %Resource1%.
// Each job writes to <OutputFolderName>/%PairName%; the Output line maps its
// abundance.tsv to <OutputFolderName>/%PairName%_abundance.tsv, so all per-sample
// tables end up directly in the output folder.
// The command redirects stderr to stdout (2>&1).
// NOTE(review): @InstanceType@ is not declared in the <Input> section of this script —
// confirm it is supplied by the environment.
// NOTE(review): %FilePath2% is always passed; confirm behavior when @Mode@ is Single.
Begin RunEScript;
Resources
"$$@KallistoIndexLocation@";
Files
"@FileNames@";
EScriptName KallistoQuant;
Command kallisto quant -i "%Resource1%" -t @ThreadNumberPerJob@ -o "%OutputFolder%" -b @Bootstrap@ %FilePath1% %FilePath2% 2>&1;
Options /ParallelJobNumber=@ParallelJobNumber@ /ThreadNumberPerJob=@ThreadNumberPerJob@ /Mode=@Mode@ /InstanceType=@InstanceType@ /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/kallisto:testing" /OutputFolder="$$@OutputFolderName@/%PairName%";
Output "$$@OutputFolderName@/%PairName%/abundance.tsv => $$@OutputFolderName@/%PairName%_abundance.tsv" /Type=tsv;
End;

// Step 2: merge the per-sample abundance tables. SearchFiles collects the *.tsv files
// located directly in the output folder (non-recursive), and Anisto.py is run once over
// that directory (/Mode=Multiple) in the omicdocker/pandas:latest Docker image, writing
// its results back to the same directory with the prefix "merged".
// /ErrorOnMissingOutput is driven by @ErrorOnMissingOutput@ (it was previously wired to
// @ErrorOnStdErr@, so the user's "Error on missing output" choice was ignored here).
Begin RunEScript /RunOnServer=True;
SearchFiles "$$@OutputFolderName@" /Pattern=*.tsv /Recursive=False;
EScriptName KallistoMergePython;
Command python3 Anisto.py -i "%FileDirectory%" -o "%FileDirectory%" -p "merged";
Options /Mode=Multiple /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/pandas:latest" /OutputFolder="$$@OutputFolderName@";
End;

// Step 3: import <OutputFolderName>/merged_result.tpm as a text-format table into the
// data object "KallistoTPM" (rows are not observations).
// Presumably merged_result.tpm is produced by the Anisto.py merge step (prefix "merged") —
// verify against the script's output naming.
// NOTE(review): the File path uses @OutputFolderName@ without the "$$" prefix used by
// the RunEScript steps — confirm this is intentional.
Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
File "@OutputFolderName@/merged_result.tpm";
Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /SheetName= /Output="KallistoTPM";
Output KallistoTPM;
End;

// Step 4: import <OutputFolderName>/merged_result.count as a text-format table into the
// data object "KallistoCounts" (rows are not observations).
// Presumably merged_result.count is produced by the Anisto.py merge step (prefix "merged") —
// verify against the script's output naming.
// NOTE(review): the File path uses @OutputFolderName@ without the "$$" prefix used by
// the RunEScript steps — confirm this is intentional.
Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
File "@OutputFolderName@/merged_result.count";
Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /SheetName= /Output="KallistoCounts";
Output KallistoCounts;
End;