Dockerized Kallisto Cloud Pipeline

From Array Suite Wiki

(Difference between revisions)
Jump to: navigation, search
 
Line 68: Line 68:
 
   File "@OutputFolderName@/Merged/merged_result.tpm";
 
   File "@OutputFolderName@/Merged/merged_result.tpm";
 
   Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoTPM";
 
   Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoTPM";
 +
Output KallistoTPM;
 
   End;
 
   End;
 
    
 
    
Line 73: Line 74:
 
   File "@OutputFolderName@/Merged/merged_result.count";
 
   File "@OutputFolderName@/Merged/merged_result.count";
 
   Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoCounts";
 
   Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoCounts";
 +
Output KallistoCounts;
 
   End;
 
   End;

Latest revision as of 08:35, 21 April 2020

<Info>
Label=Kallisto pipeline Cloud with Python Merge
Description=Run Kallisto on Cloud and combine STDERR to STDOUT
Category=Pipeline\External Tool

<Input>
//Output folder chosen by the user; every path in <Script> is built under @OutputFolderName@
@OutputFolderName@=
~@OutputFolderName@Type=FilePath
~@OutputFolderName@=Output folder

//Display
ExternalScriptInputType=Files
ExternalScriptMenuText=Kallisto pipeline v1.2 on Cloud
ExternalScriptMenuStructure=Pipeline\External Tools\Alignment
ExternalScriptFileFilter=FASTQ|*.fastq|FASTQ.GZ|*.gz

//Kallisto Index file location
//Passed to the KallistoQuant step as its only Resources entry (%Resource1%, the kallisto -i argument)
@KallistoIndexLocation@=
~@KallistoIndexLocation@Type=FileName
~@KallistoIndexLocation@=Location of precomputed index, if on cloud, be sure to select cloud index
~@KallistoIndexLocation@Filter=Kallisto Index Files|*.idx;

//Substituted into the kallisto quant -b argument; defaults to 0
@Bootstrap@=0
~@Bootstrap@=Number of Bootstrap cycles to run in Kallisto

//Options
//Substituted into /ParallelJobNumber of the KallistoQuant step
@ParallelJobNumber@=1
~@ParallelJobNumber@=Parallel jobs number
~@ParallelJobNumber@Levels=1,2,4,8,16,32,64,128

//Substituted into both kallisto -t and /ThreadNumberPerJob of the KallistoQuant step
@ThreadNumberPerJob@=4
~@ThreadNumberPerJob@=Number of threads for each job
~@ThreadNumberPerJob@Levels=1,2,3,4,5,6,7,8

//Substituted into /InstanceType of the KallistoQuant step; no label or Levels list is declared for it here
@InstanceType@=m4.large

//Substituted into /ErrorOnStdErr of both RunEScript steps
@ErrorOnStdErr@=False
~@ErrorOnStdErr@=Error on standard error
~@ErrorOnStdErr@Levels=True,False
~@ErrorOnStdErr@ExclusiveLevels=True

//Substituted into /ErrorOnMissingOutput of the KallistoQuant step
@ErrorOnMissingOutput@=True
~@ErrorOnMissingOutput@=Error on missing output
~@ErrorOnMissingOutput@Levels=True,False
~@ErrorOnMissingOutput@ExclusiveLevels=True

<Script>

//Step 1 (KallistoQuant): runs "kallisto quant" in /Mode=Paired on the user-selected files (@FileNames@),
//inside the omicdocker/kallisto:testing Docker image with /UseCloud=True.
//  -i "%Resource1%"  : the index file listed under Resources (@KallistoIndexLocation@)
//  -t / -b           : thread count and bootstrap count taken from the <Input> variables
//  2>&1              : folds STDERR into STDOUT (see the <Info> Description)
//Each pair writes to @OutputFolderName@/%PairName%; the Output line maps that folder's abundance.tsv
//to @OutputFolderName@/%PairName%_abundance.tsv (presumably a copy/rename via "=>" - confirm against RunEScript docs).
//NOTE(review): %FilePath1% and %FilePath2% are not quoted, unlike "%Resource1%" and "%OutputFolder%";
//paths containing spaces may break the command unless the engine quotes them itself - confirm.
Begin RunEScript /RunOnServer=True;
Resources
"@KallistoIndexLocation@";
Files
"@FileNames@";
EScriptName KallistoQuant;
Command kallisto quant -i "%Resource1%" -t @ThreadNumberPerJob@ -o "%OutputFolder%" -b @Bootstrap@ %FilePath1% %FilePath2% 2>&1;
Options /ParallelJobNumber=@ParallelJobNumber@ /ThreadNumberPerJob=@ThreadNumberPerJob@ /Mode=Paired /InstanceType=@InstanceType@ /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/kallisto:testing" /UseCloud=True /OutputFolder="@OutputFolderName@/%PairName%";
Output "@OutputFolderName@/%PairName%/abundance.tsv => @OutputFolderName@/%PairName%_abundance.tsv" /Type=tsv;
End;

 //Step 2 (KallistoMergePython): runs Anisto.py in the omicdocker/pandas:latest Docker image (/UseCloud=True)
 //over the *.tsv files found directly in @OutputFolderName@ (non-recursive), in /Mode=Multiple,
 //with the step's output folder set to @OutputFolderName@/Merged.
 //NOTE(review): the ImportMicroArray steps in this script read @OutputFolderName@/Merged/merged_result.tpm and
 //merged_result.count; presumably Anisto.py produces them from the -p "merged" prefix - confirm.
 //Fix: /ErrorOnMissingOutput now uses @ErrorOnMissingOutput@ (it was wired to @ErrorOnStdErr@, so the
 //user's "Error on missing output" choice was ignored for this step).
 Begin RunEScript /RunOnServer=True;
 SearchFiles "@OutputFolderName@" /Pattern=*.tsv /Recursive=False; 
 EScriptName KallistoMergePython;
 Command python3 Anisto.py -i "%FileDirectory%" -o "%FileDirectory%" -p "merged";
 Options /Mode=Multiple /ErrorOnStdErr=@ErrorOnStdErr@ /ErrorOnMissingOutput=@ErrorOnMissingOutput@ /RunOnDocker=True /ImageName="omicdocker/pandas:latest" /OutputFolder="@OutputFolderName@/Merged" /UseCloud=True;
 End;

 //Step 3: imports the text table @OutputFolderName@/Merged/merged_result.tpm as a MicroArray-namespace
 //dataset named KallistoTPM. /RowsAreObservations=False: rows are presumably transcripts and columns samples - confirm.
 Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
 File "@OutputFolderName@/Merged/merged_result.tpm";
 Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoTPM";
Output KallistoTPM;
 End;
 
 //Step 4: imports the text table @OutputFolderName@/Merged/merged_result.count as a MicroArray-namespace
 //dataset named KallistoCounts, using the same import options as the KallistoTPM import.
 Begin ImportMicroArray/Namespace=MicroArray /Runtime=True /RunOnServer=True;
 File "@OutputFolderName@/Merged/merged_result.count";
 Options /Format=Txt /RowsAreObservations=False /IgnoreDescriptiveColumns=False /Output="KallistoCounts";
Output KallistoCounts;
 End;