From dd32059ffdd88eb67434d9fa07aa3b18d0b3be1e Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Thu, 22 Aug 2024 17:03:38 +0200 Subject: [PATCH 1/9] Update manifest version --- conf/report.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/report.config b/conf/report.config index 8a5bb71..12ef314 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.19.0' + version = '1.20.0' } \ No newline at end of file -- GitLab From a9c5ac3581eec41a517528ce4fc486a62bc03b84 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 2 Sep 2024 14:13:00 +0200 Subject: [PATCH 2/9] SortmeRNA runs faster Ref: #109 --- conf/base.config | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index 465f361..3e613d4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -324,6 +324,10 @@ process { module = toolsModuleHash['SEQTK'] } + withName: ADD_MULTIQC { + errorStrategy = 'ignore' + } + withName: MULTIQC { ext.args = [ "--config ${baseDir}/assets/multiqc_config.yaml", @@ -344,9 +348,9 @@ process { withName: SORTMERNA { module = toolsModuleHash['SORTMERNA'] - memory = { checkMax( 10.GB * task.attempt * params.resource_factor, 'memory' ) } - time = { checkMax( 10.h * task.attempt, 'time' ) } - cpus = { checkMax( 1 * task.attempt, 'cpus' ) } + memory = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) } + time = { checkMax( 3.h * task.attempt, 'time' ) } + cpus = { checkMax( 28 * task.attempt, 'cpus' ) } publishDir = [ path: "${params.outdir}/rRNA", -- GitLab From a7446f6c641c6b8ea13cc5dc4379df42b733fc5f Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Wed, 4 Sep 2024 09:41:29 +0200 Subject: [PATCH 3/9] Get version of each tools and use it in MQC report Ref: #15 --- 
assets/multiqc_config.yaml | 10 ++-- conf/base.config | 19 ++++++- conf/dependencies_genobioinfo.config | 2 +- conf/functions.config | 52 +++++++++++++++++++ main.nf | 2 +- modules/local/module_core.nf | 28 +++++++++- modules/local/module_diversity.nf | 12 +++++ modules/local/module_dna.nf | 24 +++++++++ modules/local/module_rna.nf | 28 +++++++--- nextflow.config | 10 +++- sub-workflows/local/core_illumina.nf | 4 ++ sub-workflows/local/core_pipeline.nf | 6 +++ sub-workflows/local/diversity_qc.nf | 5 ++ sub-workflows/local/dna_qc.nf | 9 ++++ sub-workflows/local/rna_qc.nf | 15 +++++- .../{illumina_qc.nf => short_reads_qc.nf} | 21 +++++++- 16 files changed, 226 insertions(+), 21 deletions(-) rename workflow/{illumina_qc.nf => short_reads_qc.nf} (94%) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index b5f8ae5..75a0989 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -11,14 +11,14 @@ report_comment: > show_analysis_paths: False show_analysis_time: False - +disable_version_detection: true ## Number formatting thousandsSep_format: " " ## General Statistics table table_columns_visible: - Duplicats: False - ContaminationSearch - RNA: True + Duplicats: True + ContaminationSearch - rRNA: True samtools: False ReadsStats: percent_duplicates: False @@ -43,7 +43,7 @@ extra_fn_clean_exts: - "_screen" ## Plot config -export_plots: true +export_plots: false plots_force_interactive: true ## Module config @@ -88,7 +88,7 @@ module_order: # Pattern sp: - fastqc: + fastqc/zip: fn: "*.zip" fastq_screen: fn: '*_screen.txt' diff --git a/conf/base.config b/conf/base.config index 3e613d4..965385f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -69,6 +69,7 @@ process { saveAs: { filename -> "${name}.fastq.gz" }*/ ] + ext.analyse_type = params.read_stats_label module = toolsModuleHash['ILLUMINA_FILTER'] cpus = { checkMax( 3 * task.attempt, 'cpus' ) } time = { checkMax( 4.h * task.attempt, 'time' ) } @@ -82,7 +83,7 @@ process { ] 
ext.args = "--reads_to_process ${params.fastp_n_reads}" - + ext.analyse_type = params.duplicats_label module = toolsModuleHash['FASTP'] time = { checkMax( 5.h * task.attempt, 'time' ) } memory = { checkMax( 3.GB * task.attempt, 'memory' ) } @@ -103,6 +104,7 @@ process { saveAs: { filename -> "${name}.html" } ] + ext.analyse_type = params.read_stats_label module = toolsModuleHash['FASTQC'] maxRetries = 4 cpus = { checkMax( 2 * task.attempt, 'cpus' ) } @@ -112,6 +114,7 @@ process { withName: FASTQSCREEN { time = { checkMax( 1.h * task.attempt, 'time' ) } module = toolsModuleHash['FASTQSCREEN'] + ext.analyse_type = params.contamination_search_label publishDir = [ path: "${params.outdir}/ContaminationSearch/FastQ-Screen", @@ -125,6 +128,8 @@ process { cpus = { checkMax( 6 * task.attempt, 'cpus' ) } memory = { checkMax( 16.GB * task.attempt, 'memory' ) } time = { checkMax( 3.d * task.attempt, 'time' ) } + + ext.analyse_type = params.alignment_stats_label publishDir = [ path: "${params.outdir}/alignment/bwa", @@ -137,7 +142,8 @@ process { module = toolsModuleHash['SALMON'] time = { checkMax( 1.h * task.attempt, 'time' ) } memory = { checkMax( 3.GB * task.attempt, 'memory' ) } - cpus = 8 + cpus = 8 + ext.analyse_type = params.alignment_stats_label } withName: SALMON_QUANT { @@ -145,6 +151,7 @@ process { time = { checkMax( 1.h * task.attempt, 'time' ) } memory = { checkMax( 10.GB * task.attempt, 'memory' ) } cpus = 8 + ext.analyse_type = params.alignment_stats_label publishDir = [ path: "${params.outdir}/alignmentStats", @@ -157,6 +164,7 @@ process { module = toolsModuleHash['STAR'] memory = { checkMax( 50.GB * task.attempt, 'memory' ) } cpus = 8 + ext.analyse_type = params.alignment_stats_label } withName: STAR_ALIGN { @@ -164,6 +172,7 @@ process { memory = { checkMax( 20.GB * task.attempt, 'memory' ) } cpus = 2 time = { checkMax( 1.d * task.attempt, 'memory' ) } + ext.analyse_type = params.alignment_stats_label publishDir = [ path: "${params.outdir}/alignmentStats", @@ 
-184,6 +193,7 @@ process { "-m ${params.min_overlap}", "-M ${params.max_overlap}" ].join(' ') + ext.analyse_type = params.join_pairs_label publishDir = [ path: "${params.outdir}/joinPair", @@ -197,6 +207,7 @@ process { time = { checkMax( 5.h * task.attempt, 'time' ) } memory = { checkMax( 2.GB * task.attempt, 'memory' ) } cpus = 4 + ext.analyse_type = params.join_pairs_label ext.args = [ "-max_target_seqs ${params.blast_max_target}", @@ -352,6 +363,8 @@ process { time = { checkMax( 3.h * task.attempt, 'time' ) } cpus = { checkMax( 28 * task.attempt, 'cpus' ) } + ext.analyse_type = params.contamination_search_label + publishDir = [ path: "${params.outdir}/rRNA", mode: 'copy', @@ -374,6 +387,8 @@ process { memory = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) } time = { checkMax( 3.h * task.attempt, 'time' ) } + ext.analyse_type = params.alignment_stats_label + publishDir = [ path: "${params.outdir}/alignmentStats/qualimap", mode: 'copy', diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config index b715b7a..6719d12 100644 --- a/conf/dependencies_genobioinfo.config +++ b/conf/dependencies_genobioinfo.config @@ -25,7 +25,7 @@ toolsModuleHash['BLAST'] = ['bioinfo/NCBI_Blast+/2.10.0+'] // SHARED MODULES //========================================= toolsModuleHash['SEQTK'] = ['bioinfo/Seqtk/1.3'] -toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.24.1'] toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6'] // version upgraded face to genologin toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20'] toolsModuleHash['KRONA'] = ['bioinfo/Krona/2.8.1'] // version upgraded face to genologin diff --git a/conf/functions.config b/conf/functions.config index e4ff017..7c8b722 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -1,3 +1,5 @@ +import org.yaml.snakeyaml.Yaml + def helpMessage() { log.info""" @@ -208,4 +210,54 @@ def 
sendFinalMail(formatted_date, summary) { output_tf.withWriter { w -> w << email_txt } return mail_sent +} + +// +// [nf-core] Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + return version_string +} + +// +// Inspired by [nf-core]: keep the part of each key before ':' and drop entries with a null value +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + + def versions = yaml.load(yaml_file) + .findAll { k, v -> v != null } + .collectEntries { k, v -> + [k.tokenize(':')[0], v] + } + + return yaml.dumpAsMap(versions).trim() +} + +// +// [nf-core] Get workflow version for pipeline +// +def workflowVersionToYAML() { + // Workflow: + return """ + Workflow - $workflow.manifest.name: ${getWorkflowVersion()} + Workflow - Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// [nf-core] Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) } \ No newline at end of file diff --git a/main.nf b/main.nf index 639c4f5..403cbe4 100644 --- a/main.nf +++ b/main.nf @@ -32,7 +32,7 @@ params.summary.collect{k,v -> println "$k : $v"} NAMED WORKFLOW FOR PIPELINE ======================================================================================== */ -include { SHORT_READS_QC } from "$baseDir/workflow/illumina_qc.nf" +include { SHORT_READS_QC } from "$baseDir/workflow/short_reads_qc.nf" workflow PLAGE { SHORT_READS_QC() diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 43e8128..ccb92b7 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -46,11 +46,17 @@ process FASTQC { output: tuple val(name), path("*_fastqc.html") , emit: html tuple 
val(name), path("*_fastqc.zip") , emit: zip + path("versions.yml") , emit: versions // path log files script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read} + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS """ } @@ -64,12 +70,18 @@ process ILLUMINA_FILTER { output: tuple val("$name"), path("*.fastq.gz"), emit: reads path("*.output"), emit: log + path("versions.yml") , emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz + + cat <<-END_VERSIONS > versions.yml + '${analyse_type} - fastq_illumina_filter': \$( fastq_illumina_filter -h | head -1 | sed -n 's/.*version \\([0-9.]*\\).*/\\1/p' ) + END_VERSIONS """ - + // } process FASTQSCREEN { @@ -80,17 +92,23 @@ process FASTQSCREEN { output: tuple val(sample), path("*.txt"), emit: report + path("versions.yml") , emit: versions script: def args = task.ext.args ?: '' def defaultConf = "${baseDir}/assets/fastq_screen.conf_example" def inputConf = "${params.inputdir}/fastq_screen.conf" def confFile = file(inputConf).exists() ? 
inputConf : defaultConf + def analyse_type = task.ext.analyse_type ?: params.default_label """ fastq_screen \\ $reads \\ --conf ${confFile} \\ $args + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - fastq_screen: \$( fastq_screen --version | sed '/FastQ Screen v/!d; s/.*v//' ) + END_VERSIONS """ } @@ -102,12 +120,14 @@ process DUPLICATED_READS { output: tuple val(sample), path("*.json"), emit: json - tuple val(sample), path("*.log") + tuple val(sample), path("*.log"), emit: log + path("versions.yml") , emit: versions shell: R1_name=file(fastq[0]).simpleName R2_name=file(fastq[1]).simpleName def args = task.ext.args ?: '' + analyse_type = task.ext.analyse_type ?: params.default_label ''' fastp \ -i !{fastq[0]} \ @@ -120,6 +140,10 @@ process DUPLICATED_READS { --json !{R1_name}_fastp.json \ !{args} \ 2> !{R1_name}.log + + cat <<-END_VERSIONS > versions.yml + !{analyse_type} - fastp: $(fastp --version 2>&1 | sed -e 's/fastp //g') + END_VERSIONS ''' } diff --git a/modules/local/module_diversity.nf b/modules/local/module_diversity.nf index b2a27ab..787a49d 100644 --- a/modules/local/module_diversity.nf +++ b/modules/local/module_diversity.nf @@ -13,9 +13,11 @@ process JOIN_PAIR { tuple val(sample), path("*.notCombined_*.fastq.gz"), emit: notCombined tuple val(sample), path("*.log"), emit: logs tuple val(sample), path("*.hist"), emit: histogram + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ flash \\ $reads \\ @@ -26,6 +28,10 @@ process JOIN_PAIR { > ${sample}_flash.log mv ${sample}.hist ${sample}_flash.hist + +cat <<-END_VERSIONS > versions.yml +${analyse_type} - flash: \$( flash --version | sed \'/^FLASH v/!d; s/.*v//' ) +END_VERSIONS """ } @@ -39,9 +45,11 @@ process BLAST_N { output: tuple val(sample), path("*.blastn"), emit: results + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' + def analyse_type = task.ext.analyse_type ?: 
params.default_label """ db_dir=\$(dirname $db) [[ `find -L \$db_dir -name "*.00.idx"` ]] && isIndexed='true' || isIndexed='false' @@ -53,6 +61,10 @@ process BLAST_N { -use_index \$isIndexed \\ $args \\ -out ${sample}.blastn + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - blastn: \$(blastn -version 2>&1 | sed '/^.*blastn: /!d; s/.*: //') + END_VERSIONS """ } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 02836b5..66fc4e2 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -13,12 +13,18 @@ process BWA_ALIGNMENT { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.sam"), emit: sam + path("versions.yml"), emit: versions script: def reference = params.reference_genome ?: params.reference_transcriptome def referenceName=file(reference).toString().split('/')[6] + def analyse_type = task.ext.analyse_type ?: params.default_label """ bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - bwa: \$( bwa 2>&1 | sed '/^Version/!d; s/.*: //' ) + END_VERSIONS """ } @@ -32,10 +38,16 @@ process SAMTOOLS_VIEW { output: tuple val(sample), path("*.bam"), emit: bam + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } @@ -51,11 +63,17 @@ process SAMTOOLS_SORT { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.bam"), emit: bam + path("versions.yml"), emit: versions //path("*.bam"), emit: bam script: // Pourquoi unmerged ??? 
https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97 + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } @@ -71,10 +89,16 @@ process SAMTOOLS_FLAGSTATS { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.txt"), emit: txt + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools flagstat ${bam} > ${sample}_flagstat.txt 2>> ${sample}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } diff --git a/modules/local/module_rna.nf b/modules/local/module_rna.nf index bd1a288..5201617 100644 --- a/modules/local/module_rna.nf +++ b/modules/local/module_rna.nf @@ -7,13 +7,19 @@ process SALMON_INDEX { output: path("index/"), emit: index + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ salmon index \ -t ${params.reference_transcriptome} \ -i ./index \ --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - salmon: \$(salmon --version | sed 's/salmon //') + END_VERSIONS """ } @@ -28,12 +34,13 @@ process SALMON_QUANT { output: tuple val(sample), path("$sample/"), emit: results - path("versions.yml"), emit: version + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' def R1 = reads.find { it =~ /.*_R1_.*/} def R2 = reads.find { it =~ /.*_R2_.*/} + def analyse_type = task.ext.analyse_type ?: params.default_label """ salmon quant \\ --libType ${lib_type} \\ @@ -45,10 +52,9 @@ process SALMON_QUANT { $args \\ 2> /dev/null - cat <<-END_VERSIONS > versions.yml - "${task.process}": - salmon: \$(echo \$(salmon 
--version) | sed -e "s/salmon //g") - END_VERSIONS + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS """ } @@ -62,11 +68,13 @@ process STAR_INDEX { output: path("index/"), emit: index + path("versions.yml"), emit: versions script: // renamme en .fa ?? utile ?? def args = task.ext.args ?: '' def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fai}` @@ -78,7 +86,10 @@ process STAR_INDEX { --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $args - + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - star: \$(STAR --version) + END_VERSIONS """ } @@ -94,10 +105,12 @@ process STAR_ALIGN { tuple val(sample), path("${sample}_Log.final.out"), emit: results tuple val(sample), path("${sample}_Log.out"), emit: log tuple val(sample), path("${sample}_Aligned.out.sam"), emit: sam + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' def read_files_cmd = reads[0].endsWith('.gz') ? 
'--readFilesCommand zcat' : '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ STAR \\ --outFileNamePrefix ${sample}_ \\ @@ -107,5 +120,8 @@ process STAR_ALIGN { --readFilesIn $reads \\ $read_files_cmd + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - star: \$(STAR --version) + END_VERSIONS """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 8bff246..1a5d1b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ params { min_overlap = 20 max_overlap = 55 max_mismatch_density = 0.1 - assignation_databank = '' + assignation_databank = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial' blast_outfmt = 7 blast_max_target = 10 @@ -72,6 +72,14 @@ params { max_time = "90.d" max_cpus = "48" + // Labels to display tool versions in MultiQC report + default_label = 'Pipeline' + read_stats_label = 'ReadStats' + duplicats_label = 'Duplicats' + contamination_search_label = 'ContaminationSearch' + join_pairs_label = 'JoinPairs' + alignment_stats_label = 'AlignmentStats' + // OTHERS cluster_options = '' is_dev_mode = false diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index 61f8885..5530f45 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -39,6 +39,8 @@ workflow CORE_ILLUMINA { readsetsFile main: + ch_versions = Channel.empty() + // ----------- DemultiplexStat PREP_DEMUXSTAT(sampleSheet) DEMUX_STATS(demuxStatXML, PREP_DEMUXSTAT.out, demuxSummary) @@ -50,6 +52,7 @@ workflow CORE_ILLUMINA { } else { // Si MiSeq ou Nova + noIndex ILLUMINA_FILTER(fastq) fastq_good = ILLUMINA_FILTER.out.reads + ch_versions = ch_versions.mix(ILLUMINA_FILTER.out.versions) } if (params.insert_to_ngl){ @@ -61,5 +64,6 @@ workflow CORE_ILLUMINA { emit: fastq = fastq_good demuxStat = DEMUX_STATS.out.demultiplexStatsTSV + versions = ch_versions } diff --git a/sub-workflows/local/core_pipeline.nf 
b/sub-workflows/local/core_pipeline.nf index 20d45dc..f19e856 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -30,11 +30,15 @@ workflow CORE { ch_read main: + ch_versions = Channel.empty() + // ----------- FASTQC FASTQC(ch_read) + ch_versions = ch_versions.mix(FASTQC.out.versions) // ----------- ContaminationSearch FASTQSCREEN(ch_read) + ch_versions = ch_versions.mix(FASTQSCREEN.out.versions) // ----------- Recherche Duplicats GUNZIP(ch_read) @@ -62,10 +66,12 @@ workflow CORE { .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] } .groupTuple() ) // need fastq paired !!! + ch_versions = ch_versions.mix(DUPLICATED_READS.out.versions) emit: fastqc_report = FASTQC.out.zip ?: Channel.empty() fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty() fastp_report = DUPLICATED_READS.out.json subset_fastq = unzipped_fastq + versions = ch_versions } diff --git a/sub-workflows/local/diversity_qc.nf b/sub-workflows/local/diversity_qc.nf index 06c59d0..a46177c 100644 --- a/sub-workflows/local/diversity_qc.nf +++ b/sub-workflows/local/diversity_qc.nf @@ -24,8 +24,11 @@ workflow DIVERSITY_QC { fastq main: + ch_versions = Channel.empty() + // Pairs merging JOIN_PAIR(fastq) + ch_versions = ch_versions.mix(JOIN_PAIR.out.versions) // SubsetAssignation if (params.assignation_databank != '') { @@ -37,6 +40,7 @@ workflow DIVERSITY_QC { // -- Taxonomic assignation BLAST_N(FQ_TO_FA.out.fasta, params.assignation_databank) + ch_versions = ch_versions.mix(BLAST_N.out.versions) KRONA_BLAST(BLAST_N.out.results) krona_html = KRONA_BLAST.out.html @@ -49,4 +53,5 @@ workflow DIVERSITY_QC { histogram = JOIN_PAIR.out.histogram logs = JOIN_PAIR.out.logs krona = krona_html + versions = ch_versions } \ No newline at end of file diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf index 7f39268..b7d5e6a 100644 --- a/sub-workflows/local/dna_qc.nf +++ b/sub-workflows/local/dna_qc.nf @@ -25,6 +25,8 @@ workflow DNA_QC 
{ fastq main: + ch_versions = Channel.empty() + if ( "$params.reference_genome" != '' || "$params.reference_transcriptome" != '') { BWA_ALIGNMENT(fastq) SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam) @@ -35,6 +37,12 @@ workflow DNA_QC { qualimap_report_emitted = QUALIMAP.out.report flagstats_output_emitted = SAMTOOLS_FLAGSTATS.out.txt bam_output_emitted = SAMTOOLS_SORT.out.bam + ch_versions = ch_versions.mix( + BWA_ALIGNMENT.out.versions, + SAMTOOLS_VIEW.out.versions, + SAMTOOLS_SORT.out.versions, + SAMTOOLS_FLAGSTATS.out.versions + ) } else { System.out.println "Pas de référence genomique ou transcriptomique renseignée, on ne peut pas faire d'alignement" @@ -48,4 +56,5 @@ workflow DNA_QC { qualimap_report = qualimap_report_emitted flagstats_output = flagstats_output_emitted bam = bam_output_emitted + versions = ch_versions } \ No newline at end of file diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf index 61d6638..bfac0d8 100644 --- a/sub-workflows/local/rna_qc.nf +++ b/sub-workflows/local/rna_qc.nf @@ -39,9 +39,11 @@ workflow RNA_QC { sortmerna_db main: - fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple() + ch_versions = Channel.empty() align_results = Channel.empty() + fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple() + if ( "$params.reference_genome" != '' ) { // if indexFiles does not exist if ( ! 
file(file(params.reference_genome).getParent() + '/SAindex').exists() || params.make_star_index) { @@ -49,6 +51,7 @@ workflow RNA_QC { reference_genome = Channel.from(params.reference_genome) genome_index = SAMTOOLS_FAIDX(reference_genome).index star_index = STAR_INDEX(reference_genome, genome_index).index + ch_versions = ch_versions.mix(STAR_INDEX.out.versions) } else { star_index = Channel.from(file(params.reference_genome).getParent()) } @@ -58,6 +61,12 @@ workflow RNA_QC { SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam) SAMTOOLS_FLAGSTATS(SAMTOOLS_VIEW.out.bam) qualimap_report_emitted = QUALIMAP(SAMTOOLS_SORT.out.bam).report + ch_versions = ch_versions.mix( + STAR_ALIGN.out.versions, + SAMTOOLS_VIEW.out.versions, + SAMTOOLS_SORT.out.versions, + SAMTOOLS_FLAGSTATS.out.versions + ) } else if ("$params.reference_transcriptome" != '') { // 10X + transcriptome > use BWA @@ -70,12 +79,14 @@ workflow RNA_QC { ) align_results = BWA.out.flagstats_output qualimap_report_emitted = BWA.out.qualimap_report + ch_versions = ch_versions.mix(BWA.out.versions) } else { // if indexFiles does not exist if ( ! file(file(params.reference_transcriptome).getParent() + '/seq.bin').exists()) { println "SALMON index files does not exists -> Let's start transcriptome indexing..." 
salmon_index = SALMON_INDEX().index + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) } else { salmon_index = Channel.from(file(params.reference_transcriptome).getParent()) } @@ -98,6 +109,7 @@ workflow RNA_QC { ch_lib_type ).results qualimap_report_emitted= Channel.empty() + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) } } else { @@ -114,4 +126,5 @@ workflow RNA_QC { sortmerna_log = SORTMERNA.out.log qualimap_report = qualimap_report_emitted //flagstats_output = flagstats_output_emitted + versions = ch_versions } \ No newline at end of file diff --git a/workflow/illumina_qc.nf b/workflow/short_reads_qc.nf similarity index 94% rename from workflow/illumina_qc.nf rename to workflow/short_reads_qc.nf index d2d87ca..f393c3d 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/short_reads_qc.nf @@ -6,7 +6,8 @@ nextflow.enable.dsl = 2 include { helpMessage; createSummary; sendBeginMail; - sendFinalMail + sendFinalMail; + softwareVersionsToYAML } from "$baseDir/conf/functions.config" // Show help message @@ -124,6 +125,8 @@ sendBeginMail(format.format(new Date())) // ------------------------------------------------- workflow SHORT_READS_QC { ch_mqc = Channel.empty() + ch_versions = Channel.empty() + WORKFLOW_SUMMARY() if (params.insert_to_ngl){ @@ -140,6 +143,7 @@ workflow SHORT_READS_QC { if (! 
params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) { CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created) fastq = CORE_ILLUMINA.out.fastq + ch_versions = ch_versions.mix(CORE_ILLUMINA.out.versions) } else { fastq = ch_read } @@ -148,6 +152,7 @@ workflow SHORT_READS_QC { } CORE(fastq) + ch_versions = ch_versions.mix(CORE.out.versions) if (params.data_nature =~ 'DNA|GENOMIC') { DNA_QC(CORE.out.subset_fastq @@ -160,6 +165,7 @@ workflow SHORT_READS_QC { DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix(DNA_QC.out.versions) // DTM process if (params.DTM_mode) { @@ -174,6 +180,7 @@ workflow SHORT_READS_QC { RNA_QC.out.sortmerna_log.collect{it[1]}.ifEmpty([]), RNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), ) + ch_versions = ch_versions.mix(RNA_QC.out.versions) } else if (params.data_nature =~ "16S|Amplicon|METAGENOMIC|METATRANSCRIPTOMIC") { DIVERSITY_QC(fastq @@ -187,18 +194,28 @@ workflow SHORT_READS_QC { DIVERSITY_QC.out.histogram.collect{it[1]}.ifEmpty([]), DIVERSITY_QC.out.logs.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix(DIVERSITY_QC.out.versions) } else { System.out.println "Le QC des données ${params.data_nature} n'a pas de sub-workflow spécifique pour le moment." 
ch_mqc = ch_mqc.mix( Channel.empty() ) } + + version_yaml = softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_mqc_versions.yml', + sort: true, + newLine: true + ) MULTIQC(WORKFLOW_SUMMARY.out.ifEmpty([]) .mix( CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]), CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]), CORE.out.fastp_report.collect{it[1]}.ifEmpty([]), - ch_mqc.collect().ifEmpty([]) + ch_mqc.collect().ifEmpty([]), + version_yaml ).collect() ) -- GitLab From 7669d78bccbcf2c32b139fee993b526fedc1621c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Wed, 4 Sep 2024 10:35:55 +0200 Subject: [PATCH 4/9] Remove useless CICD files --- .gitlab-ci.yml | 40 ---------------------------------------- Dockerfile | 7 ------- 2 files changed, 47 deletions(-) delete mode 100644 .gitlab-ci.yml delete mode 100644 Dockerfile diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index e336847..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,40 +0,0 @@ -# recipe for building singularity image and deploy it on the registery for template -image: - name: nextflow/nextflow - entrypoint: [""] - -stages: - - build - - deploy - - test - -push: - stage: test - script: - - nextflow run ./main.nf - - nextflow run ./main.nf --help - -# Build Singularity container bwa_v0.7.17.sif -singularity-image: - image: quay.io/singularity/singularity:v3.4.0 - stage: build - script: - - singularity build template.sif Singularityfile - artifacts: - paths: - - template.sif - only: - changes: - - Singularityfile - - environment.yml - -# Push the image template.sif on the registry -deploy: - image: quay.io/singularity/singularity:v3.4.0 - stage: deploy - script: - - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" template.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG" - only: - changes: - - Singularityfile - - environment.yml diff 
--git a/Dockerfile b/Dockerfile deleted file mode 100644 index 167ec9d..0000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM nfcore/base:1.7 -LABEL authors="Céline Noirot" \ - description="Docker image containing all requirements for get/template pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/GeT-template-1.0dev/bin:$PATH -- GitLab From 0739b4a7ae4c206f03d9b267c5551086c31fe418 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 9 Sep 2024 14:08:15 +0200 Subject: [PATCH 5/9] treatment demuxStat logs has new file names Ref: #110 --- modules/local/module_NGL-Bi.nf | 3 ++- sub-workflows/local/core_illumina.nf | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index c32681f..995ebf5 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -26,6 +26,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA { input: val nglCode path csvFile + val lane output: path("*.log") @@ -34,7 +35,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA { script: def args = task.ext.args ?: '' forceOption = workflow.resume ? "--force" : '' - def lane = params.lane ?: '0' + def level = lane ? 
"run_${lane}" : 'readsets' """ perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\ --code $nglCode \\ diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index 5530f45..bb1c476 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -57,8 +57,8 @@ workflow CORE_ILLUMINA { if (params.insert_to_ngl){ // Add demultiplexStat treatments - TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV) - TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV) + TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV, params.lane) + TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV, '') } emit: -- GitLab From 0dbf55a93a381b5066f2607ec2a9def015445b18 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 9 Sep 2024 16:36:19 +0200 Subject: [PATCH 6/9] Get old readsets file if exists Ref: #108 --- conf/prod.config | 2 +- conf/test.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/prod.config b/conf/prod.config index 4f9b19c..eb1bd1a 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -7,7 +7,7 @@ process { publishDir = [ path: "${params.outdir}/ngl", mode: 'copy', - pattern: "*.{log,created}" + pattern: "*.{log,created,existing}" ] } } \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 4294ca1..c2de5cd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -14,7 +14,7 @@ process { publishDir = [ path: "${params.outdir}/ngl", mode: 'copy', - pattern: "*.{log,created}" + pattern: "*.{log,created,existing}" ] } -- GitLab From 31f164934e31f23abba63a88f561990e275cca67 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Tue, 10 Sep 2024 11:31:26 +0200 Subject: [PATCH 7/9] update version number --- conf/report.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/report.config 
b/conf/report.config index 12ef314..5ee8982 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.20.0' + version = '1.23.0' } \ No newline at end of file -- GitLab From 1b272703bfbe2b63d95176aae69b830e8ea6c77d Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Tue, 10 Sep 2024 11:32:30 +0200 Subject: [PATCH 8/9] Add default value in docs/usage.md --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 6bf72b3..7831e2c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -64,7 +64,7 @@ _Default_ : null - **`--host`** [str] The name of the server on which the pipeline is launched. This value is used to select slurm modules to load. -_Default_ : genologin +_Default_ : genobioinfo - **`--shared_modules`** [str] Path to the shared_modules sources. This is nextflow modules shared between several pipelines. @@ -195,7 +195,7 @@ _Default_ : 0.1 - **`--assignation_databank`** [str] Path to the databank for taxonomic assignment. -_Default_ : null +_Default_ : /save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial - **`--blast_outfmt`** [int] BLAST output format. 
-- GitLab From 259fab8c77435ea23656f09918e28563ad3aad5b Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Tue, 10 Sep 2024 11:33:15 +0200 Subject: [PATCH 9/9] DUPLICATED_READS process now receives task.ext.args --- modules/local/module_core.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index ccb92b7..be422b7 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -126,7 +126,7 @@ process DUPLICATED_READS { shell: R1_name=file(fastq[0]).simpleName R2_name=file(fastq[1]).simpleName - def args = task.ext.args ?: '' + args = task.ext.args ?: '' analyse_type = task.ext.analyse_type ?: params.default_label ''' fastp \ -- GitLab