From dd32059ffdd88eb67434d9fa07aa3b18d0b3be1e Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 22 Aug 2024 17:03:38 +0200
Subject: [PATCH 1/9] Update manifest version

---
 conf/report.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/report.config b/conf/report.config
index 8a5bb71..12ef314 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
 	description = "Workflow for Illumina data quality control"
 	mainScript = 'main.nf'
 	nextflowVersion = '>=0.32.0'
-	version = '1.19.0'
+	version = '1.20.0'
 }
\ No newline at end of file
-- 
GitLab


From a9c5ac3581eec41a517528ce4fc486a62bc03b84 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 2 Sep 2024 14:13:00 +0200
Subject: [PATCH 2/9] SortMeRNA runs faster

	Ref: #109
---
 conf/base.config | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 465f361..3e613d4 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -324,6 +324,10 @@ process {
 		module = toolsModuleHash['SEQTK']
 	}
 
+	withName: ADD_MULTIQC {
+		errorStrategy = 'ignore'
+	}
+
 	withName: MULTIQC {		
         ext.args = [
             "--config ${baseDir}/assets/multiqc_config.yaml",
@@ -344,9 +348,9 @@ process {
 
 	withName: SORTMERNA {
 		module  = toolsModuleHash['SORTMERNA']
-        memory  = { checkMax( 10.GB * task.attempt * params.resource_factor, 'memory' ) }
-        time    = { checkMax( 10.h * task.attempt, 'time' ) }
-        cpus    = { checkMax( 1 * task.attempt, 'cpus' ) }
+        memory  = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) }
+        time    = { checkMax( 3.h * task.attempt, 'time' ) }
+        cpus    = { checkMax( 28 * task.attempt, 'cpus' ) }
 
         publishDir = [
             path: "${params.outdir}/rRNA",
-- 
GitLab


From a7446f6c641c6b8ea13cc5dc4379df42b733fc5f Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 4 Sep 2024 09:41:29 +0200
Subject: [PATCH 3/9] Get the version of each tool and use it in the MQC report

	Ref: #15
---
 assets/multiqc_config.yaml                    | 10 ++--
 conf/base.config                              | 19 ++++++-
 conf/dependencies_genobioinfo.config          |  2 +-
 conf/functions.config                         | 52 +++++++++++++++++++
 main.nf                                       |  2 +-
 modules/local/module_core.nf                  | 28 +++++++++-
 modules/local/module_diversity.nf             | 12 +++++
 modules/local/module_dna.nf                   | 24 +++++++++
 modules/local/module_rna.nf                   | 28 +++++++---
 nextflow.config                               | 10 +++-
 sub-workflows/local/core_illumina.nf          |  4 ++
 sub-workflows/local/core_pipeline.nf          |  6 +++
 sub-workflows/local/diversity_qc.nf           |  5 ++
 sub-workflows/local/dna_qc.nf                 |  9 ++++
 sub-workflows/local/rna_qc.nf                 | 15 +++++-
 .../{illumina_qc.nf => short_reads_qc.nf}     | 21 +++++++-
 16 files changed, 226 insertions(+), 21 deletions(-)
 rename workflow/{illumina_qc.nf => short_reads_qc.nf} (94%)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index b5f8ae5..75a0989 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -11,14 +11,14 @@ report_comment: >
 
 show_analysis_paths: False
 show_analysis_time: False
-
+disable_version_detection: true
 ## Number formatting
 thousandsSep_format: " "
 
 ## General Statistics table
 table_columns_visible:
-  Duplicats: False
-  ContaminationSearch - RNA: True
+  Duplicats: True
+  ContaminationSearch - rRNA: True
   samtools: False
   ReadsStats:
     percent_duplicates: False
@@ -43,7 +43,7 @@ extra_fn_clean_exts:
   - "_screen"
 
 ## Plot config
-export_plots: true
+export_plots: false
 plots_force_interactive: true
 
 ## Module config
@@ -88,7 +88,7 @@ module_order:
 
 # Pattern
 sp:
-  fastqc:
+  fastqc/zip:
     fn: "*.zip"
   fastq_screen:
     fn: '*_screen.txt'
diff --git a/conf/base.config b/conf/base.config
index 3e613d4..965385f 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -69,6 +69,7 @@ process {
 			saveAs: { filename -> "${name}.fastq.gz" }*/
 		]
 
+		ext.analyse_type = params.read_stats_label
 		module = toolsModuleHash['ILLUMINA_FILTER']
 		cpus = { checkMax( 3 * task.attempt, 'cpus' ) }
 		time = { checkMax( 4.h * task.attempt, 'time' ) }
@@ -82,7 +83,7 @@ process {
 		]
 
 		ext.args = "--reads_to_process ${params.fastp_n_reads}"
-
+		ext.analyse_type = params.duplicats_label
 		module = toolsModuleHash['FASTP']
 		time = { checkMax( 5.h * task.attempt, 'time' ) }
 		memory = { checkMax( 3.GB * task.attempt, 'memory' ) }
@@ -103,6 +104,7 @@ process {
 			saveAs: { filename -> "${name}.html" }
 		]
 		
+		ext.analyse_type = params.read_stats_label
 		module = toolsModuleHash['FASTQC'] 
 		maxRetries = 4
 		cpus = { checkMax( 2 * task.attempt, 'cpus' ) }
@@ -112,6 +114,7 @@ process {
 	withName: FASTQSCREEN {
 		time = { checkMax( 1.h * task.attempt, 'time' ) }
 		module = toolsModuleHash['FASTQSCREEN']
+		ext.analyse_type = params.contamination_search_label
 
 		publishDir = [
 			path: "${params.outdir}/ContaminationSearch/FastQ-Screen",
@@ -125,6 +128,8 @@ process {
 		cpus = { checkMax( 6 * task.attempt, 'cpus' ) }
 	    memory = { checkMax( 16.GB * task.attempt, 'memory' ) }
 	    time = { checkMax( 3.d * task.attempt, 'time' ) }
+		
+		ext.analyse_type = params.alignment_stats_label
 
 		publishDir = [
 			path: "${params.outdir}/alignment/bwa",
@@ -137,7 +142,8 @@ process {
 		module = toolsModuleHash['SALMON']
 		time = { checkMax( 1.h * task.attempt, 'time' ) }
 		memory = { checkMax( 3.GB * task.attempt, 'memory' ) }
-		cpus = 8 
+		cpus = 8
+		ext.analyse_type = params.alignment_stats_label
 	}
 
 	withName: SALMON_QUANT {
@@ -145,6 +151,7 @@ process {
 		time = { checkMax( 1.h * task.attempt, 'time' ) }
 		memory = { checkMax( 10.GB * task.attempt, 'memory' ) }
 		cpus = 8 
+		ext.analyse_type = params.alignment_stats_label
 
 		publishDir = [
 			path: "${params.outdir}/alignmentStats",
@@ -157,6 +164,7 @@ process {
 		module = toolsModuleHash['STAR']
 		memory = { checkMax( 50.GB * task.attempt, 'memory' ) }
 		cpus = 8
+		ext.analyse_type = params.alignment_stats_label
 	}
 
 	withName: STAR_ALIGN {	
@@ -164,6 +172,7 @@ process {
 		memory = { checkMax( 20.GB * task.attempt, 'memory' ) }
 		cpus = 2
 		time = { checkMax( 1.d * task.attempt, 'memory' ) }
+		ext.analyse_type = params.alignment_stats_label
 
 		publishDir = [
 			path: "${params.outdir}/alignmentStats",
@@ -184,6 +193,7 @@ process {
 			"-m  ${params.min_overlap}",
             "-M ${params.max_overlap}"
 		].join(' ')
+		ext.analyse_type = params.join_pairs_label
 		
 		publishDir = [
 			path: "${params.outdir}/joinPair",
@@ -197,6 +207,7 @@ process {
 		time = { checkMax( 5.h * task.attempt, 'time' ) }
 		memory = { checkMax( 2.GB * task.attempt, 'memory' ) }
 		cpus = 4
+		ext.analyse_type = params.join_pairs_label
 
 		ext.args = [
 			"-max_target_seqs ${params.blast_max_target}",
@@ -352,6 +363,8 @@ process {
         time    = { checkMax( 3.h * task.attempt, 'time' ) }
         cpus    = { checkMax( 28 * task.attempt, 'cpus' ) }
 
+		ext.analyse_type = params.contamination_search_label
+
         publishDir = [
             path: "${params.outdir}/rRNA",
             mode: 'copy',
@@ -374,6 +387,8 @@ process {
 	    memory = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) }
 	    time = { checkMax( 3.h * task.attempt, 'time' ) }
 
+		ext.analyse_type = params.alignment_stats_label
+
 		publishDir = [
 			path: "${params.outdir}/alignmentStats/qualimap",
 			mode: 'copy',
diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config
index b715b7a..6719d12 100644
--- a/conf/dependencies_genobioinfo.config
+++ b/conf/dependencies_genobioinfo.config
@@ -25,7 +25,7 @@ toolsModuleHash['BLAST'] = ['bioinfo/NCBI_Blast+/2.10.0+']
 //			   SHARED MODULES
 //=========================================
 toolsModuleHash['SEQTK'] = ['bioinfo/Seqtk/1.3']
-toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14']
+toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.24.1']
 toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6']  // version upgraded face to genologin
 toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20']
 toolsModuleHash['KRONA'] = ['bioinfo/Krona/2.8.1']      // version upgraded face to genologin
diff --git a/conf/functions.config b/conf/functions.config
index e4ff017..7c8b722 100644
--- a/conf/functions.config
+++ b/conf/functions.config
@@ -1,3 +1,5 @@
+import org.yaml.snakeyaml.Yaml
+
 def helpMessage() {
     log.info"""
 
@@ -208,4 +210,54 @@ def sendFinalMail(formatted_date, summary) {
 	output_tf.withWriter { w -> w << email_txt }
 
     return mail_sent
+}
+
+//
+// [nf-core] Build the workflow version string from the manifest, adding a
+// leading 'v' when missing; yields "" when the manifest has no version.
+//
+def getWorkflowVersion() {
+    def manifestVersion = workflow.manifest.version
+    if (!manifestVersion) {
+        return ""
+    }
+    def needsPrefix = !manifestVersion.startsWith('v')
+    return (needsPrefix ? 'v' : '') + manifestVersion
+}
+
+//
+// Inspired by [nf-core]: flatten one versions.yml into "label - tool: version" YAML text.
+// Keys are cut at the first ':' and entries with a null value are dropped.
+//
+def processVersionsFromYAML(yaml_file) {
+    Yaml yaml = new Yaml()
+    // `def` keeps the map local instead of leaking it into the script binding;
+    // `?: [:]` tolerates an empty YAML document (snakeyaml returns null for it).
+    def versions = (yaml.load(yaml_file) ?: [:])
+        .findAll { k, v -> v != null }
+        .collectEntries { k, v -> [k.tokenize(':')[0], v] }
+
+    return yaml.dumpAsMap(versions).trim()
+}
+
+//
+// [nf-core] Render the pipeline and Nextflow versions as two "Workflow - ..." YAML lines
+//
+def workflowVersionToYAML() {
+    // Both entries share the "Workflow - " key prefix; indentation is removed by stripIndent()
+    return """
+    Workflow - $workflow.manifest.name: ${getWorkflowVersion()}
+    Workflow - Nextflow: $workflow.nextflow.version
+    """.stripIndent().trim()
+}
+
+//
+// [nf-core] Map each versions.yml to de-duplicated YAML text, plus the workflow entry
+//
+def softwareVersionsToYAML(ch_versions) {
+    def ch_tool_versions = ch_versions
+        .unique()
+        .map { version_file -> processVersionsFromYAML(version_file) }
+        .unique()
+    return ch_tool_versions.mix(Channel.of(workflowVersionToYAML()))
+}
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 639c4f5..403cbe4 100644
--- a/main.nf
+++ b/main.nf
@@ -32,7 +32,7 @@ params.summary.collect{k,v -> println "$k : $v"}
     NAMED WORKFLOW FOR PIPELINE
 ========================================================================================
 */
-include { SHORT_READS_QC } from "$baseDir/workflow/illumina_qc.nf"
+include { SHORT_READS_QC } from "$baseDir/workflow/short_reads_qc.nf"
 
 workflow PLAGE {
     SHORT_READS_QC()
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index 43e8128..ccb92b7 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -46,11 +46,17 @@ process FASTQC {
 	output:
 		tuple val(name), path("*_fastqc.html") , emit: html
 		tuple val(name), path("*_fastqc.zip") , emit: zip
+		path("versions.yml") , emit: versions
 		// path log files
 		
 	script:
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read}
+
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+	END_VERSIONS
 	"""
 }
 
@@ -64,12 +70,18 @@ process ILLUMINA_FILTER {
 	output:
 		tuple val("$name"), path("*.fastq.gz"), emit: reads
 		path("*.output"), emit: log
+		path("versions.yml") , emit: versions
 	
 	script:
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz	
+		
+	cat <<-END_VERSIONS > versions.yml
+	'${analyse_type} - fastq_illumina_filter': \$( fastq_illumina_filter -h | head -1 | sed -n 's/.*version \\([0-9.]*\\).*/\\1/p'  )
+	END_VERSIONS
 	"""
-	
+			//	
 }
 
 process FASTQSCREEN {	
@@ -80,17 +92,23 @@ process FASTQSCREEN {
 	
 	output:
 		tuple val(sample), path("*.txt"), emit: report
+		path("versions.yml") , emit: versions
 	
 	script:
 	def args = task.ext.args ?: ''
 	def defaultConf = "${baseDir}/assets/fastq_screen.conf_example"
 	def inputConf = "${params.inputdir}/fastq_screen.conf"
 	def confFile = file(inputConf).exists() ? inputConf : defaultConf
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		fastq_screen \\
 			$reads \\
 			--conf ${confFile} \\
 			$args
+
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - fastq_screen: \$( fastq_screen --version | sed '/FastQ Screen v/!d; s/.*v//' )
+	END_VERSIONS
 	"""
 }
 
@@ -102,12 +120,14 @@ process DUPLICATED_READS {
 
 	output:
 		tuple val(sample), path("*.json"), emit: json
-		tuple val(sample), path("*.log")
+		tuple val(sample), path("*.log"), emit: log
+		path("versions.yml") , emit: versions
 
 	shell:
 	R1_name=file(fastq[0]).simpleName
 	R2_name=file(fastq[1]).simpleName
 	def args = task.ext.args ?: ''
+	analyse_type = task.ext.analyse_type ?: params.default_label
 	'''
 		fastp \
 		-i !{fastq[0]} \
@@ -120,6 +140,10 @@ process DUPLICATED_READS {
 		--json !{R1_name}_fastp.json \
 		!{args} \
 		2> !{R1_name}.log
+
+	cat <<-END_VERSIONS > versions.yml
+	!{analyse_type} - fastp: $(fastp --version 2>&1 | sed -e 's/fastp //g')
+	END_VERSIONS
 	'''
 }
 
diff --git a/modules/local/module_diversity.nf b/modules/local/module_diversity.nf
index b2a27ab..787a49d 100644
--- a/modules/local/module_diversity.nf
+++ b/modules/local/module_diversity.nf
@@ -13,9 +13,11 @@ process JOIN_PAIR {
         tuple val(sample), path("*.notCombined_*.fastq.gz"), emit: notCombined
         tuple val(sample), path("*.log"), emit: logs
         tuple val(sample), path("*.hist"), emit: histogram
+        path("versions.yml"), emit: versions
 	
 	script:
     def args = task.ext.args ?: ''
+    def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		flash \\
             $reads \\
@@ -26,6 +28,10 @@ process JOIN_PAIR {
             > ${sample}_flash.log
 
         mv ${sample}.hist ${sample}_flash.hist
+
+cat <<-END_VERSIONS > versions.yml
+${analyse_type} - flash: \$( flash --version | sed \'/^FLASH v/!d; s/.*v//' )
+END_VERSIONS
 	"""
 }
 
@@ -39,9 +45,11 @@ process BLAST_N {
 
     output:
         tuple val(sample), path("*.blastn"), emit: results
+        path("versions.yml"), emit: versions
 
     script:
     def args = task.ext.args ?: ''
+    def analyse_type = task.ext.analyse_type ?: params.default_label
     """
         db_dir=\$(dirname $db)
         [[ `find -L \$db_dir -name "*.00.idx"` ]] && isIndexed='true' || isIndexed='false'
@@ -53,6 +61,10 @@ process BLAST_N {
             -use_index \$isIndexed \\
             $args \\
             -out ${sample}.blastn 
+
+    cat <<-END_VERSIONS > versions.yml
+    ${analyse_type} - blastn: \$(blastn -version 2>&1 | sed '/^.*blastn: /!d;  s/.*: //')
+    END_VERSIONS
     """
 
 }
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 02836b5..66fc4e2 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -13,12 +13,18 @@ process BWA_ALIGNMENT {
 	output:
 		tuple val(sample), path("*.log"), emit: log
 		tuple val(sample), path("*.sam"), emit: sam
+		path("versions.yml"), emit: versions	// named output, read by callers as BWA_ALIGNMENT.out.versions
 		
 	script:
 	def reference = params.reference_genome ?: params.reference_transcriptome
 	def referenceName=file(reference).toString().split('/')[6]
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log
+	
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - bwa: \$( bwa 2>&1 | sed '/^Version/!d; s/.*: //' )
+	END_VERSIONS
 	"""
 }
 
@@ -32,10 +38,16 @@ process SAMTOOLS_VIEW {
 		
 	output:
 		tuple val(sample), path("*.bam"), emit: bam
+		path("versions.yml"), emit: versions	// named output, read by callers as SAMTOOLS_VIEW.out.versions
 		
 	script:
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam
+	
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //')
+	END_VERSIONS
 	"""
 }
 
@@ -51,11 +63,17 @@ process SAMTOOLS_SORT {
 	output:
 		tuple val(sample), path("*.log"), emit: log
 		tuple val(sample), path("*.bam"), emit: bam
+		path("versions.yml"), emit: versions	// named output, read by callers as SAMTOOLS_SORT.out.versions
 		//path("*.bam"), emit: bam
 
 	script:	// Pourquoi unmerged ??? https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""	
 		samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log
+	
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //')
+	END_VERSIONS
 	"""
 }
 
@@ -71,10 +89,16 @@ process SAMTOOLS_FLAGSTATS {
 	output:
 		tuple val(sample), path("*.log"), emit: log
 		tuple val(sample), path("*.txt"), emit: txt
+		path("versions.yml"), emit: versions	// named output, read by callers as SAMTOOLS_FLAGSTATS.out.versions
 
 	script:
+	def analyse_type = task.ext.analyse_type ?: params.default_label
 	"""
 		samtools flagstat ${bam} > ${sample}_flagstat.txt 2>> ${sample}.log
+	
+	cat <<-END_VERSIONS > versions.yml
+	${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //')
+	END_VERSIONS
 	"""
 }
 
diff --git a/modules/local/module_rna.nf b/modules/local/module_rna.nf
index bd1a288..5201617 100644
--- a/modules/local/module_rna.nf
+++ b/modules/local/module_rna.nf
@@ -7,13 +7,19 @@ process SALMON_INDEX {
     
     output:
         path("index/"), emit: index
+        path("versions.yml"), emit: versions
 
     script:
+    def analyse_type = task.ext.analyse_type ?: params.default_label
     """
         salmon index \
         -t ${params.reference_transcriptome} \
         -i ./index \
         --threads ${task.cpus}
+
+    cat <<-END_VERSIONS > versions.yml
+    ${analyse_type} - salmon: \$(salmon --version | sed 's/salmon //')
+    END_VERSIONS
     """
 }
 
@@ -28,12 +34,13 @@ process SALMON_QUANT {
 
     output:
         tuple val(sample), path("$sample/"), emit: results
-        path("versions.yml"), emit: version
+        path("versions.yml"), emit: versions
 
     script:
     def args = task.ext.args ?: ''
     def R1 = reads.find { it =~ /.*_R1_.*/}
     def R2 = reads.find { it =~ /.*_R2_.*/}
+    def analyse_type = task.ext.analyse_type ?: params.default_label
     """
         salmon quant \\
             --libType ${lib_type} \\
@@ -45,10 +52,9 @@ process SALMON_QUANT {
             $args \\
             2> /dev/null
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
-        END_VERSIONS
+    cat <<-END_VERSIONS > versions.yml
+    ${analyse_type} - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
+    END_VERSIONS
     """
 }
 
@@ -62,11 +68,13 @@ process STAR_INDEX {
     
     output:
         path("index/"), emit: index
+        path("versions.yml"), emit: versions
 
     script:
     // renamme en .fa ?? utile ??
     def args = task.ext.args ?: ''
     def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def analyse_type = task.ext.analyse_type ?: params.default_label
     """
         NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fai}`
         
@@ -78,7 +86,10 @@ process STAR_INDEX {
             --runThreadN $task.cpus \\
             --genomeSAindexNbases \$NUM_BASES \\
             $args
-       
+
+    cat <<-END_VERSIONS > versions.yml
+    ${analyse_type} - star: \$(STAR --version)
+    END_VERSIONS
     """
 }
 
@@ -94,10 +105,12 @@ process STAR_ALIGN {
         tuple val(sample), path("${sample}_Log.final.out"), emit: results
         tuple val(sample), path("${sample}_Log.out"), emit: log
         tuple val(sample), path("${sample}_Aligned.out.sam"), emit: sam
+        path("versions.yml"), emit: versions
 
     script:
     def args = task.ext.args ?: ''
     def read_files_cmd = reads[0].endsWith('.gz') ? '--readFilesCommand zcat' : ''
+    def analyse_type = task.ext.analyse_type ?: params.default_label
     """
         STAR \\
             --outFileNamePrefix  ${sample}_ \\
@@ -107,5 +120,8 @@ process STAR_ALIGN {
             --readFilesIn $reads \\
             $read_files_cmd
 
+    cat <<-END_VERSIONS > versions.yml
+    ${analyse_type} - star: \$(STAR --version)
+    END_VERSIONS
     """
 }
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 8bff246..1a5d1b7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,7 +46,7 @@ params {
 	min_overlap = 20
 	max_overlap = 55
 	max_mismatch_density = 0.1
-	assignation_databank = ''
+	assignation_databank = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial'
 	blast_outfmt = 7
 	blast_max_target = 10
 
@@ -72,6 +72,14 @@ params {
 	max_time = "90.d"
 	max_cpus = "48"
 
+	// Labels to display tool versions in MultiQC report
+	default_label = 'Pipeline'
+	read_stats_label = 'ReadStats'
+	duplicats_label = 'Duplicats'
+	contamination_search_label = 'ContaminationSearch'
+	join_pairs_label = 'JoinPairs'
+	alignment_stats_label = 'AlignmentStats'
+
 	// OTHERS
 	cluster_options = ''
 	is_dev_mode = false
diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf
index 61f8885..5530f45 100644
--- a/sub-workflows/local/core_illumina.nf
+++ b/sub-workflows/local/core_illumina.nf
@@ -39,6 +39,8 @@ workflow CORE_ILLUMINA {
 		readsetsFile
 		
 	main:		
+		ch_versions = Channel.empty()
+
 		// ----------- DemultiplexStat
 		PREP_DEMUXSTAT(sampleSheet)
 		DEMUX_STATS(demuxStatXML, PREP_DEMUXSTAT.out, demuxSummary)
@@ -50,6 +52,7 @@ workflow CORE_ILLUMINA {
 		} else {	// Si MiSeq ou Nova + noIndex
 			ILLUMINA_FILTER(fastq)
 			fastq_good = ILLUMINA_FILTER.out.reads
+			ch_versions = ch_versions.mix(ILLUMINA_FILTER.out.versions)
 		}
 
 		if (params.insert_to_ngl){
@@ -61,5 +64,6 @@ workflow CORE_ILLUMINA {
     emit:
         fastq = fastq_good
 		demuxStat = DEMUX_STATS.out.demultiplexStatsTSV
+		versions = ch_versions
 }
 
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index 20d45dc..f19e856 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -30,11 +30,15 @@ workflow CORE {
 		ch_read
 		
 	main:
+		ch_versions = Channel.empty()
+
 		// ----------- FASTQC
 		FASTQC(ch_read)
+		ch_versions = ch_versions.mix(FASTQC.out.versions)
 		
 		// ----------- ContaminationSearch
 		FASTQSCREEN(ch_read)
+		ch_versions = ch_versions.mix(FASTQSCREEN.out.versions)
 
 		// ----------- Recherche Duplicats
 		GUNZIP(ch_read)
@@ -62,10 +66,12 @@ workflow CORE {
 			.map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }
 			.groupTuple()
 		) // need fastq paired !!!
+		ch_versions = ch_versions.mix(DUPLICATED_READS.out.versions)
 		
 	emit:
 		fastqc_report = FASTQC.out.zip ?: Channel.empty()
 		fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty()
 		fastp_report = DUPLICATED_READS.out.json
 		subset_fastq = unzipped_fastq
+		versions = ch_versions
 }
diff --git a/sub-workflows/local/diversity_qc.nf b/sub-workflows/local/diversity_qc.nf
index 06c59d0..a46177c 100644
--- a/sub-workflows/local/diversity_qc.nf
+++ b/sub-workflows/local/diversity_qc.nf
@@ -24,8 +24,11 @@ workflow DIVERSITY_QC {
 		fastq
 
 	main:
+		ch_versions = Channel.empty()
+
 		// Pairs merging
 		JOIN_PAIR(fastq)
+		ch_versions = ch_versions.mix(JOIN_PAIR.out.versions)
 
 		// SubsetAssignation
 		if (params.assignation_databank != '') {
@@ -37,6 +40,7 @@ workflow DIVERSITY_QC {
 
 			// -- Taxonomic assignation
 			BLAST_N(FQ_TO_FA.out.fasta, params.assignation_databank)
+			ch_versions = ch_versions.mix(BLAST_N.out.versions)
 			KRONA_BLAST(BLAST_N.out.results)
 			krona_html = KRONA_BLAST.out.html
 
@@ -49,4 +53,5 @@ workflow DIVERSITY_QC {
 		histogram = JOIN_PAIR.out.histogram
 		logs = JOIN_PAIR.out.logs
 		krona = krona_html
+		versions = ch_versions
 }
\ No newline at end of file
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index 7f39268..b7d5e6a 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -25,6 +25,8 @@ workflow DNA_QC {
 		fastq
 
 	main:
+		ch_versions = Channel.empty()
+
 		if ( "$params.reference_genome" != '' || "$params.reference_transcriptome" != '') {
 			BWA_ALIGNMENT(fastq)
 			SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam)
@@ -35,6 +37,12 @@ workflow DNA_QC {
 			qualimap_report_emitted =  QUALIMAP.out.report
 			flagstats_output_emitted = SAMTOOLS_FLAGSTATS.out.txt
 			bam_output_emitted = SAMTOOLS_SORT.out.bam
+			ch_versions = ch_versions.mix(
+				BWA_ALIGNMENT.out.versions,
+				SAMTOOLS_VIEW.out.versions,
+				SAMTOOLS_SORT.out.versions,
+				SAMTOOLS_FLAGSTATS.out.versions
+			)
 
 		} else {
 			System.out.println "Pas de rÃ©fÃ©rence genomique ou transcriptomique renseignÃ©e, on ne peut pas faire d'alignement"
@@ -48,4 +56,5 @@ workflow DNA_QC {
 		qualimap_report = qualimap_report_emitted
 		flagstats_output = flagstats_output_emitted
 		bam = bam_output_emitted
+		versions = ch_versions
 }
\ No newline at end of file
diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf
index 61d6638..bfac0d8 100644
--- a/sub-workflows/local/rna_qc.nf
+++ b/sub-workflows/local/rna_qc.nf
@@ -39,9 +39,11 @@ workflow RNA_QC {
 		sortmerna_db
 
 	main:
-		fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple()
+		ch_versions = Channel.empty()
 		align_results = Channel.empty()
 
+		fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple()
+
 		if ( "$params.reference_genome" != '' ) {
 			// if indexFiles does not exist
 			if ( ! file(file(params.reference_genome).getParent() + '/SAindex').exists() || params.make_star_index) {
@@ -49,6 +51,7 @@ workflow RNA_QC {
 				reference_genome = Channel.from(params.reference_genome)
 				genome_index = SAMTOOLS_FAIDX(reference_genome).index
 				star_index = STAR_INDEX(reference_genome, genome_index).index
+				ch_versions = ch_versions.mix(STAR_INDEX.out.versions)
 			} else {
 				star_index = Channel.from(file(params.reference_genome).getParent())
 			}
@@ -58,6 +61,12 @@ workflow RNA_QC {
 			SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam)
 			SAMTOOLS_FLAGSTATS(SAMTOOLS_VIEW.out.bam)
 			qualimap_report_emitted = QUALIMAP(SAMTOOLS_SORT.out.bam).report
+			ch_versions = ch_versions.mix(
+				STAR_ALIGN.out.versions,
+				SAMTOOLS_VIEW.out.versions,
+				SAMTOOLS_SORT.out.versions,
+				SAMTOOLS_FLAGSTATS.out.versions
+			)
 
 		} else if ("$params.reference_transcriptome" != '') {
 			// 10X + transcriptome > use BWA
@@ -70,12 +79,14 @@ workflow RNA_QC {
 				)
 				align_results = BWA.out.flagstats_output
 				qualimap_report_emitted = BWA.out.qualimap_report
+				ch_versions = ch_versions.mix(BWA.out.versions)
 				
 			} else {
 				// if indexFiles does not exist
 				if ( ! file(file(params.reference_transcriptome).getParent() + '/seq.bin').exists()) {
 					println "SALMON index files does not exists -> Let's start transcriptome indexing..."
 					salmon_index = SALMON_INDEX().index
+					ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
 				} else {
 					salmon_index = Channel.from(file(params.reference_transcriptome).getParent())
 				}
@@ -98,6 +109,7 @@ workflow RNA_QC {
 					ch_lib_type
 				).results
 				qualimap_report_emitted= Channel.empty()
+				ch_versions = ch_versions.mix(SALMON_QUANT.out.versions)
 			}
 
 		} else {
@@ -114,4 +126,5 @@ workflow RNA_QC {
 		sortmerna_log = SORTMERNA.out.log
 		qualimap_report = qualimap_report_emitted
 		//flagstats_output = flagstats_output_emitted
+		versions = ch_versions
 }
\ No newline at end of file
diff --git a/workflow/illumina_qc.nf b/workflow/short_reads_qc.nf
similarity index 94%
rename from workflow/illumina_qc.nf
rename to workflow/short_reads_qc.nf
index d2d87ca..f393c3d 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/short_reads_qc.nf
@@ -6,7 +6,8 @@ nextflow.enable.dsl = 2
 include {	helpMessage;
 			createSummary;
 			sendBeginMail;
-			sendFinalMail
+			sendFinalMail;
+			softwareVersionsToYAML
 } from "$baseDir/conf/functions.config"
 
 // Show help message
@@ -124,6 +125,8 @@ sendBeginMail(format.format(new Date()))
 // -------------------------------------------------
 workflow SHORT_READS_QC {
 	ch_mqc = Channel.empty()
+	ch_versions = Channel.empty()
+
 	WORKFLOW_SUMMARY()
 
 	if (params.insert_to_ngl){
@@ -140,6 +143,7 @@ workflow SHORT_READS_QC {
 	if (! params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) {
 		CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created)
 		fastq = CORE_ILLUMINA.out.fastq
+		ch_versions = ch_versions.mix(CORE_ILLUMINA.out.versions)
 	} else {
 		fastq = ch_read
 	}
@@ -148,6 +152,7 @@ workflow SHORT_READS_QC {
 	}
 
 	CORE(fastq)
+	ch_versions = ch_versions.mix(CORE.out.versions)
 
 	if (params.data_nature =~ 'DNA|GENOMIC') {
 		DNA_QC(CORE.out.subset_fastq
@@ -160,6 +165,7 @@ workflow SHORT_READS_QC {
 			DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]),
 			DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([])
 		)
+		ch_versions = ch_versions.mix(DNA_QC.out.versions)
 
 		// DTM process
 		if (params.DTM_mode) {
@@ -174,6 +180,7 @@ workflow SHORT_READS_QC {
 			RNA_QC.out.sortmerna_log.collect{it[1]}.ifEmpty([]),
 			RNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]),
 		)
+		ch_versions = ch_versions.mix(RNA_QC.out.versions)
 
 	} else if (params.data_nature =~ "16S|Amplicon|METAGENOMIC|METATRANSCRIPTOMIC") {
 		DIVERSITY_QC(fastq
@@ -187,18 +194,28 @@ workflow SHORT_READS_QC {
 			DIVERSITY_QC.out.histogram.collect{it[1]}.ifEmpty([]),
 			DIVERSITY_QC.out.logs.collect{it[1]}.ifEmpty([])
 		)
+		ch_versions = ch_versions.mix(DIVERSITY_QC.out.versions)
 
 	} else {
 		System.out.println "Le QC des donnÃ©es ${params.data_nature} n'a pas de sub-workflow spÃ©cifique pour le moment."
 		ch_mqc = ch_mqc.mix( Channel.empty() )
 	}
+
+	version_yaml = softwareVersionsToYAML(ch_versions)
+    	.collectFile(
+			storeDir: "${params.outdir}/pipeline_info",
+			name: 'software_mqc_versions.yml',
+			sort: true,
+			newLine: true
+		)
 	
 	MULTIQC(WORKFLOW_SUMMARY.out.ifEmpty([])
 		.mix(
 			CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]),
 			CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]),
 			CORE.out.fastp_report.collect{it[1]}.ifEmpty([]),
-			ch_mqc.collect().ifEmpty([])
+			ch_mqc.collect().ifEmpty([]),
+			version_yaml
 		).collect()
 	)
 
-- 
GitLab


From 7669d78bccbcf2c32b139fee993b526fedc1621c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 4 Sep 2024 10:35:55 +0200
Subject: [PATCH 4/9] Remove useless CICD files

---
 .gitlab-ci.yml | 40 ----------------------------------------
 Dockerfile     |  7 -------
 2 files changed, 47 deletions(-)
 delete mode 100644 .gitlab-ci.yml
 delete mode 100644 Dockerfile

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
deleted file mode 100644
index e336847..0000000
--- a/.gitlab-ci.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# recipe for building singularity image and deploy it on the registery for template
-image:
-   name: nextflow/nextflow
-   entrypoint: [""]
-
-stages:
- - build
- - deploy
- - test
- 
-push:
-   stage: test
-   script:
-     - nextflow run ./main.nf
-     - nextflow run ./main.nf --help
-
-# Build Singularity container bwa_v0.7.17.sif
-singularity-image:
-   image: quay.io/singularity/singularity:v3.4.0
-   stage: build
-   script:
-       - singularity build template.sif Singularityfile
-   artifacts:
-      paths:
-       - template.sif
-   only:
-    changes:
-      - Singularityfile
-      - environment.yml
-
-# Push the image template.sif on the registry
-deploy:
-   image: quay.io/singularity/singularity:v3.4.0
-   stage: deploy
-   script:
-       - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" template.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG"
-   only:
-      changes:
-         - Singularityfile
-         - environment.yml
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 167ec9d..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,7 +0,0 @@
-FROM nfcore/base:1.7
-LABEL authors="CÃ©line Noirot" \
-      description="Docker image containing all requirements for get/template pipeline"
-
-COPY environment.yml /
-RUN conda env create -f /environment.yml && conda clean -a
-ENV PATH /opt/conda/envs/GeT-template-1.0dev/bin:$PATH
-- 
GitLab


From 0739b4a7ae4c206f03d9b267c5551086c31fe418 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 9 Sep 2024 14:08:15 +0200
Subject: [PATCH 5/9] Treatment demuxStat logs have new file names

	Ref: #110
---
 modules/local/module_NGL-Bi.nf       | 3 ++-
 sub-workflows/local/core_illumina.nf | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index c32681f..995ebf5 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -26,6 +26,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA {
 	input:
 		val nglCode
 		path csvFile
+		val lane
 
 	output:
 		path("*.log")
@@ -34,7 +35,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA {
 	script:
 	def args = task.ext.args ?: ''
 	forceOption = workflow.resume ? "--force" : ''
-	def lane = params.lane ?: '0'
+	def level = lane ? "run_${lane}" : 'readsets'
 	"""
 		perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\
 			--code $nglCode \\
diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf
index 5530f45..bb1c476 100644
--- a/sub-workflows/local/core_illumina.nf
+++ b/sub-workflows/local/core_illumina.nf
@@ -57,8 +57,8 @@ workflow CORE_ILLUMINA {
 
 		if (params.insert_to_ngl){
 			// Add demultiplexStat treatments
-			TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV)
-			TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV)
+			TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV, params.lane)
+			TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV, '')
 		}
 
     emit:
-- 
GitLab


From 0dbf55a93a381b5066f2607ec2a9def015445b18 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 9 Sep 2024 16:36:19 +0200
Subject: [PATCH 6/9] Get old readsets file if exists

	Ref: #108
---
 conf/prod.config | 2 +-
 conf/test.config | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/prod.config b/conf/prod.config
index 4f9b19c..eb1bd1a 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -7,7 +7,7 @@ process {
 		publishDir = [
             path: "${params.outdir}/ngl",
             mode: 'copy',
-			pattern: "*.{log,created}"
+			pattern: "*.{log,created,existing}"
         ]
 	}
 }
\ No newline at end of file
diff --git a/conf/test.config b/conf/test.config
index 4294ca1..c2de5cd 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -14,7 +14,7 @@ process {
 		publishDir = [
             path: "${params.outdir}/ngl",
             mode: 'copy',
-			pattern: "*.{log,created}"
+			pattern: "*.{log,created,existing}"
         ]
 	}
 
-- 
GitLab


From 31f164934e31f23abba63a88f561990e275cca67 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 10 Sep 2024 11:31:26 +0200
Subject: [PATCH 7/9] update version number

---
 conf/report.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/report.config b/conf/report.config
index 12ef314..5ee8982 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
 	description = "Workflow for Illumina data quality control"
 	mainScript = 'main.nf'
 	nextflowVersion = '>=0.32.0'
-	version = '1.20.0'
+	version = '1.23.0'
 }
\ No newline at end of file
-- 
GitLab


From 1b272703bfbe2b63d95176aae69b830e8ea6c77d Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 10 Sep 2024 11:32:30 +0200
Subject: [PATCH 8/9] Add default value in docs/usage.md

---
 docs/usage.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 6bf72b3..7831e2c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -64,7 +64,7 @@ _Default_ : null
 
 - **`--host`** [str]  
 The name of the server on which the pipeline is launched. This value is used to select slurm modules to load.   
-_Default_ : genologin
+_Default_ : genobioinfo
 
 - **`--shared_modules`** [str]  
 Path to the shared_modules sources. This is nextflow modules shared between several pipelines.    
@@ -195,7 +195,7 @@ _Default_ : 0.1
 
 - **`--assignation_databank`** [str]  
 Path to the databank for taxonomic assignment.  
-_Default_ : null
+_Default_ : /save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial
 
 - **`--blast_outfmt`** [int]  
 BLAST output format.  
-- 
GitLab


From 259fab8c77435ea23656f09918e28563ad3aad5b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 10 Sep 2024 11:33:15 +0200
Subject: [PATCH 9/9] Duplicated_reads process gains task.ext.args

---
 modules/local/module_core.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index ccb92b7..be422b7 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -126,7 +126,7 @@ process DUPLICATED_READS {
 	shell:
 	R1_name=file(fastq[0]).simpleName
 	R2_name=file(fastq[1]).simpleName
-	def args = task.ext.args ?: ''
+	args = task.ext.args ?: ''
 	analyse_type = task.ext.analyse_type ?: params.default_label
 	'''
 		fastp \
-- 
GitLab