This report summarizes the demultiplexing of your raw data. Some help to interpret the following plots can be found on the pipeline home page and wiki.
The following image shows the indices found in the most Undetermined barcodes per lane. An index in excess may indicate a wrong SampleSheet with a typo in a given index.
The following image shows the ratio of determined/undetermined reads after demultiplexing. Again, an excess of undetermined (larger than 10-20%) may indicate a wrongly labelled sample in your sample sheet.
The following image is similar. Instead of showing the index, we show here the number of reads per sample.
The following network shows the workflow of the pipeline. Blue boxes are clickable and redirect to dedicated reports.
The analysis was performed with the following Snakefile and configuration file:
"""Demultiplex pipeline
Author: Thomas Cokelaer
Affiliation: Institut Pasteur @ 2020
This pipeline is part of Sequana software (sequana.readthedocs.io)
You will need bcl2fastq/2.20.0. Please see the README and wiki on https://github.com/sequana/sequana_demultiplex
http://emea.support.illumina.com/downloads/bcl2fastq-conversion-software-v2-20.html?langsel=/fr/
"""
from sequana import snaketools as sm
import os
import json
# The configuration file must be declared before any include statement.
configfile: "config.yaml"

# Pipeline manager for the "demultiplex" pipeline, built from the loaded config.
manager = sm.PipelineManagerDirectory("demultiplex", config)
manager.setup(globals(), mode="error")

# Shortcut to the bcl2fastq section of the configuration.
cfg = config["bcl2fastq"]
outdir = os.path.abspath(cfg["output_directory"])

# A blank sample sheet entry skips the check; any other value must point to
# an existing file, otherwise the workflow stops immediately.
if cfg["samplesheet_file"].strip() != "" and not os.path.exists(cfg["samplesheet_file"]):
    raise IOError("Sample sheet {} does not exist".format(cfg["samplesheet_file"]))
# Final targets of the pipeline: the read summary, the table of undetermined
# barcodes, the three plots and the rule graph.
rule all:
    input:
        "{}/Stats/summary.txt".format(outdir),
        "{}/undetermined_barcodes.csv".format(outdir),
        "{}/barcodes.png".format(outdir),
        "{}/samples.png".format(outdir),
        "{}/summary.png".format(outdir),
        ".sequana/rulegraph.svg",
# Plot the unknown (undetermined) barcodes read from Stats.json and save the
# table returned by the plotting call as CSV.
rule plot_unknown_barcodes:
    input:
        outdir + "/Stats/Stats.json"
    output:
        csv=outdir + "/undetermined_barcodes.csv",
        png=outdir + "/barcodes.png"
    run:
        from sequana.demultiplex import StatsFile
        import pylab

        stats = StatsFile(input[0])
        barcodes = stats.plot_unknown_barcodes()
        # The image is saved right after the plotting call, before the table
        # is written.
        pylab.savefig(output.png, dpi=200)
        barcodes.to_csv(output.csv)
# Run sequana_check_samplesheet on the configured sample sheet; its standard
# error stream is redirected to the temporary file ss.log.
rule check_samplesheet:
    input:
        cfg["samplesheet_file"]
    output:
        temp("ss.log")
    shell:
        """
        sequana_check_samplesheet -s {input[0]} 2> {output[0]}
        """
# Run bcl2fastq on the run folder given by config["input_directory"].
#
# The rule waits for ss.log (written by check_samplesheet) and produces
# Stats/Stats.json inside the output directory. The command line is assembled
# from the bcl2fastq section of the configuration:
#   - samplesheet_file: passed with --sample-sheet unless blank
#   - ignore_missing_bcls, no_bgzf_compression, write_fastq_reverse_complement:
#     each adds the bcl2fastq flag of the same name when true
#   - merge_all_lanes: adds --no-lane-splitting when true
#   - options: free text appended at the end of the command
rule bcl2fastq:
    input: "ss.log"
    output:
        outdir + "/Stats/Stats.json",
    params:
        indir=config["input_directory"],
        outdir=outdir,
        samplesheet=cfg['samplesheet_file'],
        barcode_mismatch=cfg['barcode_mismatch']
    threads: cfg['threads']
    run:
        # {threads} and {params.barcode_mismatch} are kept as placeholders;
        # they are substituted by shell() when the command is executed.
        args = ["bcl2fastq -p {threads} --barcode-mismatches {params.barcode_mismatch}"]

        # Intensities are expected in <input_directory>/Data/Intensities.
        args.append("--runfolder-dir {}".format(params.indir))
        args.append("--intensities-dir {}/Data/Intensities".format(params.indir))

        if params.samplesheet.strip() != "":
            args.append("--sample-sheet {}".format(params.samplesheet))
        args.append("--output-dir {}".format(os.path.abspath(params.outdir)))

        # --ignore-missing-controls is deliberately not passed: it is
        # deprecated according to the bcl2fastq 2.20 documentation.
        if cfg['ignore_missing_bcls']:
            args.append("--ignore-missing-bcls")
        if cfg['no_bgzf_compression']:
            args.append("--no-bgzf-compression")
        if cfg['merge_all_lanes']:
            args.append("--no-lane-splitting")
        if cfg['write_fastq_reverse_complement']:
            args.append("--write-fastq-reverse-complement")

        # User options are added as a separate argument. They used to be
        # concatenated without a space and could fuse with the previous token
        # (e.g. "--write-fastq-reverse-complement--foo"). An empty or missing
        # value adds nothing.
        extra_options = (cfg['options'] or "").strip()
        if extra_options:
            args.append(extra_options)

        shell(" ".join(args))
# Draw the per-sample bar plot from Stats.json and save it as samples.png.
rule plot_barplot_samples:
    input:
        outdir + "/Stats/Stats.json"
    output:
        barplot=outdir + "/samples.png"
    run:
        from sequana.demultiplex import StatsFile

        stats = StatsFile(input[0])
        stats.barplot_per_sample(filename=output.barplot)
# Draw the summary bar plot from Stats.json and write the text summary of
# the reads.
rule plot_summary:
    input:
        outdir + "/Stats/Stats.json"
    output:
        summary=outdir + "/Stats/summary.txt",
        barplot=outdir + "/summary.png"
    run:
        from sequana.demultiplex import StatsFile

        stats = StatsFile(input[0])
        stats.barplot_summary(filename=output.barplot)
        # The text summary is written last, after the bar plot (order kept
        # from the original implementation).
        stats.to_summary_reads(output.summary)
# Settings for the rulegraph module included below. Presumably the module
# reads these variables under these exact names -- confirm against sequana's
# rulegraph module before renaming anything.
# The graph is built from this pipeline's own Snakefile.
__rulegraph__input = manager.snakefile
# Location of the rendered graph; the same path is a target of rule `all`
# and is passed to the summary report in the onsuccess handler.
__rulegraph__output = ".sequana/rulegraph.svg"
__rulegraph__mapper = {}
include: sm.modules['rulegraph']
# Declared as local rules: Snakemake runs these on the host node instead of
# submitting them as cluster jobs.
localrules: rulegraph, check_samplesheet
# Executed once the workflow has finished successfully: fix permissions, call
# the manager's teardown, then build the summary report with the three plots
# embedded in its introduction.
onsuccess:
    shell("chmod -R g+w .")
    manager.teardown()

    from sequana.modules_report.summary import SummaryModule2
    from sequana_pipelines import demultiplex

    # png_to_embedded_png is called on the class itself; "dummy" stands in
    # for the instance argument.
    embed_png = SummaryModule2.png_to_embedded_png
    image1 = embed_png(
        "dummy",
        "barcodes.png",
        style="text-align:center; width:60%; height:40%",
        alt="barcodes",
    )
    image2 = embed_png(
        "dummy",
        "summary.png",
        style="width:60%; height:40%",
        alt="summary",
    )
    image3 = embed_png(
        "dummy",
        "samples.png",
        style="width:60%; height:40%",
        alt="sample",
    )

    # Introduction of the report; each {} receives one embedded image.
    intro_template = """
This report summarizes the demultiplexing of your raw data. Some help to interpret the following plots can be found on the pipeline home page and wiki.
The following image shows the indices found in the most Undetermined barcodes per lane. An index in excess may indicate a wrong SampleSheet with a typo in a given index.
{}
The following image shows the ratio of determined/undetermined reads after demultiplexing. Again, an excess of undetermined (larger than 10-20%) may indicate a wrongly labelled sample in your sample sheet.
{}
The following image is similar. Instead of showing the index, we show here the number of reads per sample.
{}
"""
    intro = intro_template.format(image1, image2, image3)

    data = {
        "name": manager.name,
        "stats": "stats.txt",
        "rulegraph": __rulegraph__output,
        "pipeline_version": demultiplex.version,
    }
    summary_report = SummaryModule2(data, intro=intro)

    # Remove the leftover "rulegraph" entry from the working directory.
    shell("rm -rf rulegraph")
# Executed when the workflow fails: point the user to the messages printed
# earlier.
onerror:
    failure_message = "An error occurred. See message above."
    print(failure_message)
#################################################################
# bcl2fastq
#
# :Parameters:
#
# intensities are expected to be found in input_directory/Data/Intensities
# Base call data are to be found in input_directory/Data/Intensities/BaseCalls
#
# If merge_all_lanes is set to true, all lanes are merged. This must be used
# with NextSeq sequencers, for instance.
#
input_directory: /pasteur/projets/specific/Biomics/Data/current/NextSeq/200804_NB501291_0260_AHTT55BGXF/bcl
###################################################################################
#
#
#
# --ignore-missing-bcls: interpret missing *.bcl files as no call (N)
# --write-fastq-reverse-complement: generate FASTQs containing reverse complements of actual data
# --no-bgzf-compression: turn off BGZF compression for FASTQ files
# --barcode-mismatches: number of allowed mismatches per index
# merge_all_lanes: if true, use the --no-lane-splitting option (all lanes are merged)
bcl2fastq:
threads: 4
barcode_mismatch: 0
samplesheet_file: /pasteur/projets/specific/Biomics/Data/current/NextSeq/200804_NB501291_0260_AHTT55BGXF/fastq/SampleSheet.csv
output_directory: .
ignore_missing_bcls: true
no_bgzf_compression: true
options: ''
merge_all_lanes: true
write_fastq_reverse_complement: false
Dependencies downloaded from bioconda requirements
Python dependencies (Pypi)
package,version,link appdirs,1.4.3,https://pypi.python.org/pypi/appdirs atropos,1.1.24,https://pypi.python.org/pypi/atropos attrs,19.3.0,https://pypi.python.org/pypi/attrs backcall,0.1.0,https://pypi.python.org/pypi/backcall beautifulsoup4,4.8.1,https://pypi.python.org/pypi/beautifulsoup4 bioservices,1.7.7,https://pypi.python.org/pypi/bioservices bx-python,0.8.8,https://pypi.python.org/pypi/bx-python certifi,2019.11.28,https://pypi.python.org/pypi/certifi chardet,3.0.4,https://pypi.python.org/pypi/chardet Click,7.0,https://pypi.python.org/pypi/Click colorama,0.4.1,https://pypi.python.org/pypi/colorama coloredlogs,10.0,https://pypi.python.org/pypi/coloredlogs colorlog,4.0.2,https://pypi.python.org/pypi/colorlog colormap,1.0.3,https://pypi.python.org/pypi/colormap colormath,3.0.0,https://pypi.python.org/pypi/colormath ConfigArgParse,0.15.1,https://pypi.python.org/pypi/ConfigArgParse cycler,0.10.0,https://pypi.python.org/pypi/cycler Cython,0.29.14,https://pypi.python.org/pypi/Cython datrie,0.8,https://pypi.python.org/pypi/datrie decorator,4.4.1,https://pypi.python.org/pypi/decorator docopt,0.6.2,https://pypi.python.org/pypi/docopt docutils,0.15.2,https://pypi.python.org/pypi/docutils easydev,0.9.38,https://pypi.python.org/pypi/easydev future,0.18.2,https://pypi.python.org/pypi/future gevent,1.4.0,https://pypi.python.org/pypi/gevent gitdb2,2.0.6,https://pypi.python.org/pypi/gitdb2 GitPython,3.0.5,https://pypi.python.org/pypi/GitPython greenlet,0.4.15,https://pypi.python.org/pypi/greenlet grequests,0.4.0,https://pypi.python.org/pypi/grequests gseapy,0.9.18,https://pypi.python.org/pypi/gseapy humanfriendly,4.18,https://pypi.python.org/pypi/humanfriendly idna,2.8,https://pypi.python.org/pypi/idna importlib-metadata,0.23,https://pypi.python.org/pypi/importlib-metadata ipykernel,5.1.3,https://pypi.python.org/pypi/ipykernel ipython,7.10.0,https://pypi.python.org/pypi/ipython ipython-genutils,0.2.0,https://pypi.python.org/pypi/ipython-genutils 
itolapi,3.0.3,https://pypi.python.org/pypi/itolapi jedi,0.15.1,https://pypi.python.org/pypi/jedi Jinja2,2.10.3,https://pypi.python.org/pypi/Jinja2 joblib,0.14.1,https://pypi.python.org/pypi/joblib jsonschema,3.2.0,https://pypi.python.org/pypi/jsonschema jupyter-client,5.3.3,https://pypi.python.org/pypi/jupyter-client jupyter-core,4.6.1,https://pypi.python.org/pypi/jupyter-core kiwisolver,1.1.0,https://pypi.python.org/pypi/kiwisolver lxml,4.4.2,https://pypi.python.org/pypi/lxml lzstring,1.0.4,https://pypi.python.org/pypi/lzstring Markdown,3.1.1,https://pypi.python.org/pypi/Markdown MarkupSafe,1.1.1,https://pypi.python.org/pypi/MarkupSafe matplotlib,2.2.2,https://pypi.python.org/pypi/matplotlib matplotlib-venn,0.11.5,https://pypi.python.org/pypi/matplotlib-venn mock,3.0.5,https://pypi.python.org/pypi/mock more-itertools,7.2.0,https://pypi.python.org/pypi/more-itertools multiqc,1.8.dev0,https://pypi.python.org/pypi/multiqc networkx,2.4,https://pypi.python.org/pypi/networkx numpy,1.17.3,https://pypi.python.org/pypi/numpy packaging,19.2,https://pypi.python.org/pypi/packaging pandas,0.25.3,https://pypi.python.org/pypi/pandas parso,0.5.1,https://pypi.python.org/pypi/parso patsy,0.5.1,https://pypi.python.org/pypi/patsy pexpect,4.7.0,https://pypi.python.org/pypi/pexpect pickleshare,0.7.5,https://pypi.python.org/pypi/pickleshare prompt-toolkit,3.0.0,https://pypi.python.org/pypi/prompt-toolkit psutil,5.6.7,https://pypi.python.org/pypi/psutil ptyprocess,0.6.0,https://pypi.python.org/pypi/ptyprocess Pygments,2.5.1,https://pypi.python.org/pypi/Pygments pykwalify,1.6.0,https://pypi.python.org/pypi/pykwalify PyOpenGL,3.1.5,https://pypi.python.org/pypi/PyOpenGL pyparsing,2.4.5,https://pypi.python.org/pypi/pyparsing pyrsistent,0.15.6,https://pypi.python.org/pypi/pyrsistent pysam,0.15.3,https://pypi.python.org/pypi/pysam python-dateutil,2.8.1,https://pypi.python.org/pypi/python-dateutil pytz,2019.3,https://pypi.python.org/pypi/pytz PyVCF,0.6.8,https://pypi.python.org/pypi/PyVCF 
PyYAML,5.1.2,https://pypi.python.org/pypi/PyYAML pyzmq,18.1.1,https://pypi.python.org/pypi/pyzmq qtconsole,4.6.0,https://pypi.python.org/pypi/qtconsole ratelimiter,1.2.0.post0,https://pypi.python.org/pypi/ratelimiter requests,2.22.0,https://pypi.python.org/pypi/requests requests-cache,0.5.0,https://pypi.python.org/pypi/requests-cache ruamel.yaml,0.16.5,https://pypi.python.org/pypi/ruamel.yaml ruamel.yaml.clib,0.2.0,https://pypi.python.org/pypi/ruamel.yaml.clib scikit-learn,0.23.1,https://pypi.python.org/pypi/scikit-learn scipy,1.3.2,https://pypi.python.org/pypi/scipy sequana,0.9.0,https://pypi.python.org/pypi/sequana setuptools,42.0.1.post20191125,https://pypi.python.org/pypi/setuptools simplejson,3.17.0,https://pypi.python.org/pypi/simplejson six,1.13.0,https://pypi.python.org/pypi/six smmap2,2.0.5,https://pypi.python.org/pypi/smmap2 snakemake,5.8.1,https://pypi.python.org/pypi/snakemake soupsieve,1.9.4,https://pypi.python.org/pypi/soupsieve spectra,0.0.11,https://pypi.python.org/pypi/spectra statsmodels,0.11.1,https://pypi.python.org/pypi/statsmodels suds-jurko,0.6,https://pypi.python.org/pypi/suds-jurko threadpoolctl,2.1.0,https://pypi.python.org/pypi/threadpoolctl tornado,6.0.3,https://pypi.python.org/pypi/tornado traitlets,4.3.3,https://pypi.python.org/pypi/traitlets urllib3,1.25.7,https://pypi.python.org/pypi/urllib3 wcwidth,0.1.7,https://pypi.python.org/pypi/wcwidth wrapt,1.11.2,https://pypi.python.org/pypi/wrapt xlrd,1.2.0,https://pypi.python.org/pypi/xlrd XML2Dict,0.2.2,https://pypi.python.org/pypi/XML2Dict xmltodict,0.12.0,https://pypi.python.org/pypi/xmltodict zipp,0.6.0,https://pypi.python.org/pypi/zipp
package | version | link |
---|