"""Demultiplex pipeline

Author: Thomas Cokelaer
Affiliation: Institut Pasteur @ 2020

This pipeline is part of Sequana software (sequana.readthedocs.io)

You will need bcl2fastq/2.20.0. Please see the README and wiki on https://github.com/sequana/sequana_demultiplex


http://emea.support.illumina.com/downloads/bcl2fastq-conversion-software-v2-20.html?langsel=/fr/
"""
from sequana import snaketools as sm
import os
import json

# The configuration has to be loaded before any include statement
configfile: "config.yaml"

# Pipeline manager provided by sequana's snaketools
manager = sm.PipelineManagerDirectory("demultiplex", config)
manager.setup(globals(), mode="error")

# Shortcuts: the bcl2fastq section of the configuration and the absolute
# path of the directory that receives the results
cfg = config["bcl2fastq"]
outdir = os.path.abspath(cfg["output_directory"])

# A blank sample sheet entry skips the check; any other value must point to
# an existing file.
if cfg["samplesheet_file"].strip() and not os.path.exists(cfg["samplesheet_file"]):
    raise IOError("Sample sheet {} does not exist".format(cfg["samplesheet_file"]))


# Default target: the text summary, the table of undetermined barcodes, the
# three figures and the rule graph.
rule all:
    input:
        "{}/Stats/summary.txt".format(outdir),
        "{}/undetermined_barcodes.csv".format(outdir),
        "{}/barcodes.png".format(outdir),
        "{}/samples.png".format(outdir),
        "{}/summary.png".format(outdir),
        ".sequana/rulegraph.svg",


# Build the figure and the CSV table of undetermined barcodes from the
# Stats.json file, using StatsFile.plot_unknown_barcodes.
rule plot_unknown_barcodes:
    input:
        "{}/Stats/Stats.json".format(outdir)
    output:
        csv="{}/undetermined_barcodes.csv".format(outdir),
        png="{}/barcodes.png".format(outdir)
    run:
        from sequana.demultiplex import StatsFile

        stats = StatsFile(input[0])
        # The plotting call also returns the table that is exported below.
        barcodes = stats.plot_unknown_barcodes()

        # savefig acts on the current figure, i.e. the one just drawn.
        import pylab
        pylab.savefig(output.png, dpi=200)
        barcodes.to_csv(output.csv)

# Validate the sample sheet with sequana_check_samplesheet before running
# bcl2fastq. The standard error of the checker is stored in the temporary
# ss.log file, which the bcl2fastq rule takes as input.
#
# The sample sheet is optional: a blank entry is accepted by the configuration
# check at the top of this file and by the bcl2fastq rule. In that case there
# is nothing to validate, so no input is declared and only the log is written.
rule check_samplesheet:
    input: cfg['samplesheet_file'] if cfg['samplesheet_file'].strip() else []
    output: temp("ss.log")
    run:
        if input:
            shell("sequana_check_samplesheet -s {input[0]} 2> {output[0]}")
        else:
            # Still create the log so that the bcl2fastq rule can be scheduled.
            with open(output[0], "w") as fout:
                fout.write("No sample sheet provided: check skipped\n")


# Run bcl2fastq on the run folder. ss.log (written by check_samplesheet) is
# required as input so that the sample sheet check runs first. Stats.json is
# the file the plotting rules start from.
#
# Options taken from the bcl2fastq section of the configuration:
#   threads, barcode_mismatch, samplesheet_file (optional),
#   ignore_missing_bcls, no_bgzf_compression, merge_all_lanes,
#   write_fastq_reverse_complement, options (free text appended verbatim).
rule bcl2fastq:
    input: "ss.log"
    output:
        outdir + "/Stats/Stats.json",
    params:
        indir=config["input_directory"],
        outdir=outdir,
        samplesheet=cfg['samplesheet_file'],
        barcode_mismatch=cfg['barcode_mismatch']
    threads: cfg['threads']
    run:
        # The command is assembled as a list of tokens joined with single
        # spaces, so that no two arguments can ever be glued together.
        # Placeholders in curly braces are resolved once, by shell() below.
        tokens = [
            "bcl2fastq -p {threads}",
            "--barcode-mismatches {params.barcode_mismatch}",
            "--runfolder-dir {params.indir}",
            "--intensities-dir {params.indir}/Data/Intensities",
        ]

        # The sample sheet is optional; a blank entry means none is passed.
        if params.samplesheet.strip():
            tokens.append("--sample-sheet {params.samplesheet}")

        # params.outdir is already an absolute path (see outdir definition).
        tokens.append("--output-dir {params.outdir}")

        # Boolean configuration entries and the switch each one enables.
        # --ignore-missing-controls is deliberately not exposed: it is
        # deprecated according to the bcl2fastq 2.20 documentation.
        switches = (
            ("ignore_missing_bcls", "--ignore-missing-bcls"),
            ("no_bgzf_compression", "--no-bgzf-compression"),
            ("merge_all_lanes", "--no-lane-splitting"),
            ("write_fastq_reverse_complement", "--write-fastq-reverse-complement"),
        )
        tokens.extend(switch for key, switch in switches if cfg[key])

        # Free-text user options; the entry may be empty or null in the
        # configuration file.
        extra = (cfg['options'] or "").strip()
        if extra:
            tokens.append(extra)

        shell(" ".join(tokens))

# Draw the per-sample barplot from the Stats.json file, using
# StatsFile.barplot_per_sample.
rule plot_barplot_samples:
    input:
        "{}/Stats/Stats.json".format(outdir)
    output:
        barplot="{}/samples.png".format(outdir)
    run:
        from sequana.demultiplex import StatsFile

        stats = StatsFile(input[0])
        stats.barplot_per_sample(filename=output.barplot)


# Produce the summary barplot and the text summary from the Stats.json file.
rule plot_summary:
    input:
        "{}/Stats/Stats.json".format(outdir)
    output:
        summary="{}/Stats/summary.txt".format(outdir),
        barplot="{}/summary.png".format(outdir)
    run:
        from sequana.demultiplex import StatsFile

        stats = StatsFile(input[0])
        stats.barplot_summary(filename=output.barplot)
        # The text summary is written last, after the barplot.
        stats.to_summary_reads(output.summary)

# Settings for the rulegraph module included below; they have to be defined
# before the include statement. Presumably the module reads them to draw the
# graph of this Snakefile into the SVG file requested by rule all
# (NOTE(review): confirm against the sequana rulegraph module).
__rulegraph__input = manager.snakefile
__rulegraph__output = ".sequana/rulegraph.svg"
__rulegraph__mapper = {}
include: sm.modules['rulegraph']
# These two rules are declared as local rules (not submitted as cluster jobs).
localrules: rulegraph, check_samplesheet


# Executed once the workflow has finished successfully: open the results to
# the group, let the manager clean up and build the HTML summary report.
onsuccess:
    # Give the group write permission on everything in the working directory.
    shell("chmod -R g+w .")
    manager.teardown()

    from sequana.modules_report.summary import SummaryModule2

    # The figures are written by the plotting rules into the output directory,
    # which is not necessarily the working directory, hence the outdir prefix.
    # png_to_embedded_png is called on the class itself; "dummy" stands in for
    # the instance.
    image1 = SummaryModule2.png_to_embedded_png("dummy", outdir + "/barcodes.png",
                style="text-align:center; width:60%; height:40%",
                alt="barcodes")

    image2 = SummaryModule2.png_to_embedded_png("dummy", outdir + "/summary.png",
                style="width:60%; height:40%", alt="summary")

    image3 = SummaryModule2.png_to_embedded_png("dummy", outdir + "/samples.png",
                style="width:60%; height:40%", alt="sample")


    # Introduction of the report; the three placeholders receive the images
    # above, in that order.
    intro = """<div>
<p style="text-align:justify">This report summarizes the demultiplexing of your raw data. Some help to interpret the following plots can be found on the pipeline <a href="https://github.com/sequana/sequana_demultiplex">home page</a> and  <a href="https://github.com/sequana/sequana_demultiplex/wiki"> wiki</a>.</p>

<br>
<br>

<p style="text-align:justify">The following image shows the indices found in the most Undetermined barcodes per lane. An index in excess may indicate a wrong SampleSheet with a typo in a given index.</p> 
<br><br>{} <hr> 
<p style="text-align:justify">
The following image shows the ratio of determined/undetermined reads after demultiplexing. Again, an excess of undetermined (larger than 10-20%) may indicate a wrongly labelled sample in your sample sheet.
</p>  <br><br>{}<hr> 
<p style="text-align:justify">
The following image is similar. Instead of showing the index, we show here the number of reads per sample.</p>
<br>{}


</div>""".format(image1, image2, image3)

    from sequana_pipelines import demultiplex
    data = {
            "name": manager.name,
            "stats": "stats.txt",
            "rulegraph": __rulegraph__output,
            "pipeline_version": demultiplex.version
            }
    # The module is instantiated for the report; the instance itself is not
    # used afterwards.
    SummaryModule2(data, intro=intro)
    shell("rm -rf rulegraph")

# Executed when the workflow fails: only points the user to the messages
# printed earlier.
onerror:
    print("An error occurred. See message above.")