# Bioinformatics Workshop — full real-tools conda environment.
#
# Build:   conda env create -f conda_env.yml     (or: mamba env create -f conda_env.yml)
# Use:     conda activate bioworkshop
#
# This single environment covers BOTH tiers:
#   - the Python scientific stack the notebooks need (pandas/numpy/...), and
#   - every command-line tool + R/Bioconductor package the pipeline scripts run
#     (fastp, samtools, bcftools, DESeq2, ...).
#
# All R/Bioconductor packages are installed as prebuilt conda binaries (the
# `bioconductor-*` entries from bioconda, the `r-*` entries from conda-forge)
# rather than compiled at runtime via BiocManager — that is what lets a
# classroom install finish in minutes instead of timing out or failing while
# compiling from source. setup.sh keeps a BiocManager fallback only for the
# case where a binary is unavailable for your platform (e.g. Apple Silicon).
name: bioworkshop  # environment name; activate with `conda activate bioworkshop`
# Channel order is priority order: conda-forge first, then bioconda, which is
# the ordering the bioconda project recommends. `nodefaults` keeps Anaconda's
# `defaults` channel out of the solve so that every package is resolved from
# conda-forge/bioconda only and builds from different ecosystems are not mixed.
channels:
  - conda-forge
  - bioconda
  - nodefaults
# Conda packages come first; the nested `pip:` list at the bottom is handed to
# pip once the conda packages have been installed.
dependencies:
  # --- Core: Python and R interpreters, Jupyter, notebook helpers ---
  # Only the interpreters are pinned; the Jupyter packages float with the solver.
  - python=3.11
  - r-base=4.3
  - jupyterlab
  - notebook
  - ipywidgets
  - tqdm

  # --- Sequence utilities / QC ---
  - seqkit=2.8.0          # used in Module 2 (real-tool spine)
  - fastqc=0.12.1
  - multiqc=1.21
  - trimmomatic=0.39
  - fastp=0.23.4

  # --- Alignment ---
  - bwa=0.7.18            # classic bwa (Module 1 illustrative + general use)
  - bwa-mem2=2.2.1
  - hisat2=2.2.1
  - samtools=1.19.2
  - htslib=1.19.1         # provides tabix + bgzip (do not list those separately)

  # --- Variant calling / annotation ---
  - bcftools=1.19
  - bedtools=2.31.1
  - gatk4=4.5.0.0
  - picard=3.1.1
  - snpeff=5.2            # bioconda build; provides the `snpEff` launcher
  - igvtools=2.16.2       # headless IGV index/coverage (GUI IGV is desktop-only, run on host)

  # --- RNA-seq quantification ---
  - subread=2.0.6         # provides featureCounts
  - salmon=1.10.2

  # --- R / Bioconductor (prebuilt binaries) ---
  # `bioconductor-*` packages come from bioconda, `r-*` packages from conda-forge.
  # They are left unpinned so the solver picks builds that match r-base=4.3.
  - bioconductor-deseq2
  - bioconductor-clusterprofiler
  - bioconductor-enhancedvolcano
  - bioconductor-pathview
  - bioconductor-annotationdbi
  - bioconductor-org.hs.eg.db     # human annotation
  - bioconductor-org.dm.eg.db     # Drosophila annotation (pasilla example)
  - bioconductor-pasilla          # example dataset used by deseq2_analysis.R
  - r-ggplot2
  - r-pheatmap

  # --- Python scientific stack (single source of truth; mirrors requirements.txt) ---
  # `pip` is listed as a conda dependency so the `pip:` section below runs with
  # this environment's own pip.
  # NOTE(review): these pins are installed by pip on top of the conda packages
  # above; if a conda package already pulled in one of them as a dependency
  # (e.g. numpy), pip will replace that copy. Consider pinning the conda-forge
  # equivalents instead — confirm against requirements.txt before changing.
  - pip
  - pip:
    - biopython==1.83
    - pandas==2.2.2
    - numpy==1.26.4
    - matplotlib==3.9.0
    - seaborn==0.13.2
    - scipy==1.13.1
    - scikit-learn==1.4.2
    - pysam==0.22.1
    - pyvcf3==1.0.3
