#!/usr/bin/env bash
# Module 4: Read Alignment Pipeline
# Usage: bash align_pipeline.sh <sample_name>
#   e.g. bash align_pipeline.sh SRR7890001

set -euo pipefail

SAMPLE="${1:-SRR7890001}"
REF="data/reference/chr22.fa"
TRIM_DIR="results/${SAMPLE}/trimmed"
ALIGN_DIR="results/${SAMPLE}/aligned"
THREADS=4

mkdir -p "${ALIGN_DIR}"

echo "=== Alignment Pipeline: ${SAMPLE} ==="

# Step 1: Build index (only needs to run once)
if [ ! -f "${REF}.bwt.2bit.64" ]; then
  echo "[0/5] Building BWA-MEM2 index (one-time, ~2 min)..."
  bwa-mem2 index "${REF}"
fi

# Step 2: Align
echo "[1/5] Aligning with BWA-MEM2..."
bwa-mem2 mem \
  -t "${THREADS}" \
  -R "@RG\tID:${SAMPLE}\tSM:${SAMPLE}\tPL:ILLUMINA\tLB:lib1" \
  "${REF}" \
  "${TRIM_DIR}/${SAMPLE}_R1_paired.fastq.gz" \
  "${TRIM_DIR}/${SAMPLE}_R2_paired.fastq.gz" \
  2> "${ALIGN_DIR}/${SAMPLE}_bwa.log" \
  | samtools view -bS - \
  > "${ALIGN_DIR}/${SAMPLE}_unsorted.bam"

# Step 3: Sort
echo "[2/5] Sorting BAM..."
samtools sort \
  -@ "${THREADS}" \
  -o "${ALIGN_DIR}/${SAMPLE}_sorted.bam" \
  "${ALIGN_DIR}/${SAMPLE}_unsorted.bam"
rm "${ALIGN_DIR}/${SAMPLE}_unsorted.bam"

# Step 4: Index
echo "[3/5] Indexing BAM..."
samtools index "${ALIGN_DIR}/${SAMPLE}_sorted.bam"

# Step 5: QC stats
echo "[4/5] Alignment QC..."
samtools flagstat "${ALIGN_DIR}/${SAMPLE}_sorted.bam" \
  > "${ALIGN_DIR}/${SAMPLE}_flagstat.txt"
samtools idxstats "${ALIGN_DIR}/${SAMPLE}_sorted.bam" \
  > "${ALIGN_DIR}/${SAMPLE}_idxstats.txt"

# Coverage
echo "[5/5] Computing coverage depth..."
samtools depth -a "${ALIGN_DIR}/${SAMPLE}_sorted.bam" \
  | awk '{sum+=$3; count++} END {printf "Mean depth: %.2fx\n", sum/count}' \
  | tee "${ALIGN_DIR}/${SAMPLE}_coverage_summary.txt"

echo ""
echo "Done! BAM at: ${ALIGN_DIR}/${SAMPLE}_sorted.bam"
echo "Load in IGV: File > Load from File > select .bam (the .bai index must be in same dir)"
cat "${ALIGN_DIR}/${SAMPLE}_flagstat.txt"
