#!/usr/bin/env bash
#SBATCH --job-name=clair3_hifi
#SBATCH --output=logs/%x_%j.out
#SBATCH --error=logs/%x_%j.err
#SBATCH --time=12:00:00
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
# Optional (uncomment if your cluster needs these)
# #SBATCH --partition=compute
# #SBATCH --account=YOUR_ACCOUNT

# ==========================================================
# HiFi variant calling: minimap2 -> samtools -> Clair3
#
# Usage:
#   mkdir -p logs          # must exist BEFORE submission, otherwise SLURM
#                          # cannot open the --output/--error files above
#   sbatch <this_script>
#
# The script can also be run directly with bash for testing; in that case
# the current directory is used in place of SLURM_SUBMIT_DIR.
# ==========================================================

set -euo pipefail

# ==========================================================
# USER SETTINGS (students edit these)
# ==========================================================
REFERENCE="data/reference.fasta"          # FASTA
READS="data/sample.fastq.gz"              # HiFi FASTQ(.gz), single file
SAMPLE="sample1"
MODEL_PATH="/shared/clair3_models/hifi"   # Clair3 HiFi model directory (host path)
OUTDIR="results"

MINIMAP2_IMG="containers/minimap2.sif"
SAMTOOLS_IMG="containers/samtools.sif"
CLAIR3_IMG="containers/clair3.sif"

MINIMAP2_URI="docker://quay.io/biocontainers/minimap2:2.28--he4a0461_0"
SAMTOOLS_URI="docker://quay.io/biocontainers/samtools:1.20--h50ea8bc_0"
# NOTE: ':latest' is not reproducible; pin a released tag for real analyses.
CLAIR3_URI="docker://hkubal/clair3:latest"

# ==========================================================
# ENVIRONMENT SETUP
# ==========================================================
# 'module' may not exist on every system; failures here are tolerated and
# the runtime check below reports a clear error if nothing usable is found.
module purge || true
module load singularity || module load apptainer || true

# Pick whichever container runtime is actually on PATH.
if command -v singularity >/dev/null 2>&1; then
  RUNTIME="singularity"
elif command -v apptainer >/dev/null 2>&1; then
  RUNTIME="apptainer"
else
  echo "ERROR: neither singularity nor apptainer found on PATH" >&2
  exit 1
fi

# Fall back to the current directory when not started through sbatch.
SUBMIT_DIR="${SLURM_SUBMIT_DIR:-${PWD}}"
cd "${SUBMIT_DIR}"
mkdir -p logs containers "${OUTDIR}"

# Use SLURM allocation for threading
THREADS="${SLURM_CPUS_PER_TASK:-8}"
export OMP_NUM_THREADS="${THREADS}"
export OPENBLAS_NUM_THREADS="${THREADS}"
export MKL_NUM_THREADS="${THREADS}"

# Singularity cache/tmp (helps on many HPCs); set before any pull so the
# image build does not fill $HOME or /tmp.
export SINGULARITY_CACHEDIR="${SUBMIT_DIR}/.singularity_cache"
export SINGULARITY_TMPDIR="${SUBMIT_DIR}/.singularity_tmp"
export APPTAINER_CACHEDIR="${SINGULARITY_CACHEDIR}"
export APPTAINER_TMPDIR="${SINGULARITY_TMPDIR}"
mkdir -p "${SINGULARITY_CACHEDIR}" "${SINGULARITY_TMPDIR}"

# If your site requires explicit bind mounts, uncomment and edit:
# export SINGULARITY_BINDPATH="/shared,/scratch,${SUBMIT_DIR}"

# ==========================================================
# CONTAINER IMAGES
# ==========================================================
#######################################
# Pull a container image unless it is already present.
# 'pull' refuses to overwrite an existing file, so an unconditional pull
# would abort every re-run of this job.
# Globals:   RUNTIME (read)
# Arguments: $1 - destination .sif path
#            $2 - source URI (e.g. docker://...)
#######################################
pull_image() {
  local img=$1
  local uri=$2
  if [[ -s "${img}" ]]; then
    echo "Image already present: ${img}"
  else
    echo "Pulling ${uri} -> ${img}"
    "${RUNTIME}" pull "${img}" "${uri}"
  fi
}

pull_image "${MINIMAP2_IMG}" "${MINIMAP2_URI}"
pull_image "${SAMTOOLS_IMG}" "${SAMTOOLS_URI}"
pull_image "${CLAIR3_IMG}" "${CLAIR3_URI}"

# ==========================================================
# INPUT/OUTPUT PATHS
# ==========================================================
SAM="${OUTDIR}/${SAMPLE}.sam"
BAM="${OUTDIR}/${SAMPLE}.sorted.bam"
VCF="${OUTDIR}/${SAMPLE}.vcf.gz"
VCF_TBI="${OUTDIR}/${SAMPLE}.vcf.gz.tbi"
CLAIR3_OUT="${OUTDIR}/${SAMPLE}.clair3_out"

# ==========================================================
# BASIC CHECKS
# ==========================================================
echo "=== HiFi Variant Calling (SLURM, no Nextflow) ==="
echo "Job ID:     ${SLURM_JOB_ID:-NA}"
echo "Sample:     ${SAMPLE}"
echo "Reference:  ${REFERENCE}"
echo "Reads:      ${READS}"
echo "Model path: ${MODEL_PATH}"
echo "Threads:    ${THREADS}"
echo "Outdir:     ${OUTDIR}"
echo "================================================="

[[ -s "${REFERENCE}" ]] || { echo "ERROR: reference FASTA missing/empty: ${REFERENCE}" >&2; exit 1; }
[[ -s "${READS}" ]] || { echo "ERROR: reads FASTQ missing/empty: ${READS}" >&2; exit 1; }
[[ -d "${MODEL_PATH}" ]] || { echo "ERROR: Clair3 model directory not found: ${MODEL_PATH}" >&2; exit 1; }

# ==========================================================
# STEP 0: VERSIONS (sanity)
# ==========================================================
# Purely informational: failures here must not abort the job.
echo ""
echo "[0/3] Tool versions"
"${RUNTIME}" exec "${MINIMAP2_IMG}" minimap2 --version | head -n 1 || true
"${RUNTIME}" exec "${SAMTOOLS_IMG}" samtools --version | head -n 2 || true
"${RUNTIME}" exec "${CLAIR3_IMG}" bash -lc \
  'command -v run_clair3.sh && echo "Clair3: run_clair3.sh found"' || true

# Clair3 requires a samtools-indexed reference; create the .fai if absent.
if [[ ! -s "${REFERENCE}.fai" ]]; then
  echo "Reference index not found; running samtools faidx -> ${REFERENCE}.fai"
  "${RUNTIME}" exec "${SAMTOOLS_IMG}" samtools faidx "${REFERENCE}"
fi

# ==========================================================
# STEP 1: ALIGNMENT (minimap2 map-hifi)
# ==========================================================
echo ""
echo "[1/3] minimap2 alignment -> ${SAM}"
"${RUNTIME}" exec "${MINIMAP2_IMG}" \
  minimap2 -t "${THREADS}" -a -x map-hifi "${REFERENCE}" "${READS}" > "${SAM}"

# ==========================================================
# STEP 2: SORT + INDEX (samtools)
# ==========================================================
echo ""
echo "[2/3] samtools sort/index -> ${BAM}"
"${RUNTIME}" exec "${SAMTOOLS_IMG}" \
  samtools sort -@ "${THREADS}" -o "${BAM}" "${SAM}"
"${RUNTIME}" exec "${SAMTOOLS_IMG}" \
  samtools index -@ "${THREADS}" "${BAM}"

# ==========================================================
# STEP 3: VARIANT CALLING (Clair3 HiFi)
# ==========================================================
echo ""
echo "[3/3] Clair3 calling -> ${VCF}"

# Start from an empty output directory; ':?' aborts instead of running
# 'rm -rf' on an empty path if the variable is ever blank.
rm -rf -- "${CLAIR3_OUT:?}"
mkdir -p "${CLAIR3_OUT}"

# MODEL_PATH must be a host path visible inside the container
# (autoMounts usually handles this).
# Paths are handed over as positional parameters rather than spliced into
# the command string, so quotes or spaces in them cannot break the command.
"${RUNTIME}" exec "${CLAIR3_IMG}" bash -lc '
  set -euo pipefail
  run_clair3.sh \
    --bam_fn "$1" \
    --ref_fn "$2" \
    --threads "$3" \
    --platform hifi \
    --model_path "$4" \
    --output "$5"
' clair3 "${BAM}" "${REFERENCE}" "${THREADS}" "${MODEL_PATH}" "${CLAIR3_OUT}"

# Normalize Clair3 output into predictable file names.
# Prefer the standard merged VCF; otherwise take the first *.vcf.gz found.
if [[ -f "${CLAIR3_OUT}/merge_output.vcf.gz" ]]; then
  FOUND_VCF="${CLAIR3_OUT}/merge_output.vcf.gz"
else
  # '|| true': head may close the pipe early, which pipefail would report.
  FOUND_VCF="$(find "${CLAIR3_OUT}" -maxdepth 4 -name '*.vcf.gz' | head -n 1 || true)"
fi
[[ -n "${FOUND_VCF}" ]] || { echo "ERROR: Clair3 did not produce a VCF." >&2; exit 1; }

cp -- "${FOUND_VCF}" "${VCF}"

if [[ -f "${FOUND_VCF}.tbi" ]]; then
  cp -- "${FOUND_VCF}.tbi" "${VCF_TBI}"
else
  echo "WARNING: VCF index (.tbi) not found. Attempting tabix indexing if available..."
  # Check and invocation both use a login shell so they see the same PATH.
  if "${RUNTIME}" exec "${CLAIR3_IMG}" bash -lc "command -v tabix >/dev/null 2>&1"; then
    "${RUNTIME}" exec "${CLAIR3_IMG}" bash -lc 'tabix -p vcf "$1"' tabix "${VCF}"
  else
    echo "WARNING: tabix not available; no .tbi created."
  fi
fi

# ==========================================================
# DONE
# ==========================================================
echo ""
echo "================================================="
echo "DONE"
echo "BAM: ${BAM}"
echo "VCF: ${VCF}"
echo "================================================="