Last active
February 2, 2026 07:51
-
-
Save tahashmi/4d5a62c12f6952ad742cd14a98f5fa70 to your computer and use it in GitHub Desktop.
A sample SLURM + Singularity variant-calling pipeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#SBATCH --job-name=clair3_hifi
#SBATCH --output=logs/%x_%j.out
#SBATCH --error=logs/%x_%j.err
#SBATCH --time=12:00:00
#SBATCH --cpus-per-task=8
#SBATCH --mem=32G
# Optional (uncomment if your cluster needs these)
# #SBATCH --partition=compute
# #SBATCH --account=YOUR_ACCOUNT
#
# HiFi variant calling on a single sample:
#   minimap2 (map-hifi) -> samtools sort/index -> Clair3.
#
# NOTE: the --output/--error files above are opened by SLURM before this
# script starts, so the logs/ directory must already exist at submission
# time. Run `mkdir -p logs` once before `sbatch`.

# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail

# ==========================================================
# USER SETTINGS (students edit these)
# ==========================================================
REFERENCE="data/reference.fasta"        # FASTA
READS="data/sample.fastq.gz"            # HiFi FASTQ(.gz), single file
SAMPLE="sample1"                        # prefix used for all output file names
MODEL_PATH="/shared/clair3_models/hifi" # Clair3 HiFi model directory (host path)
OUTDIR="results"                        # all outputs are written below this directory
# ==========================================================
# ENVIRONMENT SETUP
# ==========================================================
# Best effort: not every site uses environment modules, and the container
# runtime may be published as either "singularity" or "apptainer".
module purge || true
module load singularity || module load apptainer || true

# Fail early with a clear message instead of a bare "command not found" later.
command -v singularity >/dev/null 2>&1 \
  || { echo "ERROR: 'singularity' command not found (module load failed?)." >&2; exit 1; }

# Fall back to the current directory so the script also runs outside SLURM,
# matching the defaults used for the other SLURM_* variables.
SUBMIT_DIR="${SLURM_SUBMIT_DIR:-${PWD}}"
cd "${SUBMIT_DIR}"
mkdir -p logs "${OUTDIR}" containers

# Use SLURM allocation for threading
THREADS="${SLURM_CPUS_PER_TASK:-8}"
export OMP_NUM_THREADS="${THREADS}"
export OPENBLAS_NUM_THREADS="${THREADS}"
export MKL_NUM_THREADS="${THREADS}"

# Singularity cache/tmp (helps on many HPCs). These are exported before any
# pull so that image downloads use them too.
export SINGULARITY_CACHEDIR="${SUBMIT_DIR}/.singularity_cache"
export SINGULARITY_TMPDIR="${SUBMIT_DIR}/.singularity_tmp"
mkdir -p "${SINGULARITY_CACHEDIR}" "${SINGULARITY_TMPDIR}"

# If your site requires explicit bind mounts, uncomment and edit:
# export SINGULARITY_BINDPATH="/shared,/scratch,${SUBMIT_DIR}"

# ==========================================================
# CONTAINER IMAGES
# ==========================================================
MINIMAP2_IMG="containers/minimap2.sif"
SAMTOOLS_IMG="containers/samtools.sif"
CLAIR3_IMG="containers/clair3.sif"

#######################################
# Pull a container image unless a non-empty copy already exists.
# `singularity pull` refuses to overwrite an existing file, so an
# unconditional pull would abort every rerun of this job.
# Arguments: $1 - destination .sif path
#            $2 - image URI (e.g. docker://...)
# Returns:   0 if the image is present or was pulled, non-zero otherwise
#######################################
pull_image_if_missing() {
  local image_path=$1
  local image_uri=$2
  if [[ -s "${image_path}" ]]; then
    echo "Using existing image: ${image_path}"
    return 0
  fi
  singularity pull "${image_path}" "${image_uri}"
}

pull_image_if_missing "${MINIMAP2_IMG}" "docker://quay.io/biocontainers/minimap2:2.28--he4a0461_0"
pull_image_if_missing "${SAMTOOLS_IMG}" "docker://quay.io/biocontainers/samtools:1.20--h50ea8bc_0"
# NOTE: ":latest" is a moving tag; pin a specific version for reproducible results.
pull_image_if_missing "${CLAIR3_IMG}" "docker://hkubal/clair3:latest"
# ==========================================================
# INPUT/OUTPUT PATHS
# ==========================================================
# Every output is named <OUTDIR>/<SAMPLE>.<suffix>.
SAM="${OUTDIR}/${SAMPLE}.sam"                # raw minimap2 alignments
BAM="${OUTDIR}/${SAMPLE}.sorted.bam"         # coordinate-sorted alignments
VCF="${OUTDIR}/${SAMPLE}.vcf.gz"             # final variant calls
# Derived from VCF so that an index written by `tabix` next to the VCF
# always matches this path.
VCF_TBI="${VCF}.tbi"
CLAIR3_OUT="${OUTDIR}/${SAMPLE}.clair3_out"  # Clair3 working/output directory
# ==========================================================
# BASIC CHECKS
# ==========================================================
# Record the run configuration at the top of the job log.
echo "=== HiFi Variant Calling (SLURM, no Nextflow) ==="
echo "Job ID: ${SLURM_JOB_ID:-NA}"
echo "Sample: ${SAMPLE}"
echo "Reference: ${REFERENCE}"
echo "Reads: ${READS}"
echo "Model path: ${MODEL_PATH}"
echo "Threads: ${THREADS}"
echo "Outdir: ${OUTDIR}"
echo "================================================="

# Validate inputs up front so the job fails in seconds, not after alignment.
[[ -s "${REFERENCE}" ]] || { echo "ERROR: reference FASTA missing/empty: ${REFERENCE}" >&2; exit 1; }
[[ -s "${READS}" ]] || { echo "ERROR: reads FASTQ missing/empty: ${READS}" >&2; exit 1; }
[[ -d "${MODEL_PATH}" ]] || { echo "ERROR: Clair3 model directory not found: ${MODEL_PATH}" >&2; exit 1; }

# The version probes in the next step ignore failures, so without this check
# a missing image would only surface once the alignment step starts.
for image in "${MINIMAP2_IMG}" "${SAMTOOLS_IMG}" "${CLAIR3_IMG}"; do
  [[ -s "${image}" ]] || { echo "ERROR: container image missing/empty: ${image}" >&2; exit 1; }
done
# ==========================================================
# STEP 0: VERSIONS (sanity)
# ==========================================================
echo ""
echo "[0/3] Tool versions"

# Informational only: "|| true" keeps a failed probe (or a SIGPIPE caused by
# `head` closing the pipe early under pipefail) from aborting the job.
singularity exec "${MINIMAP2_IMG}" minimap2 --version | head -n 1 || true
singularity exec "${SAMTOOLS_IMG}" samtools --version | head -n 2 || true
# Reports whether run_clair3.sh is on the PATH of a login shell in the image.
singularity exec "${CLAIR3_IMG}" \
  bash -lc 'command -v run_clair3.sh && echo "Clair3: run_clair3.sh found"' || true
# ==========================================================
# STEP 1: ALIGNMENT (minimap2 map-hifi)
# ==========================================================
echo ""
echo "[1/3] minimap2 alignment -> ${SAM}"

# -a emits SAM on stdout; the redirect happens on the host, outside the container.
singularity exec "${MINIMAP2_IMG}" \
  minimap2 -t "${THREADS}" -a -x map-hifi "${REFERENCE}" "${READS}" > "${SAM}"
# ==========================================================
# STEP 2: SORT + INDEX (samtools)
# ==========================================================
echo ""
echo "[2/3] samtools sort/index -> ${BAM}"

# Sort the SAM from step 1 into a BAM file.
singularity exec "${SAMTOOLS_IMG}" \
  samtools sort -@ "${THREADS}" -o "${BAM}" "${SAM}"

# Index the sorted BAM before it is handed to Clair3.
singularity exec "${SAMTOOLS_IMG}" \
  samtools index -@ "${THREADS}" "${BAM}"
# ==========================================================
# STEP 3: VARIANT CALLING (Clair3 HiFi)
# ==========================================================
echo ""
echo "[3/3] Clair3 calling -> ${VCF}"

# Start from an empty Clair3 directory so results of an earlier run cannot be
# mistaken for this one. ":?" aborts instead of expanding to an empty path.
rm -rf -- "${CLAIR3_OUT:?}"
mkdir -p "${CLAIR3_OUT}"

# MODEL_PATH must be a host path visible inside the container (autoMounts usually handles this).
# Paths are passed to the inner shell as positional parameters rather than
# being spliced into the command string, so spaces or quotes in a path cannot
# break (or inject into) the command.
# NOTE(review): this relies on run_clair3.sh being on the login-shell PATH of
# the image; if it is not found, try the absolute path inside the container.
singularity exec "${CLAIR3_IMG}" bash -lc '
  set -euo pipefail
  run_clair3.sh \
    --bam_fn "$1" \
    --ref_fn "$2" \
    --threads "$3" \
    --platform hifi \
    --model_path "$4" \
    --output "$5" \
    --sample_name "$6"
' clair3 "${BAM}" "${REFERENCE}" "${THREADS}" "${MODEL_PATH}" "${CLAIR3_OUT}" "${SAMPLE}"

# Normalize Clair3 output into predictable file names
SRC_VCF="${CLAIR3_OUT}/merge_output.vcf.gz"
if [[ ! -f "${SRC_VCF}" ]]; then
  # Fallback: take the first *.vcf.gz below the output directory. Sorting makes
  # the pick deterministic; "|| true" absorbs a SIGPIPE from `head` under pipefail.
  SRC_VCF="$(find "${CLAIR3_OUT}" -maxdepth 4 -type f -name '*.vcf.gz' \
    | LC_ALL=C sort | head -n 1 || true)"
  [[ -n "${SRC_VCF}" ]] || { echo "ERROR: Clair3 did not produce a VCF." >&2; exit 1; }
fi

cp -- "${SRC_VCF}" "${VCF}"

# Drop any index left over from a previous run: it would not match the VCF
# just copied, and tabix refuses to overwrite an existing index.
rm -f -- "${VCF_TBI}" "${VCF}.tbi"

if [[ -f "${SRC_VCF}.tbi" ]]; then
  cp -- "${SRC_VCF}.tbi" "${VCF_TBI}"
else
  echo "WARNING: VCF index (.tbi) not found. Attempting tabix indexing if available..." >&2
  # Probe for tabix in the same (non-login) environment that will run it.
  if singularity exec "${CLAIR3_IMG}" bash -c 'command -v tabix >/dev/null 2>&1'; then
    singularity exec "${CLAIR3_IMG}" tabix -p vcf "${VCF}"
  else
    echo "WARNING: tabix not available; no .tbi created." >&2
  fi
fi
# ==========================================================
# DONE
# ==========================================================
# Final summary: where the main deliverables were written.
echo ""
echo "================================================="
echo "DONE"
echo "BAM: ${BAM}"
echo "VCF: ${VCF}"
echo "================================================="
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment