Skip to content

Instantly share code, notes, and snippets.

@elston
Last active May 19, 2025 13:10
Show Gist options
  • Select an option

  • Save elston/b54899209f4a74fedfb550ebdfb39205 to your computer and use it in GitHub Desktop.

Select an option

Save elston/b54899209f4a74fedfb550ebdfb39205 to your computer and use it in GitHub Desktop.
HuBERT-ECG demo with ptb-xl
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "610604c2-1874-42ba-85f5-5246087dc36b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from utils import dataset_processing"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cf2458f9-b3eb-4b83-a4a4-96f9bb47fd57",
"metadata": {},
"outputs": [],
"source": [
"path = '/data/ptb-xl'\n",
"path_norm = '/shared/ptb-xl'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9d58acd2-2aa4-4812-9f2d-51f847e0dea2",
"metadata": {},
"outputs": [],
"source": [
"Y = pd.read_csv(f'{path}/ptbxl_database.csv', index_col='ecg_id')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2e386166-bd8d-48cb-8eb6-98e4d961a062",
"metadata": {},
"outputs": [],
"source": [
"filenames = Y.filename_hr.to_numpy()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dbd8d470-4b6b-4d09-9b99-0b1acfe8f0e8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(21799,)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"filenames.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d9f419cd-e4cb-4df2-83fd-31388816a943",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['records500/00000/00001_hr', 'records500/00000/00002_hr',\n",
" 'records500/00000/00003_hr', ..., 'records500/21000/21835_hr',\n",
" 'records500/21000/21836_hr', 'records500/21000/21837_hr'],\n",
" dtype=object)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"filenames"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "70bb277d-414c-4fb8-b63d-488f8fdb729d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing ECG files: 100%|██████████| 21799/21799 [01:34<00:00, 229.62it/s]\n"
]
}
],
"source": [
"dataset_processing(filenames, path, path_norm)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
import os
import numpy as np
# from sklearn.utils import resample
from scipy.signal import resample
from biosppy.signals.tools import filter_signal
from tqdm import tqdm
import wfdb
def apply_filter(
    signal,
    filter_bandwidth,
    fs=500
):
    """Band-pass filter ``signal`` with an FIR filter via ``filter_signal``.

    Args:
        signal: Signal array handed to ``filter_signal`` unchanged.
        filter_bandwidth: Cut-off specification forwarded as ``frequency``.
        fs: Sampling rate forwarded as ``sampling_rate``; it also sets the
            filter order, which is ``int(0.3 * fs)``.

    Returns:
        The first element of the 3-tuple returned by ``filter_signal``
        (the filtered signal).
    """
    fir_settings = {
        'ftype': 'FIR',
        'band': 'bandpass',
        # Filter order scales with the sampling rate (0.3 * fs, truncated).
        'order': int(0.3 * fs),
        'frequency': filter_bandwidth,
        'sampling_rate': fs,
    }
    # Only the filtered samples are needed; the two extra outputs are dropped.
    filtered, _, _ = filter_signal(signal=signal, **fir_settings)
    return filtered
def scaling(
    ecg_signal,
    smooth=1e-8
):
    """Min-max rescale every row of ``ecg_signal`` to roughly [-1, 1].

    For each row, the minimum maps to -1 and the maximum maps to just below
    +1; ``smooth`` is added to the row range so that a constant row does not
    cause a division by zero (such a row becomes all -1).

    Args:
        ecg_signal: 2-D array; statistics are taken along axis 1.
        smooth: Small constant added to each row's (max - min) range.

    Returns:
        Array with the same shape as ``ecg_signal`` holding the rescaled rows.
    """
    row_min = np.min(ecg_signal, axis=1)
    row_max = np.max(ecg_signal, axis=1)

    # Turn the per-row statistics into column vectors so they broadcast
    # across the sample axis.
    offset = row_min[None].T
    span = (row_max - row_min + smooth)[None].T

    shifted = ecg_signal - offset
    return 2 * shifted / span - 1
def ecg_preprocessing(
    ecg_signal,
    original_frequency,
    band_pass=None,
    target_frequency=500
) -> np.ndarray:
    """Resample, band-pass filter and rescale a 12-row ECG array.

    Args:
        ecg_signal: Array of shape ``(12, signal_length)``; axis 1 is the
            sample axis that gets resampled.
        original_frequency: Sampling rate of ``ecg_signal``.
        band_pass: Cut-off pair forwarded to ``apply_filter``. ``None``
            (the default) selects ``[0.05, 47]``.
        target_frequency: Sampling rate the signal is resampled to before
            filtering; it is also passed to ``apply_filter`` as ``fs``.
            Defaults to 500, the value that used to be hard-coded.

    Returns:
        The result of ``scaling`` applied to the resampled, filtered signal.

    Raises:
        AssertionError: If ``ecg_signal.shape[0]`` is not 12.
    """
    # Raised explicitly rather than through an ``assert`` statement so the
    # check is not stripped under ``python -O``; the exception type is kept
    # as AssertionError for backward compatibility with existing callers.
    if ecg_signal.shape[0] != 12:
        raise AssertionError(
            "ecg_signal should have (12, signal_length) shape for pre-processing"
        )

    # Build the default per call instead of sharing one mutable list object
    # across every invocation.
    if band_pass is None:
        band_pass = [0.05, 47]

    # Multiply before dividing: rounding the ratio first and truncating the
    # product afterwards can land one sample short of an exact result.
    num_samples = int(
        ecg_signal.shape[-1] * target_frequency / original_frequency
    )

    ecg_signal = resample(ecg_signal, num_samples, axis=1)
    ecg_signal = apply_filter(ecg_signal, band_pass, fs=target_frequency)
    return scaling(ecg_signal)
def dataset_processing(
    filenames: list[str],
    path_wfdb: str,
    path_norm: str,
    skip_existing=True
):
    """Pre-process WFDB records and store each result as a ``.npy`` file.

    For every entry in ``filenames`` the record is read with ``wfdb.rdsamp``,
    transposed, cleaned of NaNs, passed through ``ecg_preprocessing`` and
    saved in ``path_norm``. The output name is ``"HR"`` followed by the
    record's base name with ``'_hr'`` replaced by ``'.hea.npy'``.

    A failure on one record is reported with ``print`` and does not stop the
    remaining records from being processed.

    Args:
        filenames: Record paths relative to ``path_wfdb`` (without extension).
        path_wfdb: Root directory that contains the WFDB records.
        path_norm: Output directory; created if it does not exist.
        skip_existing: When true, records whose output file already exists
            are skipped.

    Returns:
        None.
    """
    os.makedirs(path_norm, exist_ok=True)

    for filename in tqdm(filenames, desc='Processing ECG files'):
        output_filename = f"HR{os.path.basename(filename).replace('_hr', '.hea.npy')}"
        output_path = os.path.join(path_norm, output_filename)
        # np.save appends '.npy' to path names that lack it; apply the same
        # rule up front so the skip check and the written file always agree.
        if not output_path.endswith('.npy'):
            output_path += '.npy'

        if skip_existing and os.path.exists(output_path):
            continue

        # Write to a sibling temp file first: if saving fails half-way, no
        # truncated file is left at output_path for skip_existing to mistake
        # for a finished result on the next run.
        tmp_path = f"{output_path}.tmp"
        try:
            signal, meta = wfdb.rdsamp(os.path.join(path_wfdb, filename))
            signal = signal.T
            if np.isnan(signal).any():
                signal = np.nan_to_num(signal, nan=0.0)
            signal_norm = ecg_preprocessing(signal, meta['fs'])
            # Saving through a file object keeps numpy from altering the
            # temp file's name.
            with open(tmp_path, 'wb') as tmp_file:
                np.save(tmp_file, signal_norm)
            os.replace(tmp_path, output_path)
        except Exception as e:
            # Best-effort batch job: report the failure and move on.
            print(f"Error processing {filename}: {str(e)}")
            try:
                os.remove(tmp_path)
            except OSError:
                # Nothing to clean up (the temp file was never created or
                # has already been moved into place).
                pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment