Created
March 16, 2026 11:55
-
-
Save srugano/b90de8569c36bb9c6161aa963c6df076 to your computer and use it in GitHub Desktop.
Convert epub to audio.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import sys | |
| import os | |
| import zipfile | |
| from pathlib import Path | |
| import soundfile as sf | |
| import html | |
| import re | |
| from typing import List | |
| import warnings | |
# Silence all warnings from the libraries loaded below so they do not clutter
# the progress output.
warnings.filterwarnings('ignore')

# Monkey patch numpy.load so that pickled (object) arrays load by default.
# SECURITY NOTE(review): allow_pickle=True lets a malicious .npy/.npz file run
# arbitrary code while it is being loaded. Only point this script at model and
# voice files obtained from a source you trust.
import numpy
numpy_load_old = numpy.load


def _numpy_load_allow_pickle(*args, **kwargs):
    """Call the original ``numpy.load`` with ``allow_pickle`` defaulting to True.

    An explicit ``allow_pickle`` supplied by the caller (by keyword, or
    positionally as the third argument) is respected. The previous lambda
    injected the keyword unconditionally, so such calls failed with
    ``TypeError: got multiple values for keyword argument 'allow_pickle'``.
    """
    # numpy.load(file, mmap_mode, allow_pickle, ...): with three or more
    # positional arguments the caller has already chosen allow_pickle.
    if len(args) < 3:
        kwargs.setdefault('allow_pickle', True)
    return numpy_load_old(*args, **kwargs)


numpy.load = _numpy_load_allow_pickle
# Import kokoro-onnx. The script cannot do anything without it, so a failed
# import is reported and the process exits with status 1.
try:
    import kokoro_onnx
    from kokoro_onnx import Kokoro
except ImportError as import_error:
    print(f"β οΈ Kokoro-ONNX import failed: {import_error}")
    sys.exit(1)
else:
    # Not every release necessarily exposes __version__; fall back to 'unknown'.
    version = getattr(kokoro_onnx, '__version__', 'unknown')
    print(f"β Kokoro-ONNX version: {version}")
# Elements whose *contents* (not just their tags) must never be read aloud:
# document metadata, CSS and JavaScript. Self-closing forms such as
# ``<script src="x"/>`` are deliberately not matched here, otherwise the
# pattern would swallow everything up to the next closing tag.
_NON_CONTENT_RE = re.compile(
    r'<(head|script|style)\b(?:[^>/]|/(?!>))*>.*?</\1\s*>',
    re.IGNORECASE | re.DOTALL,
)
# Markup comments, which may themselves contain '>' characters.
_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def _markup_to_text(content: str) -> str:
    """Reduce an (X)HTML document to whitespace-normalized plain text."""
    content = _COMMENT_RE.sub(' ', content)
    content = _NON_CONTENT_RE.sub(' ', content)
    # Replace tags with a space so words on either side of a tag stay apart.
    text = _TAG_RE.sub(' ', content)
    text = html.unescape(text)
    return _WHITESPACE_RE.sub(' ', text).strip()


def extract_text_from_epub_simple(epub_path: str) -> List[str]:
    """Extract the plain text of every chapter-sized document in an EPUB.

    The EPUB is opened as a zip archive. Every member whose name ends in
    ``.html``, ``.xhtml`` or ``.htm`` (case-insensitive) is decoded as UTF-8,
    stripped of markup and kept if more than 500 characters of text remain;
    shorter documents are skipped.

    Documents are processed in sorted order of their archive paths. The
    EPUB's own reading order (the OPF spine) is not consulted, so books whose
    file names do not sort in reading order will come out in a different
    order.

    Args:
        epub_path: Path to the ``.epub`` file.

    Returns:
        One string per retained document, or an empty list if the archive
        could not be read (the error is printed, not raised).
    """
    chapters: List[str] = []
    try:
        with zipfile.ZipFile(epub_path, 'r') as z:
            html_files = sorted(
                name for name in z.namelist()
                if name.lower().endswith(('.html', '.xhtml', '.htm'))
            )
            for html_file in html_files:
                content = z.read(html_file).decode('utf-8', errors='ignore')
                text = _markup_to_text(content)
                if len(text) > 500:
                    chapters.append(text)
                    print(f" π Found chapter {len(chapters)}: {len(text)} chars")
    except Exception as e:
        # Deliberately broad: any failure to read the archive (missing file,
        # corrupt zip, unsupported compression, ...) is reported to the user
        # and signalled to the caller as "no chapters".
        print(f"β Error reading EPUB: {e}")
        return []
    return chapters
def _iter_pieces(text: str, limit: int):
    """Yield non-empty pieces of *text*, none longer than *limit* characters.

    Text is cut at sentence boundaries first. A sentence that is itself too
    long is cut into words, and a word that is still too long is sliced.
    """
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if not sentence:
            continue
        if len(sentence) <= limit:
            yield sentence
            continue
        for word in sentence.split():
            for start in range(0, len(word), limit):
                yield word[start:start + limit]


def split_text_into_chunks(text: str, max_chunk_size: int = 1000) -> List[str]:
    """Split text into smaller chunks for better TTS processing.

    Sentences (text ending in ``.``, ``!`` or ``?`` followed by whitespace)
    are packed greedily into chunks, joined by single spaces. No returned
    chunk is longer than ``max_chunk_size`` characters: the joining space is
    counted, and a sentence that exceeds the limit on its own is broken at
    word boundaries (or, for a single over-long word, sliced).

    Args:
        text: The text to split.
        max_chunk_size: Maximum length of each chunk, in characters.

    Returns:
        The chunks in reading order; an empty list for empty input.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be at least 1")

    chunks: List[str] = []
    parts: List[str] = []  # pieces of the chunk currently being built
    size = 0               # length of " ".join(parts)
    for piece in _iter_pieces(text, max_chunk_size):
        # A piece added to a non-empty chunk costs one extra character for
        # the separating space.
        added = len(piece) + (1 if parts else 0)
        if parts and size + added > max_chunk_size:
            chunks.append(" ".join(parts))
            parts = [piece]
            size = len(piece)
        else:
            parts.append(piece)
            size += added
    if parts:
        chunks.append(" ".join(parts))
    return chunks
# Chapters longer than this many characters are synthesized piece by piece.
_CHUNK_THRESHOLD = 2000
# Upper bound, in characters, requested for each piece of a long chapter.
_CHUNK_SIZE = 1500


def _print_usage():
    """Print the command-line usage and the list of suggested voices."""
    print("Usage: python convert_epub.py your_book.epub [voice_name]")
    print("\nAvailable voices:")
    print(" af_sky - American female (recommended)")
    print(" af_bella - American female, warm")
    print(" am_adam - American male, deep")
    print(" am_michael - American male, conversational")
    print(" bf_isabella - British female, clear")
    print(" bm_george - British male, authoritative")


def _report_gpu_availability():
    """Print the installed ONNX Runtime execution providers (informational only)."""
    try:
        import onnxruntime as ort
        available_providers = ort.get_available_providers()
    except Exception as e:
        # Purely diagnostic: a failure here must not stop the conversion.
        print(f"β οΈ Error checking GPU: {e}")
        return
    print(f"Available ONNX providers: {available_providers}")
    if 'CUDAExecutionProvider' in available_providers:
        print("β GPU acceleration available!")
        # Optional: TensorRT can give even faster inference if available
        if 'TensorrtExecutionProvider' in available_providers:
            print(" π TensorRT also available for maximum performance")
    else:
        print("β οΈ CUDA not available. Running on CPU.")
        print(" Make sure onnxruntime-gpu is installed:")
        print(" pip install onnxruntime-gpu")


def _load_model(model_path, voices_path):
    """Create the Kokoro TTS object, exiting with status 1 if that fails."""
    try:
        print("Loading model...")
        kokoro = Kokoro(model_path, voices_path)
    except Exception as e:
        print(f"β Failed to load model: {e}")
        print("\nTroubleshooting tips:")
        print("1. Check if model files are corrupted: ls -lh ~/Downloads/kokoro*")
        print("2. Make sure you have voices.bin (not voices.json)")
        print("3. Try reinstalling: pip uninstall kokoro-onnx -y && pip install kokoro-onnx")
        sys.exit(1)
    print("β Model loaded successfully!")

    # Best-effort report of the providers actually in use. This only works if
    # the object exposes a `session` attribute; otherwise it is skipped.
    session = getattr(kokoro, 'session', None)
    if session is not None:
        try:
            active_providers = session.get_providers()
        except Exception as e:
            print(f" Could not determine active providers: {e}")
        else:
            print(f" π Active providers: {active_providers}")
            if 'CUDAExecutionProvider' in active_providers:
                print(" β GPU is ACTIVE! Check nvtop.")
            else:
                print(" β οΈ Running on CPU only")
    return kokoro


def _render_chapter(kokoro, chapter_text, voice):
    """Synthesize one chapter.

    Returns ``(samples, sample_rate)``, or ``None`` when no audio at all could
    be produced. Chapters longer than ``_CHUNK_THRESHOLD`` characters are
    synthesized in pieces and the resulting audio is concatenated.
    """
    chunked = len(chapter_text) > _CHUNK_THRESHOLD
    if chunked:
        print(" βοΈ Splitting chapter into smaller chunks...")
        text_chunks = split_text_into_chunks(chapter_text, _CHUNK_SIZE)
        print(f" π¦ Processing {len(text_chunks)} chunks")
    else:
        text_chunks = [chapter_text]

    audio_parts = []
    sample_rate = None
    failed = 0
    for chunk_idx, chunk in enumerate(text_chunks, 1):
        if chunked:
            print(f" Chunk {chunk_idx}/{len(text_chunks)} - {len(chunk)} chars")
        try:
            samples, rate = kokoro.create(
                chunk,
                voice=voice,
                speed=1.0,
                lang="en-us",
            )
        except Exception as e:
            # One bad chunk should not discard the rest of the chapter.
            print(f" β Error on chunk: {e}")
            failed += 1
            continue
        audio_parts.append(samples)
        if sample_rate is None:
            # Use the rate reported by the engine rather than assuming one.
            sample_rate = rate

    if not audio_parts:
        return None
    if failed:
        print(f" Warning: {failed} of {len(text_chunks)} chunks failed; "
              "this chapter's audio is incomplete")
    if len(audio_parts) == 1:
        return audio_parts[0], sample_rate
    return numpy.concatenate(audio_parts), sample_rate


def main():
    """Convert an EPUB into one WAV file per chapter using Kokoro TTS.

    Command line: ``convert_epub.py your_book.epub [voice_name]``; the voice
    defaults to ``af_sky``. The model and voices files are expected in
    ``~/Downloads``. Output goes to ``<book name>_audio/chapter_NNN.wav`` in
    the current directory.

    Exits with status 1 on a usage error, a missing input/model/voices file,
    a model that fails to load, or an EPUB with no usable chapters. Failures
    on individual chapters are reported and skipped.
    """
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit(1)
    epub_file = sys.argv[1]
    voice = sys.argv[2] if len(sys.argv) > 2 else "af_sky"

    # Fail fast on a bad input path, before the slow model load.
    if not os.path.isfile(epub_file):
        print(f"EPUB file not found: {epub_file}")
        sys.exit(1)

    model_path = os.path.expanduser("~/Downloads/kokoro-v0_19.onnx")
    # Use voices.bin instead of voices.json
    voices_path = os.path.expanduser("~/Downloads/voices.bin")
    # Check if files exist
    if not os.path.exists(model_path):
        print(f"β Model file not found at: {model_path}")
        print("\nDownload with:")
        print(" wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx -O ~/Downloads/kokoro-v0_19.onnx")
        sys.exit(1)
    if not os.path.exists(voices_path):
        print(f"β Voices file not found at: {voices_path}")
        print("\nDownload with:")
        print(" wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin -O ~/Downloads/voices.bin")
        print("\nNote: You can delete the old voices.json file:")
        print(" rm ~/Downloads/voices.json")
        sys.exit(1)

    print(f"π EPUB: {os.path.basename(epub_file)}")
    print(f"π€ Voice: {voice}")
    print("π€ Loading Kokoro TTS model...")
    # Set environment variable to allow pickle.
    # NOTE(review): nothing in this file reads ALLOW_PICKLE; kept in case a
    # dependency does - confirm and remove if unused.
    os.environ['ALLOW_PICKLE'] = '1'

    _report_gpu_availability()
    kokoro = _load_model(model_path, voices_path)

    print("\nπ Extracting chapters...")
    chapters = extract_text_from_epub_simple(epub_file)
    if not chapters:
        print("β No chapters found!")
        sys.exit(1)
    print(f"\nβ Found {len(chapters)} chapters")
    total_chars = sum(len(c) for c in chapters)
    print(f"Total text: {total_chars:,} characters")
    print(f"Estimated time: ~{total_chars/500:.0f} seconds (rough estimate with GPU)\n")

    book_name = Path(epub_file).stem
    output_dir = Path(f"{book_name}_audio")
    output_dir.mkdir(exist_ok=True)

    successful = 0
    for i, chapter_text in enumerate(chapters, 1):
        print(f"\nπ Chapter {i}/{len(chapters)} - {len(chapter_text):,} chars")
        if i == 1:
            print(" β‘ First chapter starting - check nvtop for GPU activity!")
        rendered = _render_chapter(kokoro, chapter_text, voice)
        if rendered is None:
            continue
        samples, sample_rate = rendered
        output_file = output_dir / f"chapter_{i:03d}.wav"
        try:
            sf.write(str(output_file), samples, sample_rate)
        except Exception as e:
            # A write failure on one chapter should not abort the whole book.
            print(f" β Error: {e}")
            continue
        duration = len(samples) / sample_rate / 60
        print(f" β Saved chapter {i}: {duration:.1f} minutes")
        successful += 1

    print(f"\n⨠Done! Created {successful}/{len(chapters)} chapters")
    print(f"π Files in: {output_dir}/")
    if successful > 0:
        print("\nπ΅ Combine into audiobook:")
        print(f" ffmpeg -f concat -safe 0 -i <(for f in {output_dir}/chapter_*.wav; do echo \"file '$PWD/$f'\"; done) -c copy \"{book_name}.wav\"")
        print(f" ffmpeg -i \"{book_name}.wav\" -c:a aac -b:a 128k \"{book_name}.m4b\"")
# Run the converter only when this file is executed as a script, so that
# importing it (e.g. to reuse the helper functions) has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment