Skip to content

Instantly share code, notes, and snippets.

@srugano
Created March 16, 2026 11:55
Show Gist options
  • Select an option

  • Save srugano/b90de8569c36bb9c6161aa963c6df076 to your computer and use it in GitHub Desktop.

Select an option

Save srugano/b90de8569c36bb9c6161aa963c6df076 to your computer and use it in GitHub Desktop.
Convert an EPUB book to per-chapter audio files using Kokoro TTS.
#!/usr/bin/env python3
import sys
import os
import zipfile
from pathlib import Path
import soundfile as sf
import html
import re
from typing import List
import warnings
# Silence noisy deprecation/runtime warnings from the TTS/ONNX stack.
warnings.filterwarnings('ignore')
# Monkey patch numpy to allow pickle loading
# NOTE(review): this forces allow_pickle=True for EVERY numpy.load call in
# the process so the Kokoro voices file can be deserialized. Unpickling is
# code execution on untrusted data — only load voice files from a source
# you trust.
import numpy
numpy_load_old = numpy.load
numpy.load = lambda *a, **k: numpy_load_old(*a, allow_pickle=True, **k)
# Import kokoro-onnx
try:
    from kokoro_onnx import Kokoro
    import kokoro_onnx
    # Version is reported for debugging; older releases lack __version__.
    version = getattr(kokoro_onnx, '__version__', 'unknown')
    print(f"βœ… Kokoro-ONNX version: {version}")
except ImportError as e:
    # Fail fast with a readable message instead of a traceback later on.
    print(f"⚠️ Kokoro-ONNX import failed: {e}")
    sys.exit(1)
def extract_text_from_epub_simple(epub_path: str) -> List[str]:
    """Pull plain-text chapters out of an EPUB archive.

    An EPUB is just a zip file; every (x)html document inside is stripped
    of markup and kept as one chapter when it contains enough text to be
    a real chapter. Returns an empty list if the archive cannot be read.
    """
    extracted: List[str] = []
    try:
        with zipfile.ZipFile(epub_path, 'r') as archive:
            documents = sorted(
                name for name in archive.namelist()
                if name.endswith(('.html', '.xhtml', '.htm'))
            )
            for name in documents:
                with archive.open(name) as handle:
                    raw = handle.read().decode('utf-8', errors='ignore')
                # Drop tags, resolve entities, collapse whitespace runs.
                stripped = re.sub(r'<[^>]+>', ' ', raw)
                stripped = html.unescape(stripped)
                stripped = re.sub(r'\s+', ' ', stripped).strip()
                # Skip covers/navigation pages: anything under 500 chars.
                if len(stripped) > 500:
                    extracted.append(stripped)
                    print(f" πŸ“– Found chapter {len(extracted)}: {len(stripped)} chars")
        return extracted
    except Exception as e:
        print(f"❌ Error reading EPUB: {e}")
        return []
def split_text_into_chunks(text: str, max_chunk_size: int = 1000) -> List[str]:
    """Split text into chunks of at most ``max_chunk_size`` characters.

    Splitting happens only on sentence boundaries (after ., ! or ?) so the
    TTS engine never receives half a sentence. A single sentence longer
    than ``max_chunk_size`` still becomes its own (oversized) chunk.

    Returns an empty list for empty input.
    """
    # Split by sentences to avoid cutting in the middle.
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks: List[str] = []
    current_chunk = ""
    for sentence in sentences:
        if not current_chunk:
            current_chunk = sentence
        # FIX: the +1 accounts for the joining space. The original check
        # (len(current) + len(sentence) <= max) omitted it, so chunks
        # could end up one character over the limit.
        elif len(current_chunk) + 1 + len(sentence) <= max_chunk_size:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
def main():
    """CLI entry point: convert an EPUB to per-chapter WAV files.

    Usage: python convert_epub.py your_book.epub [voice_name]

    Expects the Kokoro model (kokoro-v0_19.onnx) and voices.bin in
    ~/Downloads. Writes chapter_NNN.wav files into "<book>_audio/"
    in the current working directory.
    """
    # --- Argument handling ------------------------------------------------
    if len(sys.argv) < 2:
        print("Usage: python convert_epub.py your_book.epub [voice_name]")
        print("\nAvailable voices:")
        print(" af_sky - American female (recommended)")
        print(" af_bella - American female, warm")
        print(" am_adam - American male, deep")
        print(" am_michael - American male, conversational")
        print(" bf_isabella - British female, clear")
        print(" bm_george - British male, authoritative")
        sys.exit(1)
    epub_file = sys.argv[1]
    voice = sys.argv[2] if len(sys.argv) > 2 else "af_sky"
    model_path = os.path.expanduser("~/Downloads/kokoro-v0_19.onnx")
    # FIXED: Use voices.bin instead of voices.json
    voices_path = os.path.expanduser("~/Downloads/voices.bin")
    # --- Verify model files exist before doing any work -------------------
    if not os.path.exists(model_path):
        print(f"❌ Model file not found at: {model_path}")
        print("\nDownload with:")
        print(" wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx -O ~/Downloads/kokoro-v0_19.onnx")
        sys.exit(1)
    if not os.path.exists(voices_path):
        print(f"❌ Voices file not found at: {voices_path}")
        print("\nDownload with:")
        print(" wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin -O ~/Downloads/voices.bin")
        print("\nNote: You can delete the old voices.json file:")
        print(" rm ~/Downloads/voices.json")
        sys.exit(1)
    print(f"πŸ“– EPUB: {os.path.basename(epub_file)}")
    print(f"🎀 Voice: {voice}")
    print("πŸ€– Loading Kokoro TTS model...")
    # Set environment variable to allow pickle
    os.environ['ALLOW_PICKLE'] = '1'
    # --- GPU availability report (informational only) ---------------------
    try:
        import onnxruntime as ort
        available_providers = ort.get_available_providers()
        print(f"Available ONNX providers: {available_providers}")
        if 'CUDAExecutionProvider' in available_providers:
            print("βœ… GPU acceleration available!")
            # Optional: TensorRT gives even faster inference if present.
            if 'TensorrtExecutionProvider' in available_providers:
                print(" πŸš€ TensorRT also available for maximum performance")
        else:
            print("⚠️ CUDA not available. Running on CPU.")
            print(" Make sure onnxruntime-gpu is installed:")
            print(" pip install onnxruntime-gpu")
    except Exception as e:
        print(f"⚠️ Error checking GPU: {e}")
    # --- Load the TTS model -----------------------------------------------
    try:
        print("Loading model...")
        kokoro = Kokoro(model_path, voices_path)
        print("βœ… Model loaded successfully!")
        # Best-effort check of which execution provider is actually active.
        try:
            if hasattr(kokoro, 'session'):
                active_providers = kokoro.session.get_providers()
                print(f" πŸ” Active providers: {active_providers}")
                if 'CUDAExecutionProvider' in str(active_providers):
                    print(" βœ… GPU is ACTIVE! Check nvtop.")
                else:
                    print(" ⚠️ Running on CPU only")
        except Exception:
            # FIX: was a bare `except:` — keep the best-effort behaviour
            # but stop swallowing SystemExit/KeyboardInterrupt.
            pass
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        print("\nTroubleshooting tips:")
        print("1. Check if model files are corrupted: ls -lh ~/Downloads/kokoro*")
        print("2. Make sure you have voices.bin (not voices.json)")
        print("3. Try reinstalling: pip uninstall kokoro-onnx -y && pip install kokoro-onnx")
        sys.exit(1)
    # --- Extract chapters --------------------------------------------------
    print("\nπŸ“š Extracting chapters...")
    chapters = extract_text_from_epub_simple(epub_file)
    if not chapters:
        print("❌ No chapters found!")
        sys.exit(1)
    print(f"\nβœ… Found {len(chapters)} chapters")
    total_chars = sum(len(c) for c in chapters)
    print(f"Total text: {total_chars:,} characters")
    print(f"Estimated time: ~{total_chars/500:.0f} seconds (rough estimate with GPU)\n")
    book_name = Path(epub_file).stem
    output_dir = Path(f"{book_name}_audio")
    output_dir.mkdir(exist_ok=True)
    successful = 0
    # --- Synthesize each chapter -------------------------------------------
    for i, chapter_text in enumerate(chapters, 1):
        print(f"\nπŸ“– Chapter {i}/{len(chapters)} - {len(chapter_text):,} chars")
        if i == 1:
            print(" ⚑ First chapter starting - check nvtop for GPU activity!")
        # Split long chapters into smaller chunks for better processing
        if len(chapter_text) > 2000:
            print(f" βœ‚οΈ Splitting chapter into smaller chunks...")
            text_chunks = split_text_into_chunks(chapter_text, 1500)
            print(f" πŸ“¦ Processing {len(text_chunks)} chunks")
            all_samples = []
            sample_rate = 24000  # Kokoro's native rate; updated by create()
            for chunk_idx, chunk in enumerate(text_chunks, 1):
                print(f" Chunk {chunk_idx}/{len(text_chunks)} - {len(chunk)} chars")
                try:
                    samples, sample_rate = kokoro.create(
                        chunk,
                        voice=voice,
                        speed=1.0,
                        lang="en-us"
                    )
                    all_samples.append(samples)
                except Exception as e:
                    print(f" ❌ Error on chunk: {e}")
                    continue
            if all_samples:
                # FIX: use the module-level numpy import instead of
                # re-importing numpy inside the loop, and write with the
                # sample rate reported by the model (this branch used to
                # hard-code 24000 while the other branch used sample_rate).
                final_samples = numpy.concatenate(all_samples)
                output_file = output_dir / f"chapter_{i:03d}.wav"
                sf.write(output_file, final_samples, sample_rate)
                duration = len(final_samples) / sample_rate / 60
                print(f" βœ… Saved chapter {i}: {duration:.1f} minutes")
                successful += 1
        else:
            # Short chapter: one synthesis call, no chunking.
            try:
                samples, sample_rate = kokoro.create(
                    chapter_text,
                    voice=voice,
                    speed=1.0,
                    lang="en-us"
                )
                output_file = output_dir / f"chapter_{i:03d}.wav"
                sf.write(output_file, samples, sample_rate)
                duration = len(samples) / sample_rate / 60
                print(f" βœ… Saved: {duration:.1f} minutes")
                successful += 1
            except Exception as e:
                print(f" ❌ Error: {e}")
    # --- Summary ------------------------------------------------------------
    print(f"\n✨ Done! Created {successful}/{len(chapters)} chapters")
    print(f"πŸ“ Files in: {output_dir}/")
    if successful > 0:
        print("\n🎡 Combine into audiobook:")
        print(f" ffmpeg -f concat -safe 0 -i <(for f in {output_dir}/chapter_*.wav; do echo \"file '$PWD/$f'\"; done) -c copy \"{book_name}.wav\"")
        print(f" ffmpeg -i \"{book_name}.wav\" -c:a aac -b:a 128k \"{book_name}.m4b\"")
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment