diff --git a/main.py b/main.py
index d363ead..6a55b35 100644
--- a/main.py
+++ b/main.py
@@ -166,7 +166,7 @@ def get_custom_voices() -> List[str]:
 
 async def generate_speech(text: str, voice: str) -> bytes:
     """
-    Generate speech using Orpheus model (async version).
+    Generate speech using Orpheus model (async wrapper).
     
     Args:
         text: Text to convert (may include emotion tags)
@@ -191,31 +191,36 @@ async def generate_speech(text: str, voice: str) -> bytes:
     
     print(f"{text}")
     
-    # Generate speech using Orpheus - async iteration!
-    import numpy as np
-    audio_chunks = []
+    # Run synchronous generation in thread pool to not block event loop
+    def _generate_sync():  # Blocking worker: runs the full synthesis and returns WAV bytes
+        import numpy as np
+        audio_chunks = []
+        # `text` and `voice` are closed over from the enclosing generate_speech() call
+        syn_tokens = model.generate_speech(
+            prompt=text,
+            voice=voice,
+        )
+        
+        # Plain (blocking) iteration: collect every chunk produced by syn_tokens
+        for audio_chunk in syn_tokens:
+            audio_chunks.append(audio_chunk)
+        # Merge the chunks into one buffer; a single chunk is used as-is.
+        # NOTE(review): if nothing was yielded, audio_chunks[0] raises IndexError.
+        audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
+        
+        # Write an in-memory WAV: 1 channel, 2-byte sample width, SAMPLE_RATE frame rate
+        buffer = io.BytesIO()
+        with wave.open(buffer, 'wb') as wf:
+            wf.setnchannels(1)
+            wf.setsampwidth(2)  # 2 bytes per sample = 16-bit
+            wf.setframerate(SAMPLE_RATE)
+            wf.writeframes(audio_data)
+        
+        return buffer.getvalue()
     
-    syn_tokens = model.generate_speech(
-        prompt=text,
-        voice=voice,
-    )
-    
-    # Async iteration over the generator
-    async for audio_chunk in syn_tokens:
-        audio_chunks.append(audio_chunk)
-    
-    # Combine chunks into single audio
-    audio_data = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
-    
-    # Convert to WAV bytes
-    buffer = io.BytesIO()
-    with wave.open(buffer, 'wb') as wf:
-        wf.setnchannels(1)
-        wf.setsampwidth(2)  # 16-bit
-        wf.setframerate(SAMPLE_RATE)
-        wf.writeframes(audio_data)
-    
-    return buffer.getvalue()
+    # Run in executor to avoid blocking
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, _generate_sync)
 
 
 def save_audio_to_file(job_id: str, audio_bytes: bytes) -> str:
@@ -528,15 +533,15 @@ async def stream_tts(request: TTSStreamRequest):
     if voice not in BUILTIN_VOICES:
         voice = DEFAULT_VOICE
     
-    async def audio_generator():
-        """Generate audio chunks (async)"""
+    def sync_audio_generator():
+        """Generate audio chunks (sync generator)"""
         try:
             syn_tokens = model.generate_speech(
                 prompt=request.text,
                 voice=voice,
             )
             
-            async for audio_chunk in syn_tokens:
+            for audio_chunk in syn_tokens:
                 yield audio_chunk
                 
         except Exception as e:
@@ -544,7 +549,7 @@ async def stream_tts(request: TTSStreamRequest):
             raise
     
     return StreamingResponse(
-        audio_generator(),
+        sync_audio_generator(),
         media_type="audio/wav"
     )