ix-voice/ix_voice.py
2026-02-25 00:53:22 +00:00

74 lines
2.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
IX Voice — Neural Voice Synthesis
Part of the Inference-X Ecosystem
Copyright (C) 2024-2026 Salka Elmadani. BSL-1.1.
https://inference-x.com
https://git.inference-x.com/inference-x-community/ix-voice
# SALKA ELMADANI | inference-x.com | BSL-1.1
Philosophy: Local. Private. Yours.
Your voice stays on your hardware. Always.
Z = dI/d(log s) * exp(i*theta)
"""
# ghostvoice_zero_dep.py — minimal dependencies: numpy for the signal processing,
# FastAPI + uvicorn for the web front end (no librosa or other audio library).
# Works directly on a raw, uncompressed WAV upload.
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import StreamingResponse, HTMLResponse
import uvicorn
import numpy as np
import wave
import io
from datetime import datetime
# ASGI application object: the route decorators in this module register their
# handlers on it, and uvicorn serves it when the module is run as a script.
app = FastAPI()
@app.get("/")
async def home():
    """Serve the landing page.

    Returns the contents of ``index.html`` (looked up relative to the current
    working directory, decoded as UTF-8) when that file exists; otherwise
    returns a minimal built-in upload form that posts to ``/z``.
    """
    # Imported locally because the module never imports `os`; the previous
    # `os.path.exists` call raised NameError on every request.
    from pathlib import Path

    page = Path("index.html")
    if page.is_file():
        # read_text opens and closes the file, so no handle is leaked.
        return HTMLResponse(page.read_text(encoding="utf-8"))
    return HTMLResponse("<h1>GhostVoice prêt</h1><form action='/z' method='post' enctype='multipart/form-data'><input type='file' name='v' accept='.wav' required><br><input type='text' name='t' required><br><button>GÉNÉRER</button></form>")
def _decode_pcm(raw: bytes, sample_width: int, n_channels: int) -> np.ndarray:
    """Convert raw little-endian PCM frames into a mono float32 signal."""
    if sample_width == 1:
        # 8-bit WAV samples are unsigned with a midpoint of 128.
        samples = np.frombuffer(raw, dtype=np.uint8).astype(np.float32) - 128.0
    elif sample_width in (2, 4):
        # Drop a trailing partial sample (truncated file) instead of crashing.
        usable = len(raw) - len(raw) % sample_width
        samples = np.frombuffer(
            raw[:usable], dtype=f"<i{sample_width}"
        ).astype(np.float32)
    else:
        raise ValueError(
            f"unsupported WAV sample width: {sample_width} byte(s) per sample"
        )
    if n_channels > 1:
        # Frames are interleaved (L, R, L, R, ...); average them down to mono
        # so the pitch estimate is not thrown off by the interleaving.
        whole = samples.size - samples.size % n_channels
        samples = samples[:whole].reshape(-1, n_channels).mean(axis=1)
    return samples


def _estimate_f0(audio: np.ndarray, sr: int, fallback: float = 180.0) -> float:
    """Estimate the dominant pitch of *audio* in Hz from its autocorrelation.

    Returns *fallback* when the signal is too short, silent, or has no
    autocorrelation peak after the initial decay.
    """
    if audio.size < 2 or sr <= 0:
        return fallback
    centered = audio.astype(np.float64) - float(audio.mean())
    n = centered.size
    # Linear autocorrelation via FFT (zero-padded to avoid circular wrap).
    # This is O(n log n); the direct np.correlate form is O(n^2) and becomes
    # unusable on more than a second or two of audio.
    size = 1 << (2 * n - 1).bit_length()
    spectrum = np.fft.rfft(centered, size)
    autocorr = np.fft.irfft(spectrum * np.conj(spectrum), size)[:n]
    # Skip the initial decay from lag 0: start searching at the first lag
    # where the autocorrelation begins to rise again.
    rising = np.flatnonzero(np.diff(autocorr) > 0)
    if rising.size == 0:
        return fallback
    first = int(rising[0])
    peak = first + int(np.argmax(autocorr[first:]))
    if peak < 2:
        # A lag below 2 samples would put the pitch above Nyquist.
        return fallback
    return sr / peak


def clone_voice(wav_bytes: bytes, text: str) -> bytes:
    """Synthesize a tone at the pitch of the uploaded voice sample.

    The input WAV is decoded with the standard-library ``wave`` module, its
    average fundamental frequency is estimated by autocorrelation, and a
    decaying, tremolo-modulated sine at that frequency is rendered. The text
    only determines the output length (0.065 s per character, minimum 1 s).

    Args:
        wav_bytes: Contents of an uncompressed PCM WAV file (8-, 16- or
            32-bit, any channel count).
        text: Text whose length sets the duration of the output.

    Returns:
        A mono 16-bit WAV file, at the input's sample rate, as bytes.

    Raises:
        wave.Error: If *wav_bytes* is not a WAV file the ``wave`` module
            can parse.
        EOFError: If the WAV data ends before the header is complete.
        ValueError: If the sample width is unsupported or the header reports
            a non-positive sample rate.
    """
    # Pure-stdlib WAV decoding (no librosa).
    with wave.open(io.BytesIO(wav_bytes)) as wf:
        sr = wf.getframerate()
        n_channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        raw = wf.readframes(wf.getnframes())
    if sr <= 0:
        raise ValueError("WAV header reports a non-positive sample rate")

    audio = _decode_pcm(raw, sample_width, n_channels)
    f0 = _estimate_f0(audio, sr)

    # Pure synthesis: sine carrier shaped by an exponential decay with a
    # 4.5 Hz tremolo. The envelope is strictly positive (1 - 0.7 > 0).
    duration = max(1.0, len(text) * 0.065)
    t = np.linspace(0, duration, int(sr * duration), False)
    carrier = np.sin(2 * np.pi * f0 * t)
    envelope = np.exp(-t / (duration / 3)) * (1 + 0.7 * np.sin(2 * np.pi * 4.5 * t))
    signal = carrier * envelope
    # Normalize to 93 % of full scale; the epsilon guards against division
    # by zero on an all-zero signal.
    signal = signal / np.max(np.abs(signal) + 1e-12) * 0.93
    wav_out = (signal * 32767).astype(np.int16)

    bio = io.BytesIO()
    with wave.open(bio, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sr)
        wf.writeframes(wav_out.tobytes())
    return bio.getvalue()
@app.post("/z")
async def z(v: UploadFile = File(...), t: str = Form(...)):
    """Handle the upload form: turn a WAV sample plus text into a WAV download.

    Args:
        v: Uploaded voice sample; its filename must end in ``.wav``
            (case-insensitive).
        t: Text passed to ``clone_voice``.

    Returns:
        The generated audio as an ``audio/wav`` attachment named
        ``ghost_<HHMMSS>.wav``, or an HTML message with status 400 when the
        upload is not a usable WAV file.
    """
    # The filename is optional in a multipart upload, so guard against None.
    filename = v.filename or ""
    if not filename.lower().endswith('.wav'):
        return HTMLResponse(
            "Upload un fichier .wav (ouvre ton audio avec VLC → Convertir → WAV)",
            status_code=400,
        )
    try:
        wav = clone_voice(await v.read(), t)
    except (wave.Error, EOFError, ValueError):
        # A file with a .wav name but unreadable content is a client error,
        # not a server crash.
        return HTMLResponse(
            "Fichier WAV invalide ou non supporté (PCM non compressé attendu)",
            status_code=400,
        )
    return StreamingResponse(
        iter([wav]),
        media_type="audio/wav",
        headers={"Content-Disposition": f"attachment;filename=ghost_{datetime.now().strftime('%H%M%S')}.wav"},
    )
if __name__ == "__main__":
    # Script entry point: announce the local URL, then hand the app to
    # uvicorn on port 8000 (uvicorn's default host is used).
    banner = "GhostVoice ZÉRO DÉPENDANCE → http://localhost:8000"
    print(banner)
    uvicorn.run(app, port=8000)