Spaces:

trysem
/

ttsm

Sleeping

App Files Files Community

ttsm / app.py

trysem

Create app.py

7a2a004 verified 12 days ago

raw

history blame contribute delete

3.33 kB

	import gradio as gr
	import torch
	import numpy as np
	from transformers import pipeline

	# 1. Select the inference device: the first CUDA GPU when torch can see
	#    one, otherwise fall back to the CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	print(f"Loading aoxo/swaram model on {device}...")

	# 2. Load the TTS pipeline once at module import so every request reuses it.
	#    A failed load is deliberately non-fatal: `synthesizer` is set to None
	#    instead of crashing, so any code using it must check for None first.
	try:
	    synthesizer = pipeline("text-to-speech", model="aoxo/swaram", device=device)
	    print("Model loaded successfully!")
	except Exception as e:
	    # Top-level boundary: report the failure and keep the process alive.
	    print(f"Error loading model: {e}")
	    synthesizer = None

	# 3. Define the prediction function
	def generate_audio(text):
	    """Synthesize speech for *text* with the module-level ``synthesizer``.

	    Args:
	        text: The text to speak. ``None``, an empty string, or a
	            whitespace-only string is treated as "no input".

	    Returns:
	        A ``(audio, status)`` pair. On success ``audio`` is a
	        ``(sample_rate, samples)`` tuple and ``status`` is ``"Success!"``.
	        On any failure ``audio`` is ``None`` and ``status`` holds a
	        human-readable message; this function does not raise for
	        synthesis errors.
	    """
	    # Guard against None as well as blank strings: calling .strip() on None
	    # would raise AttributeError before any error handling is in place.
	    if not text or not text.strip():
	        return None, "Please enter some text."

	    if synthesizer is None:
	        return None, "Error: Model failed to load. Check Space logs."

	    try:
	        # Generate speech
	        speech = synthesizer(text)

	        # The result is read as a mapping with an 'audio' entry (samples)
	        # and a 'sampling_rate' entry.
	        # np.asarray is a no-op for ndarrays and makes the shape check
	        # below safe for other array-like values.
	        audio_data = np.asarray(speech["audio"])
	        sample_rate = speech["sampling_rate"]

	        # Drop singleton axes so e.g. a (1, N) result becomes (N,), the
	        # 1-D form paired with the sample rate below.
	        if audio_data.ndim > 1:
	            audio_data = np.squeeze(audio_data)

	        return (sample_rate, audio_data), "Success!"

	    except Exception as e:
	        # Surface the failure in the status field instead of raising.
	        return None, f"Generation Error: {e}"

	# 4. Build the Gradio Interface
	#    Layout: a header, then one row with two columns. The left column holds
	#    the text input, the Clear/Generate buttons and the example prompts; the
	#    right column holds the generated audio and a status message.
	with gr.Blocks(title="Swaram Malayalam TTS", theme=gr.themes.Soft()) as demo:
	    gr.Markdown(
	        """
	        # 🗣️ Swaram Malayalam Text-to-Speech
	        Enter Malayalam text below to generate speech using the `aoxo/swaram` model.
	        """
	    )

	    with gr.Row():
	        with gr.Column():
	            text_input = gr.Textbox(
	                label="Enter Malayalam Text",
	                placeholder="മലയാളം ടൈപ്പ് ചെയ്യുക...",
	                lines=5,
	            )

	            with gr.Row():
	                clear_btn = gr.Button("Clear")
	                generate_btn = gr.Button("Generate Speech", variant="primary")

	            # Clicking an example fills `text_input` with that sentence.
	            gr.Examples(
	                examples=[
	                    ["നമസ്കാരം, ഇതെന്റെ പുതിയ ശബ്ദമാണ്."],
	                    ["കേരളം ദൈവത്തിന്റെ സ്വന്തം നാടാണ്."],
	                    ["കള്ളാ കടയാടി മോനെ"],
	                ],
	                inputs=[text_input],
	                label="Examples",
	            )

	        with gr.Column():
	            audio_output = gr.Audio(label="Generated Audio", type="numpy", interactive=False)
	            status_output = gr.Textbox(label="Status", interactive=False)

	    # Event Listeners (registered inside the Blocks context)
	    # generate_audio returns an (audio, status) pair, matching the two outputs.
	    generate_btn.click(
	        fn=generate_audio,
	        inputs=[text_input],
	        outputs=[audio_output, status_output],
	        api_name="synthesize",  # Allows this Space to be used as an API later
	    )

	    # Reset all three components: input text, audio player and status text.
	    clear_btn.click(
	        fn=lambda: (None, None, ""),
	        inputs=[],
	        outputs=[text_input, audio_output, status_output],
	    )

	# 5. Launch the app
	#    Guarded so that importing this module does not start the server; the
	#    app is launched only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()