# AI_IMG_SPEECH / app.py
# Uploaded by arkokundu500 - commit 3575b3d (verified): "Upload app.py"
import streamlit as st
import google.generativeai as genai
from gtts import gTTS
from PIL import Image
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv (typically from a .env file)
# before the API key is read below.
load_dotenv()
# Configure the Gemini client with the value of GEMINI_API_KEY; os.getenv
# returns None when the variable is not set.
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Module-level model handle shared by image_to_text().
model = genai.GenerativeModel('gemini-1.5-flash')
def image_to_text(image):
    """Return a one-line description of ``image`` produced by the Gemini model.

    A fixed, accessibility-oriented prompt is sent together with ``image``
    (forwarded unchanged; ``main`` passes a PIL image) to the module-level
    ``model``. The ``text`` attribute of the response is returned as-is.
    """
    prompt = "Describe this image in such a way that a blind person can understand it in one line of 15 words."
    request_parts = [prompt, image]
    reply = model.generate_content(request_parts)
    return reply.text
def text_to_speech(text, filename='output.mp3'):
    """Synthesize English speech for ``text`` and save it to ``filename``.

    Args:
        text: The text to be spoken.
        filename: Destination path handed to gTTS's ``save``; defaults to
            ``'output.mp3'``.

    Returns:
        The same ``filename`` that was written, so callers can pass the
        path straight on.
    """
    speech = gTTS(lang='en', text=text)
    speech.save(filename)
    return filename
def main():
    """Render the Streamlit page: upload an image, describe it, and speak it.

    Flow:
        1. Show a file uploader restricted to JPEG/PNG files.
        2. Once a file is uploaded, open it with PIL and display it.
        3. When "Generate Description" is clicked, call ``image_to_text`` for
           a description, show it, convert it to an MP3 with
           ``text_to_speech``, then offer an audio player and a download
           button for that MP3.

    Any exception raised while generating the description or audio is shown
    to the user via ``st.error`` instead of propagating.
    """
    st.title("Image to Speech Converter")
    # Label typo fixed ("Uplad" -> "Upload"); this string is shown to users.
    uploaded_file = st.file_uploader("Upload an image", type=["jpeg", "jpg", "png"])
    if uploaded_file is None:
        # Nothing uploaded yet: only the title and uploader are rendered.
        return

    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # NOTE(review): the results below are rendered only inside this button
    # branch, so they may disappear if clicking "Download Audio" triggers a
    # Streamlit rerun - confirm whether they should persist via
    # st.session_state.
    if not st.button("Generate Description"):
        return

    with st.spinner("Processing..."):
        try:
            # Generate description
            description = image_to_text(image)

            # Display description
            st.subheader("Image Description")
            st.write(description)

            # Convert to speech
            audio_file = text_to_speech(description)

            # Read the MP3 once so the player and the download button share
            # the same bytes and no file handle stays open while rendering.
            with open(audio_file, "rb") as file:
                audio_bytes = file.read()

            # Play audio
            st.audio(audio_bytes, format='audio/mp3')

            # Download button
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name=audio_file,
                mime="audio/mp3",
            )
        except Exception as e:
            # Top-level UI boundary: report the failure rather than crash.
            st.error(f"Error processing image: {e}")
# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()