|
import streamlit as st
|
|
import google.generativeai as genai
|
|
from gtts import gTTS
|
|
from PIL import Image
|
|
import os
|
|
from dotenv import load_dotenv
|
|
|
|
# Run python-dotenv's loader before the API key is looked up in the environment.
load_dotenv()

# Hand the Gemini client whatever GEMINI_API_KEY holds (None when it is unset).
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Single shared model handle used by image_to_text().
model = genai.GenerativeModel(model_name="gemini-1.5-flash")
|
|
|
|
def image_to_text(image):
    """Ask the shared Gemini model for a short spoken-style description of an image.

    Args:
        image: The image to describe. It is forwarded unchanged to
            ``model.generate_content`` next to the prompt; ``main`` passes the
            object returned by ``PIL.Image.open``.

    Returns:
        The ``text`` attribute of the model response.
    """
    prompt = "Describe this image in such a way that a blind person can understand it in one line of 15 words."
    request_parts = [prompt, image]
    reply = model.generate_content(request_parts)
    return reply.text
|
|
def text_to_speech(text: str, filename: str = 'output.mp3') -> str:
    """Synthesize English speech for ``text`` and save it as an MP3 file.

    Args:
        text: The sentence(s) to speak. Must contain at least one
            non-whitespace character.
        filename: Path the audio is written to. Defaults to ``output.mp3`` in
            the current working directory; an existing file is overwritten.

    Returns:
        ``filename``, so the caller can open or play the saved audio.

    Raises:
        ValueError: If ``text`` is ``None``, empty, or only whitespace.
    """
    # Fail early with a clear message instead of letting the speech backend
    # reject the empty input with a less descriptive error.
    if not text or not text.strip():
        raise ValueError("Cannot convert empty text to speech")

    tts = gTTS(text=text, lang='en')
    tts.save(filename)
    return filename
|
|
|
|
def main():
    """Render the Streamlit page that turns an uploaded image into speech.

    Flow:
        1. Show the title and a JPEG/PNG uploader.
        2. Once a file is uploaded, open it with PIL and display it.
        3. When "Generate Description" is pressed, call ``image_to_text`` for a
           description, show it, call ``text_to_speech`` to save it as MP3,
           then offer the audio for playback and download.

    Any failure while opening the image or generating the description/audio
    is reported on the page through ``st.error`` instead of propagating.
    """
    st.title("Image to Speech Converter")

    uploaded_file = st.file_uploader("Upload an image", type=["jpeg", "jpg", "png"])
    if uploaded_file is None:
        return

    # A file with an accepted extension can still be corrupt or not an image
    # at all; report that on the page rather than crashing the script run.
    try:
        image = Image.open(uploaded_file)
    except (OSError, ValueError) as e:
        st.error(f"Error processing image: {str(e)}")
        return

    st.image(image, caption="Uploaded Image", use_container_width=True)

    if not st.button("Generate Description"):
        return

    with st.spinner("Processing..."):
        try:
            description = image_to_text(image)

            st.subheader("Image Description")
            st.write(description)

            audio_file = text_to_speech(description)

            # Read the MP3 once and give both widgets plain bytes, so neither
            # depends on a file handle that is closed right after this block.
            with open(audio_file, "rb") as file:
                audio_bytes = file.read()

            st.audio(audio_bytes, format='audio/mp3')
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name=audio_file,
                mime="audio/mp3",
            )
        except Exception as e:
            # Top-level UI boundary: surface any model/TTS/file error to the user.
            st.error(f"Error processing image: {str(e)}")
|
|
|
|
# Build the page only when this file is executed as the entry script.
if __name__ == "__main__":
    main()
|
|
|