Upload demo.py
43/demo.py
ADDED
@@ -0,0 +1,266 @@
import streamlit as st
import tensorflow as tf
import sentencepiece as spm
import numpy as np
from scipy.spatial.distance import cosine
import pandas as pd
from openTSNE import TSNE
import plotly.express as px
import plotly.graph_objects as go
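
# To try the demo locally (assuming the packages above are installed and the two model
# files referenced below, model.tflite and sentencepiece.model, sit next to this script),
# launch it with Streamlit's CLI:
#
#   streamlit run demo.py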

# Set Streamlit layout to wide mode and remove padding
st.set_page_config(layout="wide")

# Remove default padding
st.markdown("""
    <style>
    .block-container {
        padding-top: 1rem;
        padding-bottom: 0rem;
        padding-left: 1rem;
        padding-right: 1rem;
    }
    </style>
""", unsafe_allow_html=True)

# Load the TFLite model and SentencePiece model
tflite_model_path = "model.tflite"
spm_model_path = "sentencepiece.model"

sp = spm.SentencePieceProcessor()
sp.load(spm_model_path)

interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
required_input_length = 64  # Fixed length of 64 tokens
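
# The TFLite model is assumed to take a single int32 tensor of shape (1, 64) holding
# SentencePiece token ids and to return one embedding tensor per call. An optional,
# purely illustrative sanity check:
#
#   print(input_details[0]['shape'], input_details[0]['dtype'])  # expected: [1 64], int32
#   print(output_details[0]['shape'])                            # embedding dimensionality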

# Function to preprocess text input
def preprocess_text(text, sp, required_length):
    input_ids = sp.encode(text, out_type=int)
    input_ids = input_ids[:required_length] + [0] * (required_length - len(input_ids))
    return np.array(input_ids, dtype=np.int32).reshape(1, -1)
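
# Illustrative behaviour (hypothetical input): shorter sentences are zero-padded and
# longer ones truncated, so the interpreter always receives a (1, 64) int32 array, e.g.
#   preprocess_text("search intent", sp, 64).shape  ->  (1, 64)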

# Function to generate embeddings
def generate_embeddings(text):
    input_data = preprocess_text(text, sp, required_input_length)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    embedding = interpreter.get_tensor(output_details[0]['index'])
    return embedding.flatten()
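
# The raw TFLite output is typically shaped (1, embedding_dim); flatten() turns it into
# a 1-D vector so it can be passed straight to scipy's cosine() in calculate_similarity.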

# Function to calculate similarity scores between sentences
def calculate_similarity(embedding1, embedding2):
    return 1 - cosine(embedding1, embedding2)
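
# scipy.spatial.distance.cosine returns a cosine *distance*, so 1 - distance gives the
# similarity: 1.0 for identical directions, ~0 for unrelated vectors, and negative values
# when the embeddings point in opposing directions.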

# Predefined sentence sets
preset_sentences_a = [
    "Dan Petrovic predicted conversational search in 2013.",
    "Understanding user intent is key to effective SEO.",
    "Dejan SEO has been a leader in data-driven SEO.",
    "Machine learning is transforming search engines.",
    "The future of search is AI-driven and personalized.",
    "Search algorithms are evolving to better match user intent.",
    "AI technologies enhance digital marketing strategies."
]

preset_sentences_b = [
    "Advances in machine learning reshape how search engines operate.",
    "Personalized content is becoming more prevalent with AI.",
    "Customer behavior insights are crucial for marketing strategies.",
    "Dan Petrovic anticipated the rise of chat-based search interactions.",
    "Dejan SEO is recognized for innovative SEO research and analysis.",
    "Quantum computing is advancing rapidly in the tech world.",
    "Studying user behavior can improve the effectiveness of online ads."
]

# Initialize session state for input fields if not already set
if "input_text_a" not in st.session_state:
    st.session_state["input_text_a"] = "\n".join(preset_sentences_a)
if "input_text_b" not in st.session_state:
    st.session_state["input_text_b"] = "\n".join(preset_sentences_b)

# Clear button to reset text areas
if st.button("Clear Fields"):
    st.session_state["input_text_a"] = ""
    st.session_state["input_text_b"] = ""

# Side-by-side layout for Set A and Set B inputs
col1, col2 = st.columns(2)

with col1:
    st.subheader("Set A Sentences")
    input_text_a = st.text_area("Set A", value=st.session_state["input_text_a"], height=200)

with col2:
    st.subheader("Set B Sentences")
    input_text_b = st.text_area("Set B", value=st.session_state["input_text_b"], height=200)

# Slider to control t-SNE iteration steps
iterations = st.slider("Number of t-SNE Iterations (Higher values = more refined clusters)", 250, 1000, step=250)

# Similarity threshold slider
similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, 0.5, 0.05)
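
# Note: pairs scoring below this threshold are excluded from the match table and are not
# connected by lines in the 3D plot; the heatmap further down always shows all pairs.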

# Submit button
if st.button("Calculate Similarity"):
    sentences_a = [line.strip() for line in input_text_a.split("\n") if line.strip()]
    sentences_b = [line.strip() for line in input_text_b.split("\n") if line.strip()]

    if len(sentences_a) > 0 and len(sentences_b) > 0:
        # Generate embeddings for both sets
        embeddings_a = [generate_embeddings(sentence) for sentence in sentences_a]
        embeddings_b = [generate_embeddings(sentence) for sentence in sentences_b]

        # Combine sentences and embeddings for both sets
        all_sentences = sentences_a + sentences_b
        all_embeddings = np.array(embeddings_a + embeddings_b)
        labels = ["Set A"] * len(sentences_a) + ["Set B"] * len(sentences_b)

        # Calculate similarity matrix
        similarity_matrix = np.zeros((len(sentences_a), len(sentences_b)))
        for i, emb_a in enumerate(embeddings_a):
            for j, emb_b in enumerate(embeddings_b):
                similarity_matrix[i, j] = calculate_similarity(emb_a, emb_b)
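
        # similarity_matrix has shape (len(sentences_a), len(sentences_b)); entry [i, j]
        # is the cosine similarity between sentence i of Set A and sentence j of Set B.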

        # Greedy approach to find best matches above the threshold
        used_a = set()
        used_b = set()
        matches = []
        pairs = []
        for i in range(len(sentences_a)):
            for j in range(len(sentences_b)):
                pairs.append((i, j, similarity_matrix[i, j]))

        # Sort pairs by highest similarity first
        pairs.sort(key=lambda x: x[2], reverse=True)

        for i, j, sim in pairs:
            if i not in used_a and j not in used_b and sim >= similarity_threshold:
                matches.append((i, j, sim))
                used_a.add(i)
                used_b.add(j)
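
        # The greedy pass above yields a one-to-one matching but is not guaranteed to be
        # globally optimal. A sketch of an optimal alternative (not used here) would be
        # the Hungarian algorithm via SciPy:
        #
        #   from scipy.optimize import linear_sum_assignment
        #   rows, cols = linear_sum_assignment(-similarity_matrix)  # maximise total similarity
        #   optimal = [(i, j, similarity_matrix[i, j]) for i, j in zip(rows, cols)
        #              if similarity_matrix[i, j] >= similarity_threshold]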

        # --------------------------------------
        # 1) SHOW MATCH TABLE AT THE TOP USING st.dataframe (FILLING THE SCREEN)
        # --------------------------------------
        if len(matches) == 0:
            st.warning("No sentence pairs exceeded the similarity threshold.")
        else:
            # Create a DataFrame for the matched pairs with original order information
            df_matches = pd.DataFrame(
                [
                    (i+1, sentences_a[i], j+1, sentences_b[j], round(sim, 3))
                    for (i, j, sim) in matches
                ],
                columns=["Set A Order", "Set A Sentence", "Set B Order", "Set B Sentence", "Similarity"]
            )
            st.subheader("Matched Sentences (Above Threshold)")
            st.dataframe(df_matches, use_container_width=True)

        # --------------------------------------
        # 2) THEN PERFORM T-SNE AND SHOW 3D PLOT
        # --------------------------------------
        perplexity_value = min(5, len(all_sentences) - 1)

        tsne = TSNE(
            n_components=3,
            perplexity=perplexity_value,
            n_iter=iterations,
            initialization="pca",
            random_state=42
        )
        tsne_results = tsne.fit(all_embeddings)
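
        # Note: openTSNE's fit() returns a TSNEEmbedding, which behaves like a NumPy array
        # of shape (len(all_sentences), 3) and can be sliced column-wise below. Caveat
        # (version-dependent): openTSNE's FFT-based gradient approximation only supports
        # 1- or 2-D embeddings, so some versions may require passing
        # negative_gradient_method="bh" to TSNE for a 3-component embedding.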

        # Prepare DataFrame for Plotly
        df_tsne = pd.DataFrame({
            "Sentence": all_sentences,
            "Set": labels,
            "X": tsne_results[:, 0],
            "Y": tsne_results[:, 1],
            "Z": tsne_results[:, 2]
        })

        # Create 3D scatter plot with connections
        fig = go.Figure()

        # Add scatter points for Set A
        fig.add_trace(go.Scatter3d(
            x=df_tsne[df_tsne["Set"] == "Set A"]["X"],
            y=df_tsne[df_tsne["Set"] == "Set A"]["Y"],
            z=df_tsne[df_tsne["Set"] == "Set A"]["Z"],
            text=df_tsne[df_tsne["Set"] == "Set A"]["Sentence"],
            mode='markers',
            name='Set A',
            marker=dict(size=5, color='blue')
        ))

        # Add scatter points for Set B
        fig.add_trace(go.Scatter3d(
            x=df_tsne[df_tsne["Set"] == "Set B"]["X"],
            y=df_tsne[df_tsne["Set"] == "Set B"]["Y"],
            z=df_tsne[df_tsne["Set"] == "Set B"]["Z"],
            text=df_tsne[df_tsne["Set"] == "Set B"]["Sentence"],
            mode='markers',
            name='Set B',
            marker=dict(size=5, color='red')
        ))

        # Optionally, add lines for sentence pairs above threshold
        for i, emb_a in enumerate(embeddings_a):
            pos_a = tsne_results[i]
            for j, emb_b in enumerate(embeddings_b):
                sim = similarity_matrix[i, j]
                if sim >= similarity_threshold:
                    pos_b = tsne_results[j + len(sentences_a)]
                    fig.add_trace(go.Scatter3d(
                        x=[pos_a[0], pos_b[0]],
                        y=[pos_a[1], pos_b[1]],
                        z=[pos_a[2], pos_b[2]],
                        mode='lines',
                        line=dict(color=f'rgba(150,150,150,{sim})', width=2),
                        name=f'Similarity: {sim:.2f}',
                        showlegend=False
                    ))

        fig.update_layout(
            title="3D Visualization of Sentence Similarity with Connections",
            width=1200,
            height=800,
            scene=dict(
                xaxis_title="t-SNE Dimension 1",
                yaxis_title="t-SNE Dimension 2",
                zaxis_title="t-SNE Dimension 3"
            )
        )
        st.plotly_chart(fig)

        # --------------------------------------
        # 3) SIMILARITY HEATMAP
        # --------------------------------------
        fig_heatmap = go.Figure(data=go.Heatmap(
            z=similarity_matrix,
            x=[f"B{i+1}" for i in range(len(sentences_b))],
            y=[f"A{i+1}" for i in range(len(sentences_a))],
            colorscale="Viridis",
            text=np.round(similarity_matrix, 2),
            texttemplate="%{text}",
            textfont={"size": 10},
            hoverongaps=False
        ))

        fig_heatmap.update_layout(
            title="Similarity Heatmap between Set A and Set B",
            width=None,  # Full width
            height=400,
            margin=dict(l=20, r=20, t=40, b=20),
            xaxis_title="Set B Sentences",
            yaxis_title="Set A Sentences"
        )

        st.plotly_chart(fig_heatmap)
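
        # The heatmap shows every A-B similarity, while the table above lists only the
        # greedy one-to-one matches that cleared the threshold; the A1/B1 labels refer to
        # the input order of the sentences in each set.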

    else:
        st.warning("Please enter sentences in both Set A and Set B.")