def init_support_agents(reload_knowledge=False, recreate_knowledge=False):
    """Build the three-agent support pipeline.

    The pipeline is: a context extractor (user message -> strict JSON), a
    knowledge retriever (JSON context -> top KB snippets), and a conversation
    agent (composes the final Italian reply).

    Args:
        reload_knowledge: when True, (re)load the PDF knowledge base into the
            vector store. Bug fix: this flag was previously ignored and the
            knowledge base was loaded unconditionally on every call — i.e.
            once per chat message via get_customer_agents_response().
        recreate_knowledge: when True, drop and rebuild the vector table
            before loading (implies a load).

    Returns:
        Tuple (context_agent, knowledge_agent, conversation_agent).

    Raises:
        RuntimeError: if the DOCKER_DB_URL environment variable is not set.
    """
    db_url = os.getenv("DOCKER_DB_URL")
    if not db_url:
        # Fail fast with a clear message instead of an opaque DB error later.
        raise RuntimeError("DOCKER_DB_URL environment variable is not set")

    kb = PDFKnowledgeBase(
        path="tmp/knowledge-base/",
        reader=PDFReader(chunk=True),
        vector_db=PgVector(
            table_name="kb_documents",
            db_url=db_url,
            search_type=SearchType.hybrid,
            embedder=SentenceTransformerEmbedder(
                id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ),
        ),
    )

    # Embeddings persist in Postgres, so only touch the vector store when
    # explicitly asked to; the old unconditional load re-processed the PDFs
    # for every user message.
    if reload_knowledge or recreate_knowledge:
        kb.load(recreate=recreate_knowledge)

    storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_sessions.db")
    # NOTE(review): the original also built a Memory(SqliteMemoryDb(...)) here
    # but never attached it to any agent; dropped as dead code. Re-add it and
    # pass memory=... to an Agent if per-user memories are wanted.

    context_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        instructions=[
            """
            You are a structured context extractor.
            Given a user message, output a strict JSON object with the following fields:

            {
                "user_goal": string,               # what the user is trying to achieve
                "item_model": string | null,       # product or item model, if any
                "platform": string | null,         # device, system, or platform
                "emotional_state": string | null,  # inferred emotion or sentiment
                "prior_steps": string | null       # steps already taken by the user
            }

            - Always return valid JSON only. No explanations, no markdown.
            - Use null for missing fields.
            - Be conservative in assumptions.
            """
        ],
    )

    knowledge_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        knowledge=kb,
        search_knowledge=True,
        add_context=True,
        instructions=[
            """You are a knowledge retriever agent.
            Given a JSON context, search the knowledge base for the top 3 most relevant snippets.

            Output a plain text list like this:

            - "[Snippet 1 content]" (Document: )
            - "[Snippet 2 content]" (Document: )
            - "[Snippet 3 content]" (Document: )

            Guidelines:
            - Do not speculate or infer beyond retrieved content.
            - Prefer procedural or troubleshooting instructions if the user_goal implies an action.
            - Snippets should be self-contained and relevant to the user_goal, platform, or item_model.

            """
        ],
    )

    conversation_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        instructions=[
            """
            You are a professional AI-powered customer support assistant. Your goal is to solve the user's issue directly, clearly, and thoroughly using the provided context and knowledge base.

            Guidelines:
            - Always provide **step-by-step help** whenever possible.
            - Use the **knowledge base snippets** to inform your reply. Rephrase them in user-friendly language.
            - Do **not** tell the user to go check a manual, a website, or external source unless the information is truly not available in the snippets.
            - If specific product details are missing from the context, ask for clarification — don’t assume.
            - End with a friendly question to continue support, like:
              - "Did that help resolve your issue?"
              - "Would you like me to guide you through that step?"
              - "Is there anything else you'd like to check?"

            Example structure:
            1. Brief recap of already tried actions, based on last messages. Max 2 row.
            2. Address the issue with specifics from KB
            3. Offer additional help

            Do NOT include markdown, citations, or code unless explicitly required.
            Do NOT be generic. You MUST be as specific as possible.

            If the already tried actions are more than 5 or 6, inform the user that you have informed the assistance about his problem, and they will contact him, then ask if he wants to continue in the meantime.

            All user messages are in ITALIAN. You must reply only in ITALIAN. Do not add text in english NEVER.
            Adding text in english is FORBIDDEN.
            """
        ],
    )

    return context_agent, knowledge_agent, conversation_agent
+ """ + ], + ) + + return context_agent, knowledge_agent, conversation_agent + + +def get_customer_agents_response(user_message: str, user_id: str = "user_1", session_id: str = None, history=None): + ctx_agent, kb_agent, convo_agent = init_support_agents() + + ctx_run: RunResponse = ctx_agent.run(user_message, user_id=user_id, session_id=session_id) + ctx_text = ctx_run.content.strip() + try: + ctx = json.loads(ctx_text) + except json.JSONDecodeError: + ctx = {} + + kb_prompt = json.dumps(ctx) + kb_run: RunResponse = kb_agent.run(kb_prompt, user_id=user_id, session_id=session_id) + kb_text = kb_run.content.strip() + + history_lines = [] + if history: + for msg in history[-4:]: + role = "Utente" if msg["role"] == "user" else "Assistente" + history_lines.append(f"{role}: {msg['content'].strip()}") + history_str = "\n".join(history_lines) + + convo_prompt = f""" + Sei un assistente virtuale professionale. + + Tieni conto del contesto, della cronologia della conversazione e degli snippet della knowledge base. + + CONTESTO ESTRATTO: + {json.dumps(ctx, indent=2, ensure_ascii=False)} + + SNIPPET DELLA KNOWLEDGE BASE: + {kb_text} + + STORIA DELLA CONVERSAZIONE: + {history_str} + + NUOVA DOMANDA DELL'UTENTE: + Utente: {user_message} + + Rispondi in modo utile e dettagliato. Non fare riferimento a fonti esterne. Rispondi solo in ITALIANO. 
+ """ + + convo_run: RunResponse = convo_agent.run(convo_prompt, user_id=user_id, session_id=session_id) + return convo_run.content.strip(), convo_run.session_id + + diff --git a/agents/customer_service_agent.py b/agents/customer_service_agent.py new file mode 100644 index 0000000..e1f9471 --- /dev/null +++ b/agents/customer_service_agent.py @@ -0,0 +1,85 @@ +import os + +from agno.agent import Agent +from agno.document.reader.pdf_reader import PDFReader +from agno.embedder.sentence_transformer import SentenceTransformerEmbedder +from agno.knowledge.pdf import PDFKnowledgeBase +from agno.memory import AgentMemory +from agno.memory.classifier import MemoryClassifier +from agno.memory.summarizer import MemorySummarizer +from agno.memory.v2 import Memory +from agno.memory.v2.db.sqlite import SqliteMemoryDb +from agno.models.groq import Groq +from agno.storage.agent.sqlite import SqliteAgentStorage +from agno.vectordb.pgvector import PgVector +from agno.vectordb.search import SearchType +from dotenv import load_dotenv + +load_dotenv() + + +def get_customer_service_agent(reload_knowledge=False, recreate_knowledge=False): + docker_url = os.getenv("DOCKER_DB_URL") + + vector_db = PgVector( + table_name="agent_kb", + search_type=SearchType.hybrid, + db_url=docker_url, + embedder=SentenceTransformerEmbedder(id="sentence-transformers/all-MiniLM-L6-v2"), + auto_upgrade_schema=True, + ) + + kb = PDFKnowledgeBase( + path="tmp/knowledge-base/", + reader=PDFReader(chunk_size=500), + vector_db=vector_db, + ) + + storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_memory/agno_agent_storage.db") + + memory = Memory( + # model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"), + db=SqliteMemoryDb(table_name="user_memories", db_file="tmp/memory/agent.db"), + ) + + evaluator_agent = Agent( + model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"), + storage=storage, + num_history_responses=3, + add_history_to_messages=True, + knowledge=kb, + 
def get_customer_service_agent(reload_knowledge=False, recreate_knowledge=False):
    """Build a single all-in-one customer-service agent.

    The agent is backed by a PDF knowledge base stored in pgvector, sqlite
    session storage, and sqlite-backed user memory.

    Args:
        reload_knowledge: when True, (re)load the PDF knowledge base into the
            vector store. Bug fix: the original ignored this flag (the intended
            guard was left commented out) and loaded unconditionally.
        recreate_knowledge: when True, drop and rebuild the vector table
            before loading (implies a load).

    Returns:
        The configured Agent.

    Raises:
        RuntimeError: if the DOCKER_DB_URL environment variable is not set.
    """
    docker_url = os.getenv("DOCKER_DB_URL")
    if not docker_url:
        # Fail fast with a clear message instead of an opaque DB error later.
        raise RuntimeError("DOCKER_DB_URL environment variable is not set")

    vector_db = PgVector(
        table_name="agent_kb",
        search_type=SearchType.hybrid,
        db_url=docker_url,
        embedder=SentenceTransformerEmbedder(id="sentence-transformers/all-MiniLM-L6-v2"),
        auto_upgrade_schema=True,
    )

    kb = PDFKnowledgeBase(
        path="tmp/knowledge-base/",
        reader=PDFReader(chunk_size=500),
        vector_db=vector_db,
    )

    storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_memory/agno_agent_storage.db")

    memory = Memory(
        # model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        db=SqliteMemoryDb(table_name="user_memories", db_file="tmp/memory/agent.db"),
    )

    evaluator_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        num_history_responses=3,
        add_history_to_messages=True,
        knowledge=kb,
        search_knowledge=True,
        memory=memory,
        read_chat_history=True,
        instructions="""
        You are a highly skilled, professional, and empathetic customer support agent.
        Engage naturally and build rapport with the user while maintaining a polite and supportive tone.
        Your style is professional yet approachable, concise yet thorough.

        When you receive a user question:
        - Carefully analyze it and think step by step before responding.
        - If the question is ambiguous or lacks detail, politely ask clarifying questions.
        - Use your knowledge base to enrich your reply with verified and up-to-date information.
        - When referencing knowledge, explicitly cite the section or document name when possible (e.g., "according to page 5 of the Setup Guide").

        Always respond with empathy, especially if the user expresses frustration or confusion. Acknowledge their feelings respectfully.

        If you detect a potential safety, legal, or urgent escalation issue (such as safety hazards, repeated user dissatisfaction, or refund/complaint requests), advise that you will escalate to a human agent, and help collect any necessary information for a smooth transfer.

        Whenever relevant, proactively offer helpful suggestions, best practices, or troubleshooting steps beyond what was directly asked, to demonstrate initiative.

        After answering, always check if the user needs any further help before ending the conversation.

        Examples of expected behavior:
        - If the user asks vaguely: “My printer doesn’t work,” reply with clarifying questions like: “I’m sorry to hear that. Could you please tell me what model you have and describe what’s happening in more detail?”
        - If a user is angry: “I completely understand your frustration. Let’s work together to get this resolved as quickly as possible.”
        - If referencing documentation: “According to the Troubleshooting Guide, section 3.2, you can reset your printer by…”

        Remember: be calm, precise, and user-centered at all times.
        """
    )

    # Honour the reload flag: embeddings persist in Postgres, so loading is
    # only needed when the documents change (the original always reloaded,
    # leaving the commented-out `if reload_knowledge:` guard unused).
    if reload_knowledge or recreate_knowledge:
        kb.load(recreate=recreate_knowledge)

    return evaluator_agent
if prompt := st.chat_input("Come posso aiutarti?"):
    # Record and render the user's message for this run; the final rerun
    # re-renders the whole history from session_state.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Sto pensando..."):
            # chunks = get_customer_service_agent().run(prompt)
            response, session_id = get_customer_agents_response(
                prompt,
                user_id="user_1",
                session_id=st.session_state.get("session_id"),
                history=st.session_state.messages
            )
            # Bug fix: the returned session_id was discarded, so the lookup
            # above always yielded None and every message started a brand-new
            # agent session. Persist it so the conversation stays in one
            # session across turns.
            st.session_state["session_id"] = session_id
            # response = chunks.content
            st.session_state.messages.append({"role": "assistant", "content": response})
            st.rerun()