def init_support_agents(reload_knowledge=False, recreate_knowledge=False):
    """Build the three-agent support pipeline.

    The pipeline is: a context extractor (user message -> strict JSON), a
    knowledge retriever (JSON context -> top KB snippets), and a conversation
    agent (composes the final Italian reply).

    Args:
        reload_knowledge: when True, (re)load the PDF knowledge base into the
            vector store. Bug fix: this flag was previously ignored and the
            knowledge base was loaded unconditionally on every call — i.e.
            once per chat message via get_customer_agents_response().
        recreate_knowledge: when True, drop and rebuild the vector table
            before loading (implies a load).

    Returns:
        Tuple (context_agent, knowledge_agent, conversation_agent).

    Raises:
        RuntimeError: if the DOCKER_DB_URL environment variable is not set.
    """
    db_url = os.getenv("DOCKER_DB_URL")
    if not db_url:
        # Fail fast with a clear message instead of an opaque DB error later.
        raise RuntimeError("DOCKER_DB_URL environment variable is not set")

    kb = PDFKnowledgeBase(
        path="tmp/knowledge-base/",
        reader=PDFReader(chunk=True),
        vector_db=PgVector(
            table_name="kb_documents",
            db_url=db_url,
            search_type=SearchType.hybrid,
            embedder=SentenceTransformerEmbedder(
                id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ),
        ),
    )

    # Embeddings persist in Postgres, so only touch the vector store when
    # explicitly asked to; the old unconditional load re-processed the PDFs
    # for every user message.
    if reload_knowledge or recreate_knowledge:
        kb.load(recreate=recreate_knowledge)

    storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_sessions.db")
    # NOTE(review): the original also built a Memory(SqliteMemoryDb(...)) here
    # but never attached it to any agent; dropped as dead code. Re-add it and
    # pass memory=... to an Agent if per-user memories are wanted.

    context_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        instructions=[
            """
            You are a structured context extractor.
            Given a user message, output a strict JSON object with the following fields:

            {
                "user_goal": string,               # what the user is trying to achieve
                "item_model": string | null,       # product or item model, if any
                "platform": string | null,         # device, system, or platform
                "emotional_state": string | null,  # inferred emotion or sentiment
                "prior_steps": string | null       # steps already taken by the user
            }

            - Always return valid JSON only. No explanations, no markdown.
            - Use null for missing fields.
            - Be conservative in assumptions.
            """
        ],
    )

    knowledge_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        knowledge=kb,
        search_knowledge=True,
        add_context=True,
        instructions=[
            """You are a knowledge retriever agent.
            Given a JSON context, search the knowledge base for the top 3 most relevant snippets.

            Output a plain text list like this:

            - "[Snippet 1 content]" (Document: )
            - "[Snippet 2 content]" (Document: )
            - "[Snippet 3 content]" (Document: )

            Guidelines:
            - Do not speculate or infer beyond retrieved content.
            - Prefer procedural or troubleshooting instructions if the user_goal implies an action.
            - Snippets should be self-contained and relevant to the user_goal, platform, or item_model.

            """
        ],
    )

    conversation_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        instructions=[
            """
            You are a professional AI-powered customer support assistant. Your goal is to solve the user's issue directly, clearly, and thoroughly using the provided context and knowledge base.

            Guidelines:
            - Always provide **step-by-step help** whenever possible.
            - Use the **knowledge base snippets** to inform your reply. Rephrase them in user-friendly language.
            - Do **not** tell the user to go check a manual, a website, or external source unless the information is truly not available in the snippets.
            - If specific product details are missing from the context, ask for clarification — don’t assume.
            - End with a friendly question to continue support, like:
              - "Did that help resolve your issue?"
              - "Would you like me to guide you through that step?"
              - "Is there anything else you'd like to check?"

            Example structure:
            1. Brief recap of already tried actions, based on last messages. Max 2 row.
            2. Address the issue with specifics from KB
            3. Offer additional help

            Do NOT include markdown, citations, or code unless explicitly required.
            Do NOT be generic. You MUST be as specific as possible.

            If the already tried actions are more than 5 or 6, inform the user that you have informed the assistance about his problem, and they will contact him, then ask if he wants to continue in the meantime.

            All user messages are in ITALIAN. You must reply only in ITALIAN. Do not add text in english NEVER.
            Adding text in english is FORBIDDEN.
            """
        ],
    )

    return context_agent, knowledge_agent, conversation_agent
+ """ + ], + ) + + return context_agent, knowledge_agent, conversation_agent + + +def get_customer_agents_response(user_message: str, user_id: str = "user_1", session_id: str = None, history=None): + ctx_agent, kb_agent, convo_agent = init_support_agents() + + ctx_run: RunResponse = ctx_agent.run(user_message, user_id=user_id, session_id=session_id) + ctx_text = ctx_run.content.strip() + try: + ctx = json.loads(ctx_text) + except json.JSONDecodeError: + ctx = {} + + kb_prompt = json.dumps(ctx) + kb_run: RunResponse = kb_agent.run(kb_prompt, user_id=user_id, session_id=session_id) + kb_text = kb_run.content.strip() + + history_lines = [] + if history: + for msg in history[-4:]: + role = "Utente" if msg["role"] == "user" else "Assistente" + history_lines.append(f"{role}: {msg['content'].strip()}") + history_str = "\n".join(history_lines) + + convo_prompt = f""" + Sei un assistente virtuale professionale. + + Tieni conto del contesto, della cronologia della conversazione e degli snippet della knowledge base. + + CONTESTO ESTRATTO: + {json.dumps(ctx, indent=2, ensure_ascii=False)} + + SNIPPET DELLA KNOWLEDGE BASE: + {kb_text} + + STORIA DELLA CONVERSAZIONE: + {history_str} + + NUOVA DOMANDA DELL'UTENTE: + Utente: {user_message} + + Rispondi in modo utile e dettagliato. Non fare riferimento a fonti esterne. Rispondi solo in ITALIANO. 
+ """ + + convo_run: RunResponse = convo_agent.run(convo_prompt, user_id=user_id, session_id=session_id) + return convo_run.content.strip(), convo_run.session_id + + diff --git a/agents/customer_service_agent.py b/agents/customer_service_agent.py new file mode 100644 index 0000000..e1f9471 --- /dev/null +++ b/agents/customer_service_agent.py @@ -0,0 +1,85 @@ +import os + +from agno.agent import Agent +from agno.document.reader.pdf_reader import PDFReader +from agno.embedder.sentence_transformer import SentenceTransformerEmbedder +from agno.knowledge.pdf import PDFKnowledgeBase +from agno.memory import AgentMemory +from agno.memory.classifier import MemoryClassifier +from agno.memory.summarizer import MemorySummarizer +from agno.memory.v2 import Memory +from agno.memory.v2.db.sqlite import SqliteMemoryDb +from agno.models.groq import Groq +from agno.storage.agent.sqlite import SqliteAgentStorage +from agno.vectordb.pgvector import PgVector +from agno.vectordb.search import SearchType +from dotenv import load_dotenv + +load_dotenv() + + +def get_customer_service_agent(reload_knowledge=False, recreate_knowledge=False): + docker_url = os.getenv("DOCKER_DB_URL") + + vector_db = PgVector( + table_name="agent_kb", + search_type=SearchType.hybrid, + db_url=docker_url, + embedder=SentenceTransformerEmbedder(id="sentence-transformers/all-MiniLM-L6-v2"), + auto_upgrade_schema=True, + ) + + kb = PDFKnowledgeBase( + path="tmp/knowledge-base/", + reader=PDFReader(chunk_size=500), + vector_db=vector_db, + ) + + storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_memory/agno_agent_storage.db") + + memory = Memory( + # model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"), + db=SqliteMemoryDb(table_name="user_memories", db_file="tmp/memory/agent.db"), + ) + + evaluator_agent = Agent( + model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"), + storage=storage, + num_history_responses=3, + add_history_to_messages=True, + knowledge=kb, + 
def get_customer_service_agent(reload_knowledge=False, recreate_knowledge=False):
    """Build a single all-in-one customer-service agent.

    The agent is backed by a PDF knowledge base stored in pgvector, sqlite
    session storage, and sqlite-backed user memory.

    Args:
        reload_knowledge: when True, (re)load the PDF knowledge base into the
            vector store. Bug fix: the original ignored this flag (the intended
            guard was left commented out) and loaded unconditionally.
        recreate_knowledge: when True, drop and rebuild the vector table
            before loading (implies a load).

    Returns:
        The configured Agent.

    Raises:
        RuntimeError: if the DOCKER_DB_URL environment variable is not set.
    """
    docker_url = os.getenv("DOCKER_DB_URL")
    if not docker_url:
        # Fail fast with a clear message instead of an opaque DB error later.
        raise RuntimeError("DOCKER_DB_URL environment variable is not set")

    vector_db = PgVector(
        table_name="agent_kb",
        search_type=SearchType.hybrid,
        db_url=docker_url,
        embedder=SentenceTransformerEmbedder(id="sentence-transformers/all-MiniLM-L6-v2"),
        auto_upgrade_schema=True,
    )

    kb = PDFKnowledgeBase(
        path="tmp/knowledge-base/",
        reader=PDFReader(chunk_size=500),
        vector_db=vector_db,
    )

    storage = SqliteAgentStorage(table_name="agent_sessions", db_file="tmp/agent_memory/agno_agent_storage.db")

    memory = Memory(
        # model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        db=SqliteMemoryDb(table_name="user_memories", db_file="tmp/memory/agent.db"),
    )

    evaluator_agent = Agent(
        model=Groq(id="meta-llama/llama-4-maverick-17b-128e-instruct"),
        storage=storage,
        num_history_responses=3,
        add_history_to_messages=True,
        knowledge=kb,
        search_knowledge=True,
        memory=memory,
        read_chat_history=True,
        instructions="""
        You are a highly skilled, professional, and empathetic customer support agent.
        Engage naturally and build rapport with the user while maintaining a polite and supportive tone.
        Your style is professional yet approachable, concise yet thorough.

        When you receive a user question:
        - Carefully analyze it and think step by step before responding.
        - If the question is ambiguous or lacks detail, politely ask clarifying questions.
        - Use your knowledge base to enrich your reply with verified and up-to-date information.
        - When referencing knowledge, explicitly cite the section or document name when possible (e.g., "according to page 5 of the Setup Guide").

        Always respond with empathy, especially if the user expresses frustration or confusion. Acknowledge their feelings respectfully.

        If you detect a potential safety, legal, or urgent escalation issue (such as safety hazards, repeated user dissatisfaction, or refund/complaint requests), advise that you will escalate to a human agent, and help collect any necessary information for a smooth transfer.

        Whenever relevant, proactively offer helpful suggestions, best practices, or troubleshooting steps beyond what was directly asked, to demonstrate initiative.

        After answering, always check if the user needs any further help before ending the conversation.

        Examples of expected behavior:
        - If the user asks vaguely: “My printer doesn’t work,” reply with clarifying questions like: “I’m sorry to hear that. Could you please tell me what model you have and describe what’s happening in more detail?”
        - If a user is angry: “I completely understand your frustration. Let’s work together to get this resolved as quickly as possible.”
        - If referencing documentation: “According to the Troubleshooting Guide, section 3.2, you can reset your printer by…”

        Remember: be calm, precise, and user-centered at all times.
        """
    )

    # Honour the reload flag: embeddings persist in Postgres, so loading is
    # only needed when the documents change (the original always reloaded,
    # leaving the commented-out `if reload_knowledge:` guard unused).
    if reload_knowledge or recreate_knowledge:
        kb.load(recreate=recreate_knowledge)

    return evaluator_agent
if prompt := st.chat_input("Come posso aiutarti?"):
    # Record and render the user's message for this run; the final rerun
    # re-renders the whole history from session_state.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Sto pensando..."):
            # chunks = get_customer_service_agent().run(prompt)
            response, session_id = get_customer_agents_response(
                prompt,
                user_id="user_1",
                session_id=st.session_state.get("session_id"),
                history=st.session_state.messages
            )
            # Bug fix: the returned session_id was discarded, so the lookup
            # above always yielded None and every message started a brand-new
            # agent session. Persist it so the conversation stays in one
            # session across turns.
            st.session_state["session_id"] = session_id
            # response = chunks.content
            st.session_state.messages.append({"role": "assistant", "content": response})
            st.rerun()