From a569c71ad4a049d06e9afd99d9767ff92be6ec4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phan?=
Date: Mon, 19 May 2025 06:19:28 +0200
Subject: [PATCH] Update rag.py

---
 rag.py | 58 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/rag.py b/rag.py
index a6ba57d..eeba59e 100644
--- a/rag.py
+++ b/rag.py
@@ -1,30 +1,52 @@
-# rag.py
-import faiss, json, requests, numpy as np
-from sentence_transformers import SentenceTransformer
+#!/usr/bin/env python3
+import faiss, json, requests, readline, numpy as np  # readline enables history/editing in input()
+from rich import print
+from FlagEmbedding import BGEM3FlagModel
 
-INDEX = faiss.read_index("corpus.idx")
-META = json.load(open("corpus.meta.json"))
-EMBMOD = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")
+# --- load index, metadata and embedding model ---------------------------------
+idx = faiss.read_index("corpus.idx")
+meta = json.load(open("corpus.meta.json", encoding="utf-8"))
+model = BGEM3FlagModel("BAAI/bge-m3", device="cpu")  # same model as at indexing time
+
+# small helper to render a retrieved passage with its source
+def fetch_passage(i):
+    m = meta[i]
+    return f"[{m['file']} · part {m['part']}] {m['text']}"
 
 def ask_llm(prompt):
     r = requests.post("http://127.0.0.1:11434/api/generate", json={
         "model": "mistral7b-fast",
         "prompt": prompt,
         "stream": False,
         "options": {"temperature": 0.2, "num_predict": 512}
     }, timeout=300)
     return r.json()["response"]
 
-def query(q, k=4):
-    v = EMBMOD.encode([q], normalize_embeddings=True)
-    D, I = INDEX.search(v.astype("float32"), k)
-    ctx = "\n\n".join(f"[{i}] {docs[I[0][i]]}" for i in range(k))
-    prompt = f"""Tu réponds de façon concise en français.
-{ctx}
-{q}"""
-    return ask_llm(prompt)
+# --- interactive loop ----------------------------------------------------------
+while True:
+    try:
+        q = input("❓ > ").strip()
+        if not q: continue
+    except (KeyboardInterrupt, EOFError):
+        print("\nBye."); break
 
-if __name__ == "__main__":
-    while True:
-        q = input("Question › ")
-        print(query(q))
+    # embed the question and search FAISS (top-k = 4);
+    # BGE-M3 returns a dict whose "dense_vecs" are already L2-normalised
+    q_emb = model.encode([q])["dense_vecs"]
+    D, I = idx.search(q_emb.astype("float32"), 4)
+
+    ctx_blocks = []
+    for idx_id in I[0]:
+        if idx_id == -1:  # FAISS pads results with -1 when fewer than k vectors exist
+            continue
+        ctx_blocks.append(fetch_passage(idx_id))
+    context = "\n\n".join(ctx_blocks)
+
+    prompt = f"""Réponds en français, précis et factuel.
+{context}
+{q}"""
+
+    print("\n[bold]Réponse :[/]\n")
+    print(ask_llm(prompt))
+    print("\n[dim]--- contexte utilisé ---[/]")
+    print(context)
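
Note on the indexing side: rag.py assumes corpus.idx and corpus.meta.json were
built with the same BAAI/bge-m3 model, and fetch_passage expects each metadata
entry to carry "file", "part" and "text" keys. A minimal sketch of what that
indexer could look like (file layout, chunk size and script name are
assumptions, not taken from this patch):

  #!/usr/bin/env python3
  # build_index.py -- hypothetical companion indexer; paths and chunking are illustrative
  import faiss, json, glob
  from FlagEmbedding import BGEM3FlagModel

  model = BGEM3FlagModel("BAAI/bge-m3", device="cpu")

  chunks, meta = [], []
  for path in glob.glob("docs/*.txt"):        # assumed corpus location
      text = open(path, encoding="utf-8").read()
      for n in range(0, len(text), 1000):     # naive 1000-char chunks (assumption)
          part = text[n:n + 1000]
          chunks.append(part)
          meta.append({"file": path, "part": n // 1000, "text": part})  # keys read by fetch_passage

  emb = model.encode(chunks)["dense_vecs"].astype("float32")  # dense vectors, pre-normalised
  index = faiss.IndexFlatIP(emb.shape[1])     # inner product == cosine on normalised vectors
  index.add(emb)
  faiss.write_index(index, "corpus.idx")
  json.dump(meta, open("corpus.meta.json", "w", encoding="utf-8"), ensure_ascii=False)

IndexFlatIP over normalised embeddings makes idx.search return cosine
similarities, which matches how rag.py queries the index.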
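
One more operational note: ask_llm raises a bare KeyError if the
"mistral7b-fast" tag is not pulled into Ollama, because the error payload has
no "response" field. A small preflight against Ollama's /api/tags listing
(host/port as in the patch) can fail fast with a readable message instead:

  # preflight check -- optional sketch, assuming a default local Ollama install
  import requests

  tags = requests.get("http://127.0.0.1:11434/api/tags", timeout=5).json()
  names = [m["name"] for m in tags.get("models", [])]
  if not any(n.startswith("mistral7b-fast") for n in names):
      raise SystemExit(f"model 'mistral7b-fast' not found; available: {names}")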