#!/usr/bin/env python3
import json

import faiss
import numpy as np
import requests
import readline  # noqa: F401 -- enables line editing and history in input()
from rich import print
from FlagEmbedding import BGEM3FlagModel

# --- load index, metadata and embedding model ---------------------------------
idx = faiss.read_index("corpus.idx")
with open("corpus.meta.json", encoding="utf-8") as f:
    meta = json.load(f)
model = BGEM3FlagModel("BAAI/bge-m3", normalize_embeddings=True, device="cpu")  # same model as at indexing time


# small helper to turn a FAISS hit back into a readable passage
def fetch_passage(i):
    m = meta[i]
    return f"[{m['file']} · part {m['part']}] {m['text']}"


def ask_llm(prompt):
    # local Ollama endpoint; non-streaming, so the full answer comes back in one JSON object
    r = requests.post(
        "http://127.0.0.1:11434/api/generate",
        json={
            "model": "mistral7b-fast",
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.2, "num_predict": 512},
        },
        timeout=300,
    )
    r.raise_for_status()
    return r.json()["response"]


# --- interactive loop ----------------------------------------------------------
while True:
    try:
        q = input("❓ > ").strip()
        if not q:
            continue
    except (KeyboardInterrupt, EOFError):
        print("\nBye.")
        break

    # embed the question and search FAISS (top-k = 4)
    # BGEM3FlagModel.encode returns a dict; the dense vectors live under "dense_vecs"
    q_emb = model.encode([q])["dense_vecs"]
    D, I = idx.search(np.asarray(q_emb, dtype="float32"), 4)

    ctx_blocks = [fetch_passage(i) for i in I[0]]
    context = "\n\n".join(ctx_blocks)

    # the prompt (kept in French) asks the model for a precise, factual answer in French
    prompt = f"""Réponds en français, précis et factuel.

{context}

{q}"""

    print("\n[bold]Réponse :[/]\n")
    print(ask_llm(prompt))
    print("\n[dim]--- contexte utilisé ---[/]")
    print(context)