# rag.py
"""Minimal retrieval-augmented question-answering loop.

A question is embedded with a SentenceTransformer model, the nearest entries
are fetched from a FAISS index, and the retrieved passages plus the question
are sent to a local LLM HTTP endpoint. Run as a script for an interactive
prompt.

Importing this module loads the index, the metadata file and the embedding
model from the current working directory.
"""
import json

import faiss
import numpy as np
import requests
from sentence_transformers import SentenceTransformer

# Generation endpoint settings (kept together so they are easy to override).
LLM_URL = "http://127.0.0.1:11434/api/generate"
LLM_MODEL = "mistral7b-fast"
LLM_TIMEOUT_S = 300

INDEX = faiss.read_index("corpus.idx")
# Explicit UTF-8 and a context manager: JSON is UTF-8 by specification and the
# handle must not be left to the garbage collector.
with open("corpus.meta.json", encoding="utf-8") as _meta_file:
    META = json.load(_meta_file)
EMBMOD = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")


def ask_llm(prompt: str):
    """Send *prompt* to the generation endpoint and return its answer.

    Args:
        prompt: Full prompt text to submit.

    Returns:
        The ``"response"`` field of the JSON reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the JSON reply has no ``"response"`` field.
    """
    payload = {
        "model": LLM_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 512},
    }
    reply = requests.post(LLM_URL, json=payload, timeout=LLM_TIMEOUT_S)
    # Fail with the HTTP error itself rather than a KeyError on an error body.
    reply.raise_for_status()
    return reply.json()["response"]


def _passage(doc_id: int):
    """Return the META entry stored for FAISS row id *doc_id*."""
    # NOTE(review): the original code indexed an undefined name `docs`. META is
    # the only corpus metadata loaded by this file, so it is assumed to hold
    # the passages in index order -- confirm against the indexing script.
    if isinstance(META, dict):
        # JSON object keys are always strings.
        return META[str(doc_id)]
    return META[doc_id]


def query(q: str, k: int = 4):
    """Answer question *q* using the *k* nearest passages as context.

    Args:
        q: The user's question.
        k: Number of neighbours requested from the index.

    Returns:
        Whatever ``ask_llm`` returns for the assembled prompt.
    """
    vec = EMBMOD.encode([q], normalize_embeddings=True)
    _, ids = INDEX.search(np.asarray(vec, dtype=np.float32), k)
    # FAISS pads the result with -1 when fewer than k neighbours exist; a
    # negative id would otherwise silently pick the last passage.
    hits = [int(doc_id) for doc_id in ids[0] if doc_id >= 0]
    ctx = "\n\n".join(
        f"[{rank}] {_passage(doc_id)}" for rank, doc_id in enumerate(hits)
    )
    prompt = f"""Tu réponds de façon concise en français.

{ctx}

{q}"""
    return ask_llm(prompt)


def main():
    """Run the interactive question loop until EOF or Ctrl-C at the prompt."""
    while True:
        try:
            q = input("Question › ")
        except (EOFError, KeyboardInterrupt):
            print()
            break
        if not q.strip():
            # Nothing to retrieve for an empty question.
            continue
        print(query(q))


if __name__ == "__main__":
    main()