# rag.py
import faiss, json, requests, numpy as np
from sentence_transformers import SentenceTransformer
# Module-level resources, loaded once at import time and shared by query().

# Vector index read from disk with FAISS.
INDEX = faiss.read_index("corpus.idx")

# Corpus metadata that accompanies the index. The file is opened in a
# context manager so the handle is closed promptly, and decoded explicitly
# as UTF-8 (the JSON interchange encoding) instead of the platform default.
with open("corpus.meta.json", encoding="utf-8") as _meta_fh:
    META = json.load(_meta_fh)

# Sentence-embedding model used to encode questions; runs on CPU.
EMBMOD = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")
def ask_llm(prompt):
    """Send *prompt* to the local generation endpoint and return its reply.

    Posts a non-streaming request for the ``mistral7b-fast`` model to
    ``http://127.0.0.1:11434/api/generate`` (presumably an Ollama server --
    confirm) with temperature 0.2 and at most 512 predicted tokens.

    Args:
        prompt: Full prompt text to submit.

    Returns:
        The value of the ``"response"`` field of the JSON reply.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or exceeded the
            300-second timeout.
    """
    payload = {
        "model": "mistral7b-fast",
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 512},
    }
    with requests.post(
        "http://127.0.0.1:11434/api/generate", json=payload, timeout=300
    ) as r:
        # Fail loudly on an HTTP error instead of hitting a confusing
        # KeyError (or JSON decode error) on the error body below.
        r.raise_for_status()
        return r.json()["response"]
def query(q, k=4):
    """Answer question *q* using the nearest corpus entries as context.

    The question is encoded with ``EMBMOD`` (normalized embeddings), the
    ``k`` nearest entries are looked up in ``INDEX``, and the matching
    ``META`` entries are placed ahead of the question in the prompt that is
    handed to ``ask_llm``.

    Args:
        q: The user's question.
        k: Number of neighbours requested from the index.

    Returns:
        Whatever ``ask_llm`` returns for the assembled prompt.
    """
    v = EMBMOD.encode([q], normalize_embeddings=True)
    _, ids = INDEX.search(v.astype("float32"), k)
    # Context entries are read from META, the only corpus data this module
    # loads.
    # NOTE(review): assumes META is a list whose positions match the index
    # row ids -- confirm against the script that builds corpus.idx.
    # FAISS fills missing neighbours with -1 when fewer than k results exist;
    # skip them so that -1 does not silently pick the last entry.
    ctx = "\n\n".join(
        f"[{rank}] {META[idx]}"
        for rank, idx in enumerate(ids[0])
        if idx >= 0
    )
    prompt = f"""Tu réponds de façon concise en français.
{ctx}
{q}"""
    return ask_llm(prompt)
if __name__ == "__main__":
    # Interactive loop: read a question from stdin, print the answer.
    while True:
        try:
            q = input("Question › ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave quietly, no traceback.
            print()
            break
        if not q.strip():
            # Blank line: nothing to ask, so don't spend a model call on it.
            continue
        print(query(q))