# rag.py
# Minimal retrieval-augmented generation loop: embed the question,
# retrieve the nearest chunks from a FAISS index, and ask a local
# Ollama model to answer from that context.
import faiss
import json
import requests
from sentence_transformers import SentenceTransformer

INDEX = faiss.read_index("corpus.idx")
# META is assumed to be a list of chunk texts aligned with the index rows.
META = json.load(open("corpus.meta.json"))
EMBMOD = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")


def ask_llm(prompt):
    # Call the local Ollama HTTP API (non-streaming) and return the answer text.
    r = requests.post("http://127.0.0.1:11434/api/generate", json={
        "model": "mistral7b-fast",
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.2, "num_predict": 512}
    }, timeout=300)
    r.raise_for_status()
    return r.json()["response"]


def query(q, k=4):
    # Embed the question, retrieve the k nearest chunks, then build the prompt.
    v = EMBMOD.encode([q], normalize_embeddings=True)
    D, I = INDEX.search(v.astype("float32"), k)  # distances, row indices
    ctx = "\n\n".join(f"[{rank}] {META[idx]}" for rank, idx in enumerate(I[0]))
    # The system prompt asks the model to answer concisely, in French.
    prompt = f"""<system>Tu réponds de façon concise en français.</system>
<context>{ctx}</context>
<user>{q}</user>"""
    return ask_llm(prompt)


if __name__ == "__main__":
    while True:
        q = input("Question › ")
        print(query(q))
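

# --- build_index.py: hypothetical companion sketch, not part of rag.py ---
# rag.py expects corpus.idx and corpus.meta.json to already exist. This is a
# minimal sketch of how they could be built, assuming the metadata file is a
# plain JSON list of chunk strings aligned with the FAISS rows. File names and
# the model name come from rag.py; everything else here is an assumption.
import faiss
import json
from sentence_transformers import SentenceTransformer


def build_index(chunks, out_idx="corpus.idx", out_meta="corpus.meta.json"):
    # Embed the chunks with the same model rag.py uses for queries.
    model = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")
    vecs = model.encode(chunks, normalize_embeddings=True).astype("float32")
    # Inner product on normalized vectors is cosine similarity.
    index = faiss.IndexFlatIP(vecs.shape[1])
    index.add(vecs)
    faiss.write_index(index, out_idx)
    with open(out_meta, "w") as f:
        json.dump(chunks, f, ensure_ascii=False)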