diff --git a/index.py b/index.py new file mode 100644 index 0000000..1623597 --- /dev/null +++ b/index.py @@ -0,0 +1,48 @@
+# index.py (run inside a venv or a python:3.11-slim container)
+
+import json
+import re
+from pathlib import Path
+
+import faiss
+from sentence_transformers import SentenceTransformer
+
+MODEL_NAME = "WhereIsAI/bge-base-fr"
+CHUNK = 800    # target chunk size in CHARACTERS (not tokens) — roughly ~600 words of French text
+OVERLAP = 2    # number of trailing sentences carried over into the next chunk
+
+def split(text):
+    """Split *text* into ~CHUNK-character passages on sentence boundaries,
+    keeping OVERLAP sentences of overlap between consecutive passages."""
+    sentences = re.split(r'(?<=[\.\!\?]) +', text)
+    chunks, buf = [], []
+    for s in sentences:
+        buf.append(s)
+        if len(" ".join(buf)) > CHUNK:
+            chunks.append(" ".join(buf))
+            # Keep only the last OVERLAP sentences. The previous code used
+            # buf[-100:], which (with fewer than 100 sentences buffered) never
+            # shrank the buffer, so every following sentence emitted another
+            # ever-growing, almost-duplicate chunk.
+            buf = buf[-OVERLAP:]
+    if buf:
+        chunks.append(" ".join(buf))
+    return chunks
+
+docs, meta = [], []
+for fp in Path("/app/Fiches").rglob("*.md"):
+    txt = fp.read_text(encoding="utf-8")
+    for i, chunk in enumerate(split(txt)):
+        docs.append(chunk)
+        # Persist the chunk text itself: rag.py has no other way to recover
+        # the passage from a FAISS id (it previously referenced a `docs` list
+        # that only exists in this process -> NameError at query time).
+        meta.append({"file": fp.name, "part": i, "text": chunk})
+
+model = SentenceTransformer(MODEL_NAME, device="cpu")
+emb = model.encode(docs, batch_size=64, show_progress_bar=True, normalize_embeddings=True)
+
+index = faiss.IndexFlatIP(emb.shape[1])
+index.add(emb.astype("float32"))  # faiss only accepts float32 matrices
+
+faiss.write_index(index, "corpus.idx")
+with open("corpus.meta.json", "w", encoding="utf-8") as f:
+    json.dump(meta, f, ensure_ascii=False)  # keep accented French readable
+print(f"Indexé {len(docs)} passages.")
diff --git a/rag.py b/rag.py new file mode 100644 index 0000000..a6ba57d --- /dev/null +++ b/rag.py @@ -0,0 +1,42 @@
+# rag.py
+import json
+
+import faiss
+import requests
+from sentence_transformers import SentenceTransformer
+
+INDEX = faiss.read_index("corpus.idx")
+with open("corpus.meta.json", encoding="utf-8") as f:
+    META = json.load(f)
+EMBMOD = SentenceTransformer("WhereIsAI/bge-base-fr", device="cpu")
+
+def ask_llm(prompt):
+    """Send *prompt* to the local Ollama server and return the generated text."""
+    r = requests.post("http://127.0.0.1:11434/api/generate", json={
+        "model": "mistral7b-fast",
+        "prompt": prompt,
+        "stream": False,
+        "options": {"temperature": 0.2, "num_predict": 512}
+    }, timeout=300)
+    r.raise_for_status()  # fail loudly instead of a KeyError on an error body
+    return r.json()["response"]
+
+def query(q, k=4):
+    """Retrieve the top-*k* passages for *q* and ask the LLM to answer from them."""
+    v = EMBMOD.encode([q], normalize_embeddings=True)
+    D, I = INDEX.search(v.astype("float32"), k)
+    # Passage text comes from META (written by index.py). FAISS pads missing
+    # results with id -1 when k exceeds the index size — skip those.
+    ctx = "\n\n".join(
+        f"[{rank}] {META[idx]['text']}"
+        for rank, idx in enumerate(I[0]) if idx != -1
+    )
+    prompt = f"""Tu réponds de façon concise en français.
+{ctx}
+{q}"""
+    return ask_llm(prompt)
+
+if __name__ == "__main__":
+    while True:
+        q = input("Question › ")
+        print(query(q))