Update index.py
This commit is contained in:
parent
f0f87b64f4
commit
3f2f13b65f
4
index.py
4
index.py
@ -45,8 +45,10 @@ def gather_files(root: Path):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
docs, meta = [], []
|
docs, meta = [], []
|
||||||
|
files_count = 0
|
||||||
|
|
||||||
for fp in gather_files(ROOT):
|
for fp in gather_files(ROOT):
|
||||||
|
files_count += 1
|
||||||
text = fp.read_text(encoding="utf-8", errors="ignore")
|
text = fp.read_text(encoding="utf-8", errors="ignore")
|
||||||
for i, chunk in enumerate(split(text)):
|
for i, chunk in enumerate(split(text)):
|
||||||
docs.append(chunk)
|
docs.append(chunk)
|
||||||
@ -55,7 +57,7 @@ def main():
|
|||||||
if not docs:
|
if not docs:
|
||||||
raise SystemExit("Aucun fichier trouvé dans /app/Fiches. Vérifiez le montage ou les extensions.")
|
raise SystemExit("Aucun fichier trouvé dans /app/Fiches. Vérifiez le montage ou les extensions.")
|
||||||
|
|
||||||
print(f"Découpé {len(docs)} passages, génération des embeddings…")
|
print(f"Traité {files_count} fichiers, découpé {len(docs)} passages, génération des embeddings…")
|
||||||
|
|
||||||
model = BGEM3FlagModel(MODEL_NAME, device="cpu")
|
model = BGEM3FlagModel(MODEL_NAME, device="cpu")
|
||||||
emb = model.encode(docs, batch_size=64) # pas de normalisation interne
|
emb = model.encode(docs, batch_size=64) # pas de normalisation interne
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user