# Code/batch_ia/utils/files.py

import os
import re

from utils.config import (
    CORPUS_DIR
)

def strip_prefix(name):
    """Return ``name`` lower-cased, without its leading numeric prefix.

    The prefix is a run of digits at the very start of the name, together
    with any '-', '_' or space characters that immediately follow it.
    Names without such a prefix are only lower-cased.
    """
    leading = re.match(r'^\d+[-_ ]*', name)
    remainder = name if leading is None else name[leading.end():]
    return remainder.lower()

def find_prefixed_directory(pattern, base_path=None):
    """
    Find a sub-directory whose name, without its numeric prefix, equals ``pattern``.

    Names are compared through ``strip_prefix`` and the pattern is
    lower-cased, so the match is case-insensitive. Only the direct children
    of the search directory are examined.

    Args:
        pattern: Directory name without its numeric prefix.
        base_path: Directory to search in, relative to CORPUS_DIR. A falsy
            value (None, "") searches CORPUS_DIR itself.

    Returns:
        The path of the matching directory (prefix included) relative to
        CORPUS_DIR, or None when the search location is not an existing
        directory or contains no match. When several directories match, the
        first one in sorted name order is returned.
    """
    search_path = os.path.join(CORPUS_DIR, base_path) if base_path else CORPUS_DIR

    # isdir rather than exists: a base_path that points at a regular file
    # must yield None instead of letting os.listdir raise NotADirectoryError.
    if not os.path.isdir(search_path):
        return None

    # Loop-invariant, so computed once.
    wanted = pattern.lower()

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # deterministic when several prefixed directories share the same name.
    for entry in sorted(os.listdir(search_path)):
        candidate = os.path.join(search_path, entry)
        if os.path.isdir(candidate) and strip_prefix(entry) == wanted:
            return os.path.relpath(candidate, CORPUS_DIR)

    return None

def find_corpus_file(pattern, base_path=None):
    """
    Locate a ``.md`` file in the corpus, ignoring numeric prefixes on names.

    ``pattern`` is resolved one '/'-separated segment at a time: every
    segment but the last is matched against a directory through
    ``find_prefixed_directory``, and the last one is matched against the
    prefix-stripped, extension-less name of a ``.md`` file. Matching is
    case-insensitive.

    Args:
        pattern: Relative path such as "sub-folder/file-name", written
            without numeric prefixes and without the ``.md`` extension.
        base_path: Directory to start from, relative to CORPUS_DIR. A falsy
            value (None, "") starts at CORPUS_DIR itself.

    Returns:
        The path of the file found, relative to CORPUS_DIR, or None when the
        search location is not an existing directory or nothing matches.
        When several files match, the first one in sorted name order is
        returned.
    """
    search_path = os.path.join(CORPUS_DIR, base_path) if base_path else CORPUS_DIR

    # isdir rather than exists: a base_path that points at a regular file
    # must yield None instead of letting os.listdir raise NotADirectoryError.
    if not os.path.isdir(search_path):
        return None

    if '/' in pattern:
        # Resolve the first segment to a real (prefixed) directory, then
        # search for the remainder of the pattern inside it.
        first, rest = pattern.split('/', 1)
        matched_dir = find_prefixed_directory(first, base_path)
        if matched_dir:
            return find_corpus_file(rest, matched_dir)
        return None

    # Loop-invariant, so computed once.
    wanted = pattern.lower()

    # Sorted so the result is deterministic when several files share the
    # same prefix-stripped name (os.listdir order is arbitrary).
    for entry in sorted(os.listdir(search_path)):
        if not entry.endswith('.md'):
            continue
        if strip_prefix(os.path.splitext(entry)[0]) != wanted:
            continue
        candidate = os.path.join(search_path, entry)
        # A directory that merely happens to be named "*.md" is not a file.
        if os.path.isfile(candidate):
            return os.path.relpath(candidate, CORPUS_DIR)

    return None


def _shift_markdown_titles(lines, shift):
    """Return ``lines`` with ``shift`` extra '#' on title lines outside fenced code blocks."""
    extra = '#' * shift
    shifted = []
    fence = None  # Opening marker ('```' or '~~~') while inside a fenced block.
    for line in lines:
        stripped = line.lstrip()
        if fence is None:
            if stripped.startswith(('```', '~~~')):
                fence = stripped[:3]
            elif line.startswith('#'):
                line = extra + line
        elif stripped.startswith(fence):
            # Same marker again: the fenced block ends here.
            fence = None
        shifted.append(line)
    return shifted


def read_corpus_file(file_path, remove_first_title=False, shift_titles=0):
    """
    Read a corpus file and apply the requested transformations.

    Args:
        file_path: Path of the file, relative to CORPUS_DIR.
        remove_first_title: If True, drop the first line when it starts
            with '#'.
        shift_titles: Number of '#' to prepend to every title line (a line
            starting with '#'). Values <= 0 leave the titles untouched.
            Lines inside fenced code blocks (``` or ~~~) are never shifted,
            so '#' comments in code samples are preserved.

    Returns:
        The transformed content, ending with exactly one newline. When the
        path is not an existing regular file, the message
        "Fichier non trouvé: <file_path>" is returned instead of raising.
    """
    full_path = os.path.join(CORPUS_DIR, file_path)

    # isfile rather than exists: a directory must produce the same
    # "not found" message instead of an IsADirectoryError from open().
    if not os.path.isfile(full_path):
        return f"Fichier non trouvé: {file_path}"

    with open(full_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Drop the leading title line when requested.
    if remove_first_title and lines and lines[0].startswith('#'):
        lines = lines[1:]

    # Deepen title levels when requested.
    if shift_titles > 0:
        lines = _shift_markdown_titles(lines, shift_titles)

    # Normalise the end of the content to a single trailing newline.
    return ''.join(lines).rstrip('\n') + '\n'

def write_report(report, fichier):
    """Clean up the generated report and write it to ``fichier`` as UTF-8.

    Two clean-up passes run before writing:
      * markers of the form ``<!---- ... -->`` are removed ('.' does not
        match a newline here, so only markers contained on a single line
        are affected);
      * any run of three or more newlines is collapsed to two, i.e. at most
        one blank line in a row.

    Args:
        report: Full text of the report.
        fichier: Path of the output file; it is created or overwritten.
    """
    without_markers = re.sub(r'<!----.*?-->', '', report)
    compacted = re.sub(r'\n\n\n+', '\n\n', without_markers)

    with open(fichier, 'w', encoding='utf-8') as out:
        out.write(compacted)