Mise à jour et ajout de la génération des rapports (temporaire)

This commit is contained in:
Stéphan Peccini 2025-05-17 08:54:29 +02:00
parent 427c7d26f5
commit 33695092af
4 changed files with 314 additions and 1 deletions

3
.gitignore vendored
View File

@ -6,7 +6,8 @@
__pycache__/
*.pyo
*.pyd
.dot
*.dot
prompt.md
# Ignorer cache et temporaire
.cache/

37
assets/mapping.yaml Normal file
View File

@ -0,0 +1,37 @@
operations:
  Extraction:
    fiche_prefix: "Fiche minerai"
    resource_dir: "Minerai"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-extraction"
  Traitement:
    fiche_prefix: "Fiche minerai"
    resource_dir: "Minerai"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-traitement"
  Assemblage:
    fiche_prefix: "Fiche assemblage"
    resource_dir: "Assemblage"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-assemblage"
  Fabrication:
    fiche_prefix: "Fiche fabrication"
    resource_dir: "Fabrication"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-fabrication"

95
generate_corpus.py Normal file
View File

@ -0,0 +1,95 @@
import os
import re
import shutil
from pathlib import Path
# Directories under "Fiches" that are skipped entirely when building the corpus.
EXCLUDE_DIRS = {"Local"}
# Unused here: splitting is driven by the presence of '###' headings, not length.
MAX_SECTION_LENGTH = 1200
def slugify(text):
    """Convert *text* to a lowercase, dash-separated slug."""
    collapsed = re.sub(r'\W+', '-', text.strip())
    return collapsed.strip('-').lower()
def split_markdown_sections_refined(content):
    """Split markdown into level-2 sections with optional level-3 subsections.

    Returns a list of (section_title, intro_lines, subsections) tuples, where
    subsections is a list of (subsection_title, body_lines) tuples.  Text
    appearing before the first '## ' heading is discarded.
    """
    sections = []
    title = None        # current '## ' heading text
    intro = []          # section lines seen before any '### '
    subs = []           # completed (sub_title, sub_lines) pairs
    sub = None          # subsection currently being filled
    in_section = False

    def flush_sub():
        # Move the in-progress subsection (if any) into the completed list.
        nonlocal sub
        if sub:
            subs.append(sub)
        sub = None

    for raw in content.splitlines():
        if raw.startswith("## "):
            if title:
                flush_sub()
                sections.append((title, intro, subs))
                intro, subs = [], []
            title = raw[3:].strip()
            in_section = True
        elif raw.startswith("### ") and in_section:
            flush_sub()
            sub = (raw[4:].strip(), [])
        elif in_section:
            # Body lines go to the open subsection, else to the section intro.
            (sub[1] if sub else intro).append(raw)

    if title:
        flush_sub()
        sections.append((title, intro, subs))
    return sections
def process_markdown_file(md_path, rel_output_dir):
    """Split one markdown file into per-section files under *rel_output_dir*.

    A section with '### ' subsections becomes a directory holding an
    '_intro.md' plus one numbered file per subsection; a section without
    subsections becomes a single numbered markdown file.
    """
    content = Path(md_path).read_text(encoding="utf-8")
    for idx, (title, intro_lines, subsections) in enumerate(
            split_markdown_sections_refined(content)):
        stem = f"{idx:02d}-{slugify(title)}"
        if subsections:
            section_dir = rel_output_dir / stem
            section_dir.mkdir(parents=True, exist_ok=True)
            intro_text = f"## {title}\n" + "\n".join(intro_lines).strip()
            (section_dir / "_intro.md").write_text(intro_text, encoding="utf-8")
            for sub_idx, (sub_title, sub_lines) in enumerate(subsections):
                sub_text = f"### {sub_title}\n" + "\n".join(sub_lines).strip()
                sub_file = section_dir / f"{sub_idx:02d}-{slugify(sub_title)}.md"
                sub_file.write_text(sub_text, encoding="utf-8")
        else:
            body = f"## {title}\n" + "\n".join(intro_lines).strip()
            (rel_output_dir / f"{stem}.md").write_text(body, encoding="utf-8")
def build_corpus_structure():
    """Rebuild the 'Corpus' tree from every markdown fiche under 'Fiches'."""
    base = Path(__file__).resolve().parent
    source = base / "Fiches"
    dest = base / "Corpus"
    # Start from a clean slate so stale fragments never survive a rebuild.
    if dest.exists():
        shutil.rmtree(dest)
    dest.mkdir(parents=True, exist_ok=True)
    for root, _, files in os.walk(source):
        rel = Path(root).relative_to(source)
        # Skip anything below an excluded directory (e.g. "Local").
        if EXCLUDE_DIRS.intersection(rel.parts):
            continue
        for name in files:
            # Only plain '.md' files; names containing '.md.' are ignored.
            if not name.endswith(".md") or ".md." in name:
                continue
            target = dest / rel / Path(name).stem
            target.mkdir(parents=True, exist_ok=True)
            process_markdown_file(Path(root) / name, target)
# Script entry point: regenerate the corpus tree from the source fiches.
if __name__ == "__main__":
    build_corpus_structure()
    print("✅ Corpus généré avec succès dans le dossier 'Corpus/'")

180
rapports_IA.py Normal file
View File

@ -0,0 +1,180 @@
import os
import yaml
import networkx as nx
from pathlib import Path
from networkx.drawing.nx_agraph import read_dot
# Path constants: everything is resolved relative to this script's directory.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
ASSETS_DIR = os.path.join(BASE_DIR, 'assets')  # YAML config and mapping files
CONFIG_PATH = os.path.join(ASSETS_DIR, 'config.yaml')  # IHH thresholds
MAPPING_PATH = os.path.join(ASSETS_DIR, 'mapping.yaml')  # operation -> fiche mapping
CORPUS_DIR = os.path.join(BASE_DIR, 'Corpus')  # corpus tree built by generate_corpus.py
def load_config(config_path=CONFIG_PATH):
    """Load the IHH colour thresholds from config.yaml.

    :return: dict of thresholds found under cfg['seuils']['IHH']
    """
    with open(config_path, encoding='utf-8') as fh:
        data = yaml.safe_load(fh)
    return data['seuils']['IHH']
def load_mapping(mapping_path=MAPPING_PATH):
    """Load the operation -> fiche mapping from mapping.yaml.

    :return: dict keyed by operation name (the 'operations' section)
    """
    with open(mapping_path, encoding='utf-8') as fh:
        data = yaml.safe_load(fh)
    return data['operations']
def parse_graph(dot_path):
    """Extract IHH figures from a DOT graph.

    Nodes are expected to be named '<operation>_<resource>' and may carry
    'ihh_pays' / 'ihh_acteurs' attributes.

    :return: list of dicts with operation, resource and both IHH values
    """
    graph = read_dot(dot_path)
    records = []
    for node_name, attrs in graph.nodes(data=True):
        # Only '<operation>_<resource>' nodes are of interest.
        if '_' not in node_name:
            continue
        operation, resource = node_name.split('_', 1)
        if 'ihh_pays' not in attrs and 'ihh_acteurs' not in attrs:
            continue
        try:
            ihh_pays = float(attrs.get('ihh_pays', 0))
            ihh_acteurs = float(attrs.get('ihh_acteurs', 0))
        except ValueError:
            # Malformed attribute value: skip the node entirely.
            continue
        records.append({
            'operation': operation,
            'resource': resource,
            'ihh_pays': ihh_pays,
            'ihh_acteurs': ihh_acteurs,
        })
    return records
def classify(value, thresholds):
    """Map *value* to a colour band ('vert', 'orange' or 'rouge').

    Bands are checked in order (green, orange, red); when no band matches
    (missing bounds or gaps between bands) the value defaults to green.
    """
    green_max = thresholds['vert'].get('max')
    orange_min = thresholds['orange'].get('min')
    orange_max = thresholds['orange'].get('max')
    red_min = thresholds['rouge'].get('min')
    if green_max is not None and value < green_max:
        return 'vert'
    if orange_min is not None and orange_max is not None \
            and orange_min <= value <= orange_max:
        return 'orange'
    if red_min is not None and value >= red_min:
        return 'rouge'
    # Conservative fallback when no band matched.
    return 'vert'
def filter_alerts(records, thresholds):
    """Keep only records whose IHH is orange or red on either axis.

    Each returned record is a *copy* of the input dict enriched with
    'color_pays' and 'color_acteurs'.  The input records are left untouched
    (the previous version mutated the caller's dicts in place).

    :param records: list of dicts as produced by parse_graph()
    :param thresholds: threshold bands as returned by load_config()
    :return: new list of enriched record dicts
    """
    alerts = []
    for rec in records:
        color_pays = classify(rec['ihh_pays'], thresholds)
        color_acteurs = classify(rec['ihh_acteurs'], thresholds)
        if color_pays in ('orange', 'rouge') or color_acteurs in ('orange', 'rouge'):
            # Copy instead of mutating the caller's dict.
            alerts.append({
                **rec,
                'color_pays': color_pays,
                'color_acteurs': color_acteurs,
            })
    return alerts
def map_to_fiche(operation, resource, mapping):
    """Return the corpus directory of the fiche for (operation, resource)."""
    op_cfg = mapping[operation]
    # Folder names look like "<fiche_prefix> <resource lowercased>".
    folder_name = f"{op_cfg['fiche_prefix']} {resource.lower()}"
    return os.path.join(CORPUS_DIR, op_cfg['resource_dir'], folder_name)
def extract_section(fiche_dir, slug_parent, slug_fille):
    """Return the raw text of one fiche sub-section, without any processing.

    Looks for a sub-directory of *fiche_dir* whose name ends with
    *slug_parent*, then for a file ending in '-<slug_fille>.md' inside it
    (numeric prefixes are ignored).  Returns '' when either level is missing.
    """
    candidates = [
        entry for entry in os.listdir(fiche_dir)
        if entry.endswith(slug_parent)
        and os.path.isdir(os.path.join(fiche_dir, entry))
    ]
    if not candidates:
        return ''
    section_dir = os.path.join(fiche_dir, candidates[0])
    suffix = f"-{slug_fille}.md"
    matching = [entry for entry in os.listdir(section_dir)
                if entry.endswith(suffix)]
    if not matching:
        return ''
    with open(os.path.join(section_dir, matching[0]), encoding='utf-8') as fh:
        return fh.read().strip()
def build_markdown(intro, entries):
    """Assemble the final Markdown report, ready to paste into a prompt.

    Each entry gets a '### <operation>_<resource>' heading, its two IHH
    figures with their colours, then its fiche section copied verbatim.
    """
    parts = [intro, '']
    for entry in entries:
        parts.append(f"### {entry['operation']}_{entry['resource']}")
        parts.append(f"- **IHH pays** : {entry['ihh_pays']} ({entry['color_pays']})")
        parts.append(f"- **IHH acteurs** : {entry['ihh_acteurs']} ({entry['color_acteurs']})")
        # Verbatim copy of the fiche section, line by line.
        parts.extend(entry['section_full'].splitlines())
        parts.append('')
    return '\n'.join(parts)
def main(dot_path, output_path='prompt.md'):
    """Generate the IHH alert report prompt from a DOT graph.

    Reads thresholds and the operation/fiche mapping, extracts IHH values
    from *dot_path*, keeps the orange/red alerts, enriches each with the
    full text of its fiche section, and writes the Markdown to *output_path*.
    """
    thresholds = load_config()
    mapping = load_mapping()
    records = parse_graph(dot_path)
    alerts = filter_alerts(records, thresholds)
    enriched = []
    for rec in alerts:
        fiche_dir = map_to_fiche(rec['operation'], rec['resource'], mapping)
        op_cfg = mapping[rec['operation']]
        section_text = extract_section(
            fiche_dir,
            op_cfg['slug_parent'],
            op_cfg['indices']['IHH']['slug_fille'],
        )
        enriched.append({**rec, 'section_full': section_text})
    # FIX: the original intro string had lost its apostrophes to a character
    # encoding issue ("didentifier", "lIndice"); restored here.
    intro = (
        "Ce rapport, destiné au COMEX et aux responsables risques, a pour objectif "
        "d'identifier et de détailler les opérations dont l'Indice de Herfindahl-Hirschmann (IHH) "
        "présente une vulnérabilité élevée."
    )
    md = build_markdown(intro, enriched)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(md)
    print(f"Prompt généré : {output_path}")
# CLI entry point: --dot is the input graph, --output the generated prompt file.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dot', required=True)
    parser.add_argument('--output', default='prompt.md')
    args = parser.parse_args()
    main(args.dot, args.output)