mise à jour et ajout de la génération des rapports (temporaire)
This commit is contained in:
parent
427c7d26f5
commit
33695092af
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,7 +6,8 @@
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyo
|
*.pyo
|
||||||
*.pyd
|
*.pyd
|
||||||
.dot
|
*.dot
|
||||||
|
prompt.md
|
||||||
|
|
||||||
# Ignorer cache et temporaire
|
# Ignorer cache et temporaire
|
||||||
.cache/
|
.cache/
|
||||||
|
|||||||
37
assets/mapping.yaml
Normal file
37
assets/mapping.yaml
Normal file
@ -0,0 +1,37 @@
operations:
  Extraction:
    fiche_prefix: "Fiche minerai"
    resource_dir: "Minerai"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-extraction"
  Traitement:
    fiche_prefix: "Fiche minerai"
    resource_dir: "Minerai"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-traitement"
  Assemblage:
    fiche_prefix: "Fiche assemblage"
    resource_dir: "Assemblage"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-assemblage"
  Fabrication:
    fiche_prefix: "Fiche fabrication"
    resource_dir: "Fabrication"
    section_parent: "Matrice des risques"
    slug_parent: "matrices-des-risques"
    indices:
      IHH:
        section_fille: "Indice de Herfindahl-Hirschmann"
        slug_fille: "indice-de-herfindahl-hirschmann-fabrication"
95
generate_corpus.py
Normal file
95
generate_corpus.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Directories under Fiches/ that must never be copied into the corpus.
EXCLUDE_DIRS = {"Local"}
MAX_SECTION_LENGTH = 1200  # unused here: splitting is driven by "###" headings, not length
|
||||||
|
|
||||||
|
def slugify(text):
    """Turn *text* into a lowercase, hyphen-separated slug."""
    hyphenated = re.sub(r'\W+', '-', text.strip())
    return hyphenated.strip('-').lower()
|
||||||
|
|
||||||
|
def split_markdown_sections_refined(content):
    """Split a Markdown document into its level-2 sections.

    Returns a list of ``(title, intro_lines, subsections)`` tuples, where
    ``subsections`` is a list of ``(subtitle, lines)`` pairs built from the
    "###" headings inside each "##" section.  Text appearing before the
    first "##" heading is discarded.
    """
    sections = []
    title = None
    intro = []
    subs = []
    sub = None
    in_section = False

    for raw in content.splitlines():
        if raw.startswith("## "):
            # Close the previous section (and its pending subsection) first.
            if title:
                if sub:
                    subs.append(sub)
                sections.append((title, intro, subs))
                intro, subs = [], []
                sub = None
            title = raw[3:].strip()
            in_section = True
        elif raw.startswith("### ") and in_section:
            if sub:
                subs.append(sub)
            sub = (raw[4:].strip(), [])
        elif in_section:
            # Body line: attach to the current subsection if one is open,
            # otherwise to the section's intro.
            if sub:
                sub[1].append(raw)
            else:
                intro.append(raw)

    # Flush the last open section.
    if title:
        if sub:
            subs.append(sub)
        sections.append((title, intro, subs))
    return sections
|
||||||
|
|
||||||
|
def process_markdown_file(md_path, rel_output_dir):
    """Split one Markdown fiche into per-section files under *rel_output_dir*.

    A "##" section that contains "###" subsections becomes a directory
    holding an ``_intro.md`` plus one numbered file per subsection; a
    section without subsections becomes a single numbered ``.md`` file.
    """
    content = Path(md_path).read_text(encoding="utf-8")

    for idx, (title, intro_lines, subsections) in enumerate(
            split_markdown_sections_refined(content)):
        stem = f"{idx:02d}-{slugify(title)}"
        intro_text = f"## {title}\n" + "\n".join(intro_lines).strip()

        if not subsections:
            # Flat section: one file, no directory.
            (rel_output_dir / f"{stem}.md").write_text(intro_text, encoding="utf-8")
            continue

        section_dir = rel_output_dir / stem
        section_dir.mkdir(parents=True, exist_ok=True)
        (section_dir / "_intro.md").write_text(intro_text, encoding="utf-8")
        for sub_idx, (sub_title, sub_lines) in enumerate(subsections):
            sub_path = section_dir / f"{sub_idx:02d}-{slugify(sub_title)}.md"
            sub_text = f"### {sub_title}\n" + "\n".join(sub_lines).strip()
            sub_path.write_text(sub_text, encoding="utf-8")
|
||||||
|
|
||||||
|
def build_corpus_structure():
    """Rebuild the ``Corpus/`` tree from every fiche found under ``Fiches/``.

    The destination tree is wiped first.  Directories listed in
    EXCLUDE_DIRS are skipped, as are non-``.md`` files and backup-style
    names containing ``.md.``.
    """
    base = Path(__file__).resolve().parent
    source_root = base / "Fiches"
    dest_root = base / "Corpus"

    # Start from a clean slate so stale sections never survive a rebuild.
    if dest_root.exists():
        shutil.rmtree(dest_root)
    dest_root.mkdir(parents=True, exist_ok=True)

    for current_dir, _, filenames in os.walk(source_root):
        relative = Path(current_dir).relative_to(source_root)
        if any(part in EXCLUDE_DIRS for part in relative.parts):
            continue
        for name in filenames:
            if not name.endswith(".md") or ".md." in name:
                continue
            # One output directory per fiche, mirroring the source layout.
            target_dir = dest_root / relative / Path(name).stem
            target_dir.mkdir(parents=True, exist_ok=True)
            process_markdown_file(Path(current_dir) / name, target_dir)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: regenerate the whole corpus tree from Fiches/.
    build_corpus_structure()
    print("✅ Corpus généré avec succès dans le dossier 'Corpus/'")
|
||||||
180
rapports_IA.py
Normal file
180
rapports_IA.py
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
import networkx as nx
|
||||||
|
from pathlib import Path
|
||||||
|
from networkx.drawing.nx_agraph import read_dot
|
||||||
|
|
||||||
|
# Path constants
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
ASSETS_DIR = os.path.join(BASE_DIR, 'assets')
CONFIG_PATH = os.path.join(ASSETS_DIR, 'config.yaml')    # IHH thresholds
MAPPING_PATH = os.path.join(ASSETS_DIR, 'mapping.yaml')  # operation <-> fiche mapping
CORPUS_DIR = os.path.join(BASE_DIR, 'Corpus')            # corpus tree of split fiches
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(config_path=CONFIG_PATH):
    """Load the IHH thresholds from config.yaml.

    :return: dict of thresholds for 'IHH'
    """
    with open(config_path, 'r', encoding='utf-8') as fh:
        config = yaml.safe_load(fh)
    return config['seuils']['IHH']
|
||||||
|
|
||||||
|
|
||||||
|
def load_mapping(mapping_path=MAPPING_PATH):
    """Load the operations <-> fiches mapping from mapping.yaml.

    :return: dict keyed by operation name
    """
    with open(mapping_path, 'r', encoding='utf-8') as fh:
        data = yaml.safe_load(fh)
    return data['operations']
|
||||||
|
|
||||||
|
|
||||||
|
def parse_graph(dot_path):
    """Extract the IHH records carried by the nodes of a DOT graph.

    Nodes are expected to be named ``<operation>_<resource>``.  Nodes
    without an underscore, without any ``ihh_*`` attribute, or whose IHH
    values cannot be parsed as floats are skipped.

    :return: list of dicts (operation, resource, ihh_pays, ihh_acteurs)
    """
    graph = read_dot(dot_path)
    records = []
    for node_name, attrs in graph.nodes(data=True):
        if '_' not in node_name:
            continue
        operation, resource = node_name.split('_', 1)
        if 'ihh_pays' not in attrs and 'ihh_acteurs' not in attrs:
            continue
        try:
            # A missing attribute counts as 0.
            ihh_pays = float(attrs.get('ihh_pays', 0))
            ihh_acteurs = float(attrs.get('ihh_acteurs', 0))
        except ValueError:
            # Malformed numeric attribute: drop the node entirely.
            continue
        records.append({
            'operation': operation,
            'resource': resource,
            'ihh_pays': ihh_pays,
            'ihh_acteurs': ihh_acteurs,
        })
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def classify(value, thresholds):
    """Map *value* to a colour band: 'vert', 'orange' or 'rouge'.

    Bands are tried from least to most severe; a value matching no band
    (e.g. falling in a gap between thresholds) defaults to 'vert'.
    """
    green_max = thresholds['vert'].get('max')
    orange_min = thresholds['orange'].get('min')
    orange_max = thresholds['orange'].get('max')
    red_min = thresholds['rouge'].get('min')

    if green_max is not None and value < green_max:
        return 'vert'
    orange_defined = orange_min is not None and orange_max is not None
    if orange_defined and orange_min <= value <= orange_max:
        return 'orange'
    if red_min is not None and value >= red_min:
        return 'rouge'
    # Fallback when no band matched.
    return 'vert'
|
||||||
|
|
||||||
|
|
||||||
|
def filter_alerts(records, thresholds):
    """Keep only records whose country or actor IHH is orange or red.

    NOTE: kept record dicts are annotated in place with 'color_pays' and
    'color_acteurs'.
    """
    flagged = ('orange', 'rouge')
    alerts = []
    for record in records:
        colour_pays = classify(record['ihh_pays'], thresholds)
        colour_acteurs = classify(record['ihh_acteurs'], thresholds)
        if colour_pays in flagged or colour_acteurs in flagged:
            record['color_pays'] = colour_pays
            record['color_acteurs'] = colour_acteurs
            alerts.append(record)
    return alerts
|
||||||
|
|
||||||
|
|
||||||
|
def map_to_fiche(operation, resource, mapping):
    """Return the corpus directory of the fiche for *operation*/*resource*."""
    entry = mapping[operation]
    # Fiche folders are named "<prefix> <resource-in-lowercase>".
    folder_name = f"{entry['fiche_prefix']} {resource.lower()}"
    return os.path.join(CORPUS_DIR, entry['resource_dir'], folder_name)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_section(fiche_dir, slug_parent, slug_fille):
    """Return the raw content of one indicator sub-section of a fiche.

    Looks for a sub-directory of *fiche_dir* whose name ends with
    *slug_parent* (e.g. ``03-matrices-des-risques``), then inside it for a
    file ending with ``-<slug_fille>.md``, whatever its numeric prefix.

    :return: the stripped file content, or '' when the fiche directory,
        the section directory or the file is missing
    """
    # A missing fiche directory is reported like a missing section ('').
    if not os.path.isdir(fiche_dir):
        return ''

    # Find the "*-slug_parent" directory; sorted() makes the choice
    # deterministic (os.listdir order is arbitrary).
    sec_dirs = sorted(
        d for d in os.listdir(fiche_dir)
        if d.endswith(slug_parent) and os.path.isdir(os.path.join(fiche_dir, d))
    )
    if not sec_dirs:
        return ''
    sec_dir = os.path.join(fiche_dir, sec_dirs[0])

    # Find the "*-slug_fille.md" file, whatever its prefix; sorted for the
    # same determinism reason.
    files = sorted(f for f in os.listdir(sec_dir)
                   if f.endswith(f"-{slug_fille}.md"))
    if not files:
        return ''
    file_path = os.path.join(sec_dir, files[0])
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read().strip()
|
||||||
|
|
||||||
|
|
||||||
|
def build_markdown(intro, entries):
    """Assemble the final Markdown report, ready to paste into a prompt.

    Each entry becomes a "### <operation>_<resource>" heading followed by
    its two IHH bullet points and a verbatim copy of its fiche section.
    """
    parts = [intro, '']
    for entry in entries:
        parts.append(f"### {entry['operation']}_{entry['resource']}")
        parts.append(f"- **IHH pays** : {entry['ihh_pays']} ({entry['color_pays']})")
        parts.append(f"- **IHH acteurs** : {entry['ihh_acteurs']} ({entry['color_acteurs']})")
        # Copy the section verbatim, line by line, no transformation.
        parts.extend(entry['section_full'].splitlines())
        parts.append('')
    return '\n'.join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def main(dot_path, output_path='prompt.md'):
    """Generate the Markdown IHH report for *dot_path* into *output_path*."""
    thresholds = load_config()
    mapping = load_mapping()
    alerts = filter_alerts(parse_graph(dot_path), thresholds)

    # Attach the verbatim fiche section to every alerting record.
    enriched = []
    for alert in alerts:
        op_cfg = mapping[alert['operation']]
        fiche_dir = map_to_fiche(alert['operation'], alert['resource'], mapping)
        section = extract_section(
            fiche_dir,
            op_cfg['slug_parent'],
            op_cfg['indices']['IHH']['slug_fille'],
        )
        enriched.append({**alert, 'section_full': section})

    intro = (
        "Ce rapport, destiné au COMEX et aux responsables risques, a pour objectif "
        "d’identifier et de détailler les opérations dont l’Indice de Herfindahl-Hirschmann (IHH) "
        "présente une vulnérabilité élevée."
    )

    report = build_markdown(intro, enriched)
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write(report)
    print(f"Prompt généré : {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # CLI entry point: --dot <graph.dot> [--output prompt.md]
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--dot', required=True)
    parser.add_argument('--output', default='prompt.md')
    args = parser.parse_args()
    main(args.dot, args.output)
|
||||||
Loading…
x
Reference in New Issue
Block a user