202 lines
8.9 KiB
Python
202 lines
8.9 KiB
Python
import re
|
|
|
|
# Patterns are compiled once at import time instead of on every call.
_RE_START_SECTION = re.compile(r"^##\s*Chaînes\s+avec\s+risque\s+critique", re.IGNORECASE)
_RE_OTHER_H2 = re.compile(r"^##\s+(?!(Chaînes\s+avec\s+risque\s+critique))")
_RE_CHAIN_HEADING = re.compile(r"^###\s*(.+)\s*→\s*(.+)\s*→\s*(.+)$")
_RE_PHASE = re.compile(r"^\*\s*(Assemblage|Fabrication|Minerai|Extraction|Traitement)", re.IGNORECASE)
_RE_IHH = re.compile(r"IHH\s*[:]\s*([0-9]+(?:\.[0-9]+)?)")
_RE_ISG = re.compile(r"ISG\s*combiné\s*[:]\s*([0-9]+(?:\.[0-9]+)?)|ISG\s*[:]\s*([0-9]+(?:\.[0-9]+)?)", re.IGNORECASE)
_RE_ICS = re.compile(r"ICS\s*moyen\s*[:]\s*([0-9]+(?:\.[0-9]+)?)", re.IGNORECASE)
_RE_IVC = re.compile(r"IVC\s*[:]\s*([0-9]+(?:\.[0-9]+)?)", re.IGNORECASE)

_OPERATIONS_HEADING = "## Détails des opérations"
_MINERALS_HEADING = "## Détails des minerais"

# phase name -> (entity the phase belongs to, ((field, pattern), ...)).
# Patterns are tried in order and the first one found on a line wins.
_PHASE_METRICS = {
    "Assemblage": ("produit", (("IHH_Assemblage", _RE_IHH), ("ISG_Assemblage", _RE_ISG))),
    "Fabrication": ("composant", (("IHH_Fabrication", _RE_IHH), ("ISG_Fabrication", _RE_ISG))),
    "Minerai": ("minerai", (("ICS", _RE_ICS), ("IVC", _RE_IVC))),
    "Extraction": ("minerai", (("IHH_Extraction", _RE_IHH), ("ISG_Extraction", _RE_ISG))),
    "Traitement": ("minerai", (("IHH_Traitement", _RE_IHH), ("ISG_Traitement", _RE_ISG))),
}


def _parse_critical_chains(raw_lines: list[str]) -> tuple[dict, dict, dict, list, dict]:
    """Extract chains, their index values and descriptions from the critical-chains section."""
    produits, composants, mineraux, chains = {}, {}, {}, []
    tables = {"produit": produits, "composant": composants, "minerai": mineraux}
    description_parts: dict[str, list[str]] = {}
    chain = None        # names of the chain currently being read
    chain_key = None    # "produit → composant → minerai" key of that chain
    phase = None        # last phase bullet seen under the current chain
    in_section = False

    for raw_line in raw_lines:
        line = raw_line.strip()

        # Everything before the start heading is ignored.
        if not in_section:
            in_section = bool(_RE_START_SECTION.match(line))
            continue

        # Any other level-2 heading closes the section.
        if _RE_OTHER_H2.match(line):
            break

        if (m := _RE_CHAIN_HEADING.match(line)):
            prod, comp, miner = (part.strip() for part in m.groups())
            produits.setdefault(prod, {"IHH_Assemblage": None, "ISG_Assemblage": None})
            composants.setdefault(comp, {"IHH_Fabrication": None, "ISG_Fabrication": None})
            mineraux.setdefault(miner, {
                "ICS": None, "IVC": None,
                "IHH_Extraction": None, "ISG_Extraction": None,
                "IHH_Traitement": None, "ISG_Traitement": None,
            })
            chain = {"produit": prod, "composant": comp, "minerai": miner}
            chains.append(dict(chain))
            chain_key = f"{prod} → {comp} → {miner}"
            # A repeated heading restarts the description of that chain.
            description_parts[chain_key] = []
            phase = None
            continue

        if chain is None:
            continue

        if (m := _RE_PHASE.match(line)):
            phase = m.group(1).capitalize()
            continue

        # Text between a chain heading and its first phase bullet is the description.
        if phase is None:
            description_parts[chain_key].append(raw_line)
            continue

        spec = _PHASE_METRICS.get(phase)
        if spec is None:
            continue
        entity, metrics = spec
        record = tables[entity][chain[entity]]
        for field, pattern in metrics:
            if (m := pattern.search(line)):
                # The ISG pattern has two alternative groups; take whichever matched.
                record[field] = float(next(g for g in m.groups() if g is not None))
                break

    descriptions = {key: "".join(parts) for key, parts in description_parts.items()}
    return produits, composants, mineraux, chains, descriptions


def _parse_operation_details(lines: list[str]) -> dict[str, str]:
    """Collect the body of each "### X et Assemblage" / "### X et Fabrication" subsection."""
    sections: dict[str, str] = {}
    name = None
    body: list[str] = []

    for line in lines:
        if line.startswith("### ") and " et " in line:
            if name and body:
                sections[name] = "\n".join(body)
            title = line.replace("### ", "").strip()
            if " et Assemblage" in title:
                name = f'{title.replace(" et Assemblage", "").strip()}_assemblage'
            elif " et Fabrication" in title:
                name = f'{title.replace(" et Fabrication", "").strip()}_fabrication'
            else:
                # Unrecognised heading: forget the previous name so this
                # subsection's text cannot overwrite the section just saved.
                name = None
            body = []
        elif name:
            body.append(line)

    if name and body:
        sections[name] = "\n".join(body)
    return sections


def _parse_mineral_details(lines: list[str]) -> dict[str, str]:
    """Collect the general / extraction / traitement text of each mineral subsection."""
    sections: dict[str, str] = {}
    mineral = None
    kind = "general"
    body: list[str] = []

    def flush() -> None:
        if mineral and body:
            sections[f"{mineral}_{kind}"] = "\n".join(body)

    for line in lines:
        if line.startswith("### ") and "→" not in line and " et " not in line:
            flush()
            mineral = line.replace("### ", "").strip()
            kind = "general"
            body = []
        elif line.startswith("#### Extraction"):
            flush()
            kind = "extraction"
            body = []
        elif line.startswith("#### Traitement"):
            flush()
            kind = "traitement"
            body = []
        elif line.startswith("## ") and mineral:
            # The next level-2 heading ends the minerals section.
            break
        elif mineral:
            body.append(line)

    flush()
    return sections


def parse_chains_md(filepath: str) -> tuple[dict, dict, dict, list, dict, dict]:
    """Read and parse a Markdown file describing mineral supply chains.

    Three parts of the file are used:

    * the "## Chaînes avec risque critique" section, whose
      "### produit → composant → minerai" headings define the chains and whose
      phase bullets carry the IHH / ISG / ICS / IVC values;
    * the "## Détails des opérations" section (assemblage and fabrication text);
    * the "## Détails des minerais" section (general, extraction and
      traitement text per mineral).

    The two detail sections may appear in either order.

    Args:
        filepath: Path of the UTF-8 Markdown file to parse.

    Returns:
        A 6-tuple ``(produits, composants, mineraux, chains, descriptions,
        details_sections)``:

        * ``produits``: product name -> ``IHH_Assemblage`` / ``ISG_Assemblage``.
        * ``composants``: component name -> ``IHH_Fabrication`` / ``ISG_Fabrication``.
        * ``mineraux``: mineral name -> ``ICS``, ``IVC`` and the IHH / ISG
          values for extraction and traitement.
        * ``chains``: list of ``{"produit", "composant", "minerai"}`` dicts in
          file order.
        * ``descriptions``: "produit → composant → minerai" -> free text found
          between the chain heading and its first phase bullet.
        * ``details_sections``: "<name>_assemblage", "<name>_fabrication",
          "<mineral>_general", "<mineral>_extraction", "<mineral>_traitement"
          -> section text.

        Values that are absent from the file stay ``None``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Read the file once; both passes work on the same text.
    with open(filepath, encoding="utf-8") as f:
        raw_lines = f.readlines()

    produits, composants, mineraux, chains, descriptions = _parse_critical_chains(raw_lines)

    lines = "".join(raw_lines).split("\n")

    # Locate the two detail sections; if a heading is repeated, the last one wins.
    operations_start = None
    minerais_start = None
    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped == _OPERATIONS_HEADING:
            operations_start = index
        elif stripped == _MINERALS_HEADING:
            minerais_start = index

    details_sections: dict[str, str] = {}

    if operations_start is not None:
        # The minerals heading bounds the operations section only when it
        # comes after it; otherwise the section runs to the end of the file.
        if minerais_start is not None and minerais_start > operations_start:
            operations_end = minerais_start
        else:
            operations_end = len(lines)
        details_sections.update(
            _parse_operation_details(lines[operations_start:operations_end])
        )

    if minerais_start is not None:
        details_sections.update(_parse_mineral_details(lines[minerais_start:]))

    return produits, composants, mineraux, chains, descriptions, details_sections
|