# homelab_automation/app/utils/markdown_parser.py

"""
Parser HTML vers Markdown pour la documentation d'aide.
"""

import re
from html.parser import HTMLParser
from pathlib import Path
from typing import List, Optional, Tuple


class HelpHtmlToMarkdownParser(HTMLParser):
    """Convert help-documentation HTML into Markdown.

    Usage: call ``feed(html)`` (and ``close()`` once all input has been fed),
    then read the result with ``get_markdown()``.

    Supported elements: headings ``h1``-``h6``, paragraphs, line breaks,
    bold/italic, inline code, ``pre`` blocks, (nested) ordered and unordered
    lists, links, simple tables, horizontal rules and block quotes.
    Everything inside ``script``, ``style``, ``nav`` and ``footer`` is dropped,
    including the markup of nested tags.
    """

    # Elements whose entire subtree (text and nested tags) is discarded.
    _IGNORED_TAGS = frozenset({"script", "style", "nav", "footer"})
    # Heading tag name -> number of leading "#" characters.
    _HEADING_LEVELS = {f"h{n}": n for n in range(1, 7)}
    # Void elements never receive an end tag, so they must not be pushed on
    # ``tag_stack`` (they would never be popped again).
    _VOID_TAGS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "source", "track", "wbr",
    })

    def __init__(self):
        super().__init__()
        # Finished output fragments; joined by get_markdown().
        self.markdown_lines: List[str] = []
        # Inline text accumulated since the last block boundary.
        self.current_text = ""
        self.in_code = False
        self.in_pre = False
        self.in_list = False
        self.list_type = "ul"
        self.list_level = 0
        self.in_table = False
        self.table_row: List[str] = []
        self.in_th = False
        self.ignore_content = False
        self.tag_stack: List[str] = []
        # Target of the currently open link; None when no link is open.
        self._href_pending: Optional[str] = None
        # Nesting depth of ignored elements (e.g. a <nav> inside a <footer>).
        self._ignore_depth = 0
        # Kinds ("ul"/"ol") of the currently open lists, outermost first.
        self._list_stack: List[str] = []
        # True while inside a <th>/<td>; cell text must stay in current_text.
        self._in_cell = False
        # True once the header separator of the current table was written.
        self._separator_emitted = False

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Emit the Markdown that opens ``tag``.

        Args:
            tag: Lower-cased tag name.
            attrs: ``(name, value)`` pairs; ``value`` is None for attributes
                written without a value.
        """
        if tag not in self._VOID_TAGS:
            self.tag_stack.append(tag)

        if tag in self._IGNORED_TAGS:
            self._ignore_depth += 1
            self.ignore_content = True
            return
        if self.ignore_content:
            # Nested markup of an ignored element must not leak list markers,
            # headings, etc. into the output.
            return

        if tag in self._HEADING_LEVELS:
            self._flush_text()
            self.markdown_lines.append("\n" + "#" * self._HEADING_LEVELS[tag] + " ")
        elif tag == "p":
            if self._in_cell:
                # A table row must stay on one line.
                self._append_space()
            else:
                self._flush_text()
                self.markdown_lines.append("\n")
        elif tag == "br":
            if self._in_cell:
                self._append_space()
            else:
                # Flush first so the break lands after the preceding text.
                self._flush_text()
                self.markdown_lines.append("\n")
        elif tag in ("strong", "b"):
            self.current_text += "**"
        elif tag in ("em", "i"):
            self.current_text += "*"
        elif tag == "code":
            # Inside <pre> the fence already marks the code.
            if not self.in_pre:
                self.current_text += "`"
            self.in_code = True
        elif tag == "pre":
            self._flush_text()
            self.in_pre = True
            self.markdown_lines.append("\n```\n")
        elif tag in ("ul", "ol"):
            self._flush_text()
            self._list_stack.append(tag)
            self.in_list = True
            self.list_type = tag
            self.list_level = len(self._list_stack)
        elif tag == "li":
            self._flush_text()
            # Indent by the marker width of each enclosing list so nested
            # items line up with the parent item's content ("- " vs "1. ").
            indent = "".join(
                "   " if kind == "ol" else "  " for kind in self._list_stack[:-1]
            )
            marker = "1. " if self.list_type == "ol" else "- "
            self.markdown_lines.append(f"\n{indent}{marker}")
        elif tag == "a":
            href = dict(attrs).get("href") or ""
            if href:
                self.current_text += "["
                self._href_pending = href
            else:
                # Anchors without a target are rendered as plain text.
                self._href_pending = None
        elif tag == "table":
            self._flush_text()
            self.in_table = True
            self._separator_emitted = False
            self.markdown_lines.append("\n")
        elif tag == "tr":
            self.table_row = []
        elif tag in ("th", "td"):
            if tag == "th":
                self.in_th = True
            # Anything pending does not belong to this cell.
            self._flush_text()
            self._in_cell = True
        elif tag == "hr":
            self._flush_text()
            self.markdown_lines.append("\n---\n")
        elif tag == "blockquote":
            self._flush_text()
            self.markdown_lines.append("\n> ")

    def handle_endtag(self, tag: str):
        """Emit the Markdown that closes ``tag``."""
        if self.tag_stack and self.tag_stack[-1] == tag:
            self.tag_stack.pop()

        if tag in self._IGNORED_TAGS:
            self._ignore_depth = max(0, self._ignore_depth - 1)
            self.ignore_content = self._ignore_depth > 0
            return
        if self.ignore_content:
            return

        if tag in self._HEADING_LEVELS:
            self._flush_text()
            self.markdown_lines.append("\n")
        elif tag == "p":
            if self._in_cell:
                self._append_space()
            else:
                self._flush_text()
                self.markdown_lines.append("\n")
        elif tag in ("strong", "b"):
            self.current_text += "**"
        elif tag in ("em", "i"):
            self.current_text += "*"
        elif tag == "code":
            if not self.in_pre:
                self.current_text += "`"
            self.in_code = False
        elif tag == "pre":
            self._flush_text()
            self.in_pre = False
            # The closing fence must start on its own line.
            if self.markdown_lines and not self.markdown_lines[-1].endswith("\n"):
                self.markdown_lines.append("\n")
            self.markdown_lines.append("```\n")
        elif tag in ("ul", "ol"):
            self._flush_text()
            if self._list_stack:
                self._list_stack.pop()
            self.list_level = len(self._list_stack)
            self.in_list = bool(self._list_stack)
            if self._list_stack:
                # Restore the marker style of the enclosing list.
                self.list_type = self._list_stack[-1]
            self.markdown_lines.append("\n")
        elif tag == "li":
            self._flush_text()
        elif tag == "a":
            if self._href_pending is not None:
                self.current_text += f"]({self._href_pending})"
                self._href_pending = None
        elif tag == "tr":
            if self.table_row:
                self.markdown_lines.append("| " + " | ".join(self.table_row) + " |\n")
                if self.in_th and not self._separator_emitted:
                    # Add the separator line after the header row (once per
                    # table, even if later rows also contain <th> cells).
                    self.markdown_lines.append(
                        "|" + "|".join(["---"] * len(self.table_row)) + "|\n"
                    )
                    self._separator_emitted = True
            self.in_th = False
        elif tag in ("th", "td"):
            # Capture the cell text directly; flushing it would move it out of
            # the row. A literal "|" would otherwise start a new column.
            self.table_row.append(self.current_text.strip().replace("|", "\\|"))
            self.current_text = ""
            self._in_cell = False
        elif tag == "table":
            self.in_table = False
            self.markdown_lines.append("\n")
        elif tag == "blockquote":
            self._flush_text()
            self.markdown_lines.append("\n")

    def handle_data(self, data: str):
        """Collect text: verbatim inside ``<pre>``, whitespace-normalised elsewhere."""
        if self.ignore_content:
            return

        if self.in_pre:
            self.markdown_lines.append(data)
            return

        # Collapse runs of whitespace, but keep a single space at either edge
        # so words next to inline tags ("a <b>b</b> c") are not glued together.
        text = " ".join(data.split())
        if not text:
            self._append_space()
            return
        if data[0].isspace():
            self._append_space()
        self.current_text += text
        if data[-1].isspace():
            self.current_text += " "

    def _append_space(self):
        """Add one separating space to the pending text, if it needs one."""
        if self.current_text and not self.current_text.endswith(" "):
            self.current_text += " "

    def _flush_text(self):
        """Move the pending inline text (stripped) to the output and reset it."""
        if self.current_text.strip():
            self.markdown_lines.append(self.current_text.strip())
        self.current_text = ""

    def get_markdown(self) -> str:
        """Return the Markdown produced so far.

        Returns:
            The joined output with runs of three or more newlines collapsed
            to a single blank line and surrounding whitespace removed.
        """
        self._flush_text()
        content = "".join(self.markdown_lines)
        # Collapse multiple empty lines.
        content = re.sub(r'\n{3,}', '\n\n', content)
        return content.strip()


def build_help_markdown(
    html_path: Optional[Path] = None,
    html_content: Optional[str] = None,
) -> str:
    """Build the help Markdown from an HTML document.

    The HTML is taken from ``html_content`` when it is non-empty, otherwise
    from the file at ``html_path``. When neither yields a document, the
    built-in default help text is returned.

    Only the contents of ``<section id="help">`` are converted when such a
    section exists; otherwise the ``<body>`` contents, or the whole document
    if there is no ``<body>``.

    Args:
        html_path: Path to the source HTML file.
        html_content: HTML given directly (takes priority over html_path).

    Returns:
        The formatted Markdown content.
    """
    if html_content:
        content = html_content
    elif html_path and html_path.is_file():
        # is_file() rather than exists(): a directory path falls back to the
        # default help instead of failing in read_text().
        content = html_path.read_text(encoding='utf-8')
    else:
        return _get_default_help_markdown()

    # Extract only the help section when present.
    # NOTE: the non-greedy match stops at the first "</section>", so a help
    # section that itself contains nested <section> elements is cut short.
    help_section_match = re.search(
        r'<section[^>]*id=["\']help["\'][^>]*>(.*?)</section>',
        content,
        re.DOTALL | re.IGNORECASE
    )

    if help_section_match:
        help_html = help_section_match.group(1)
    else:
        # Fall back to the whole body when there is no help section.
        body_match = re.search(r'<body[^>]*>(.*?)</body>', content, re.DOTALL | re.IGNORECASE)
        help_html = body_match.group(1) if body_match else content

    # Convert the HTML.
    parser = HelpHtmlToMarkdownParser()
    parser.feed(help_html)
    # feed() may hold back trailing text (e.g. input ending in "R&D", which
    # could be the start of a character reference); close() forces it out.
    parser.close()

    return parser.get_markdown()


def _get_default_help_markdown() -> str:
    """Retourne le contenu d'aide par défaut."""
    return """# 📚 Documentation Homelab Automation

## 🎯 Introduction

Bienvenue dans le **Homelab Automation Dashboard**, une plateforme complète pour gérer votre infrastructure homelab avec Ansible.

## 🖥️ Dashboard

Le dashboard principal affiche:
- **Hôtes en ligne**: Nombre d'hôtes actuellement accessibles
- **Tâches**: Nombre total de tâches exécutées
- **Taux de succès**: Pourcentage de tâches réussies
- **Uptime**: Disponibilité globale du système

## 🖧 Gestion des Hôtes

### Ajouter un hôte
1. Cliquez sur "Nouvel hôte"
2. Renseignez le nom et l'adresse IP
3. Sélectionnez le groupe d'environnement
4. Choisissez les groupes de rôles
5. Cliquez sur "Ajouter"

### Bootstrap
Le bootstrap configure un hôte pour Ansible:
- Création de l'utilisateur d'automatisation
- Configuration des clés SSH
- Installation de sudo et Python3

## 📋 Playbooks

Les playbooks Ansible sont organisés par catégorie:
- **Général**: Playbooks de base
- **Maintenance**: Mises à jour, redémarrages
- **Sécurité**: Configurations de sécurité
- **Monitoring**: Surveillance et alertes

## ⏰ Schedules

Planifiez l'exécution automatique de playbooks:
- **Une fois**: Exécution unique à une date/heure
- **Quotidien**: Tous les jours à une heure fixe
- **Hebdomadaire**: Certains jours de la semaine
- **Mensuel**: Un jour spécifique du mois
- **Personnalisé**: Expression cron

## 🔔 Notifications

Les notifications sont envoyées via ntfy:
- Démarrage/arrêt de l'application
- Succès/échec des tâches
- Bootstrap réussi/échoué

## 🔑 API

L'API REST est accessible avec une clé API:
```
Header: X-API-Key: votre-cle-api
```

---
*Généré par Homelab Automation Dashboard*
"""