773 lines
32 KiB
Python
773 lines
32 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
🦊 Foxy Dev Team — Auto-Pilot Daemon v2.3
|
||
==========================================
|
||
Auteur : Foxy Dev Team
|
||
Usage : python3 foxy-autopilot.py [--submit "desc"] [--probe] [--reset-running]
|
||
Service: systemctl --user start foxy-autopilot
|
||
|
||
Changelog v2.3:
|
||
- Fix: datetime.utcnow() → datetime.now(UTC)
|
||
- Fix: PID lock — empêche deux instances simultanées
|
||
- Fix: Probe syntaxe openclaw adapté à la vraie CLI (openclaw agent)
|
||
- Fix: Variables session X11 injectées depuis /proc/<gateway-pid>/environ
|
||
- Fix: Compteur d'échecs + suspension après MAX_CONSECUTIVE_FAILURES
|
||
- Fix: load_state avec retry pour race condition JSON
|
||
- Fix: spawn_agent défini avant process_project (NameError corrigé)
|
||
- New: check_finished_agents — détecte fin d'agent sans mise à jour du state
|
||
- New: --reset-running pour déblocage manuel
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import fcntl
|
||
import subprocess
|
||
import sys
|
||
import time
|
||
import signal
|
||
import logging
|
||
import urllib.request
|
||
import urllib.parse
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
|
||
# ─── UTC ───────────────────────────────────────────────────────────────────────
|
||
|
||
# Single timezone object shared by every timestamp helper in this file.
UTC = timezone.utc


def utcnow() -> datetime:
    """Return the current instant as a timezone-aware UTC datetime."""
    return datetime.now(tz=UTC)


def utcnow_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    return utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
|
||
|
||
# ─── CONFIG ────────────────────────────────────────────────────────────────────
|
||
|
||
# Directory scanned for <project>/project_state.json files.
WORKSPACE = Path("/home/openclaw/.openclaw/workspace")
# Daemon log file and PID/lock file.
LOG_FILE = Path("/home/openclaw/.openclaw/logs/foxy-autopilot.log")
PID_FILE = Path("/home/openclaw/.openclaw/logs/foxy-autopilot.pid")

# Seconds slept between two scans of the workspace.
POLL_INTERVAL = 30
# Seconds a project may stay in a *_RUNNING status before the watchdog resets it.
SPAWN_TIMEOUT = 1800

# SECURITY: a bot token is a credential and should not live in source control.
# The environment variables take precedence; the literals are kept only as a
# backward-compatible fallback. TODO(review): rotate this token and drop the
# hard-coded default once the service unit exports FOXY_TELEGRAM_BOT.
TELEGRAM_BOT = os.environ.get(
    "FOXY_TELEGRAM_BOT", "8686313703:AAEGUunkJWbJx7njX_NUrW9HcyrZqXzA3KQ"
)
TELEGRAM_CHAT = os.environ.get("FOXY_TELEGRAM_CHAT", "8379645618")

# Failures tolerated for one project before it is suspended.
MAX_CONSECUTIVE_FAILURES = 3
# Length of a suspension, counted in polling cycles.
SUSPEND_DURATION_CYCLES = 20
|
||
|
||
# Display name of each agent -> identifier substituted for "{agent}" on the
# openclaw command line.
AGENT_LABELS = {
    "Foxy-Conductor": "foxy-conductor",
    "Foxy-Architect": "foxy-architect",
    "Foxy-Dev": "foxy-dev",
    "Foxy-UIUX": "foxy-uiux",
    "Foxy-QA": "foxy-qa",
    "Foxy-Admin": "foxy-admin",
}

# Waiting status -> (agent to spawn, status written once the spawn succeeded).
STATUS_TRANSITIONS = {
    "AWAITING_CONDUCTOR": ("Foxy-Conductor", "CONDUCTOR_RUNNING"),
    "AWAITING_ARCHITECT": ("Foxy-Architect", "ARCHITECT_RUNNING"),
    "AWAITING_DEV": ("Foxy-Dev", "DEV_RUNNING"),
    "AWAITING_UIUX": ("Foxy-UIUX", "UIUX_RUNNING"),
    "AWAITING_QA": ("Foxy-QA", "QA_RUNNING"),
    "AWAITING_DEPLOY": ("Foxy-Admin", "DEPLOY_RUNNING"),
}

# Statuses for which no new agent is spawned. Besides the six *_RUNNING values
# this set also holds the terminal statuses "COMPLETED" and "FAILED"; callers
# exclude those two explicitly when they only care about live agents.
RUNNING_STATUSES = {
    "CONDUCTOR_RUNNING",
    "ARCHITECT_RUNNING",
    "DEV_RUNNING",
    "UIUX_RUNNING",
    "QA_RUNNING",
    "DEPLOY_RUNNING",
    "COMPLETED",
    "FAILED",
}
|
||
|
||
# ─── ÉTAT GLOBAL ───────────────────────────────────────────────────────────────
|
||
|
||
_process_tracker: dict[str, tuple] = {} # { slug → (Popen, agent_name) }
|
||
_failure_counter: dict[str, int] = {} # { slug → nb_echecs }
|
||
_suspended_until: dict[str, int] = {} # { slug → cycle_reprise }
|
||
_OPENCLAW_SPAWN_CMD: list[str]|None = None
|
||
_pid_lock_fh = None
|
||
|
||
# ─── LOGGING ───────────────────────────────────────────────────────────────────
|
||
|
||
# Make sure the log directory exists before the FileHandler opens the file.
LOG_FILE.parent.mkdir(parents=True, exist_ok=True)

log = logging.getLogger("foxy-autopilot")

# Guard against attaching duplicate handlers if this module is loaded twice.
if not log.handlers:
    log.setLevel(logging.INFO)
    fmt = logging.Formatter("[%(asctime)s] %(levelname)s %(message)s", datefmt="%Y-%m-%dT%H:%M:%SZ")
    # The date format ends with a literal "Z" (UTC designator), but Formatter
    # renders local time by default; switch the converter so the stamp is
    # really UTC.
    fmt.converter = time.gmtime

    fh = logging.FileHandler(LOG_FILE)
    fh.setFormatter(fmt)
    sh = logging.StreamHandler(sys.stdout)
    sh.setFormatter(fmt)

    log.addHandler(fh)
    log.addHandler(sh)
    # Records are fully handled here; do not forward them to the root logger.
    log.propagate = False
|
||
|
||
# ─── SIGNAL HANDLING ───────────────────────────────────────────────────────────
|
||
|
||
# Main-loop flag; flipped to False by the signal handler to stop the daemon.
_running = True


def handle_signal(sig, frame):
    """Signal handler: log the request and ask the main loop to stop."""
    global _running
    log.info("🛑 Signal reçu — arrêt propre du daemon...")
    _running = False


# Same handler for service stop (SIGTERM) and Ctrl-C (SIGINT).
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, handle_signal)
|
||
|
||
# ─── PID LOCK ──────────────────────────────────────────────────────────────────
|
||
|
||
def acquire_pid_lock() -> bool:
    """Take an exclusive, non-blocking flock on PID_FILE and write our PID into it.

    The file is opened in append mode so that a *failed* attempt never
    truncates it: the PID written by the instance holding the lock stays
    readable for the error message. The content is rewritten only after the
    lock has been obtained.

    Returns:
        True when the lock was acquired (the handle is stored in
        ``_pid_lock_fh`` to keep the lock alive), False otherwise.
    """
    global _pid_lock_fh
    fh = None
    try:
        PID_FILE.parent.mkdir(parents=True, exist_ok=True)
        fh = open(PID_FILE, "a+")
        fcntl.flock(fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # We own the lock: replace whatever a previous run left behind.
        fh.seek(0)
        fh.truncate(0)
        fh.write(str(os.getpid()))
        fh.flush()
        _pid_lock_fh = fh
        return True
    except BlockingIOError:
        # Another process holds the lock; release our useless handle.
        if fh is not None:
            fh.close()
        try:
            existing = PID_FILE.read_text().strip()
            print(f"❌ Une instance est déjà en cours (PID {existing}). Abandon.")
        except Exception:
            print("❌ Une instance est déjà en cours. Abandon.")
        return False
    except Exception as e:
        if fh is not None:
            fh.close()
        print(f"❌ Impossible d'acquérir le PID lock: {e}")
        return False
|
||
|
||
# ─── TELEGRAM ──────────────────────────────────────────────────────────────────
|
||
|
||
def notify(msg: str):
    """Send ``msg`` to the configured Telegram chat (HTML parse mode).

    Best effort: any failure is logged as a warning and otherwise ignored.
    """
    try:
        endpoint = f"https://api.telegram.org/bot{TELEGRAM_BOT}/sendMessage"
        fields = {"chat_id": TELEGRAM_CHAT, "text": msg, "parse_mode": "HTML"}
        payload = urllib.parse.urlencode(fields).encode()
        request = urllib.request.Request(endpoint, data=payload, method="POST")
        # The response body is not used; the context manager just closes it.
        with urllib.request.urlopen(request, timeout=5):
            pass
    except Exception as e:
        log.warning(f"Telegram error (ignoré): {e}")
|
||
|
||
# ─── STATE HELPERS ─────────────────────────────────────────────────────────────
|
||
|
||
def sanitize_state(state: dict, source: str) -> dict:
    """Validate and clean a freshly loaded state dict (modified in place).

    ``state["tasks"]`` is reset to an empty list when it is not a list;
    otherwise every entry that is not a dict is dropped, with a warning.

    Args:
        state: Parsed content of a project_state.json file.
        source: Label used in the warning messages.

    Returns:
        The same ``state`` object.
    """
    tasks = state.get("tasks", [])
    if not isinstance(tasks, list):
        log.warning(f" ⚠️ [{source}] tasks[] n'est pas une liste — réinitialisé")
        state["tasks"] = []
        return state

    # One warning per rejected entry, in list order.
    for index, entry in enumerate(tasks):
        if not isinstance(entry, dict):
            log.warning(f" ⚠️ [{source}] tasks[{index}] est un {type(entry).__name__} (ignoré): {repr(entry)[:80]}")

    kept = [entry for entry in tasks if isinstance(entry, dict)]
    dropped = len(tasks) - len(kept)
    if dropped:
        log.warning(f" ⚠️ [{source}] {dropped} tâche(s) invalide(s) filtrée(s)")

    state["tasks"] = kept
    return state
|
||
|
||
def load_state(state_file: Path, retries: int = 3, delay: float = 0.5) -> dict|None:
    """Read and sanitize a project_state.json file.

    Invalid JSON is retried up to ``retries`` times, sleeping ``delay``
    seconds between attempts. After the last failed attempt the ``.json.bak``
    sibling is tried, if it exists.

    Returns:
        The sanitized state dict, or None when nothing usable could be read.
    """
    for attempt in range(1, retries + 1):
        try:
            with open(state_file) as handle:
                payload = json.load(handle)
            return sanitize_state(payload, state_file.parent.name)
        except json.JSONDecodeError as e:
            if attempt < retries:
                log.debug(f"JSON invalide ({attempt}/{retries}), retry dans {delay}s: {e}")
                time.sleep(delay)
                continue

            # Last attempt failed: fall back to the backup copy.
            fallback = state_file.with_suffix(".json.bak")
            if fallback.exists():
                log.warning(f"JSON invalide dans {state_file.name} — utilisation du backup")
                try:
                    with open(fallback) as handle:
                        payload = json.load(handle)
                    return sanitize_state(payload, state_file.parent.name + ".bak")
                except Exception:
                    # Backup unusable as well; reported by the warning below.
                    pass
            log.warning(f"JSON invalide dans {state_file}: {e}")
            return None
        except Exception as e:
            # Anything other than a JSON syntax error is not retried.
            log.warning(f"Erreur lecture {state_file}: {e}")
            return None
    return None
|
||
|
||
def save_state(state_file: Path, state: dict):
    """Persist ``state`` to ``state_file`` atomically, keeping a ``.json.bak`` copy.

    The new content is written to a temporary sibling file first and only
    then moved over the real file with ``os.replace``. Readers therefore
    always find either the previous or the new complete document, never a
    missing or half-written file. The previous version is copied to the
    backup before the swap.

    Errors are logged, not raised; on failure the existing state file is
    left untouched.
    """
    backup = state_file.with_suffix(".json.bak")
    tmp = state_file.with_suffix(".json.tmp")
    try:
        with open(tmp, "w") as f:
            json.dump(state, f, indent=2, ensure_ascii=False)
            f.flush()
            os.fsync(f.fileno())
        if state_file.exists():
            # Copy rather than rename so the live file never disappears.
            backup.write_bytes(state_file.read_bytes())
        os.replace(tmp, state_file)
        log.info(f"💾 State sauvegardé: {state_file.parent.name}")
    except Exception as e:
        log.error(f"Erreur sauvegarde {state_file}: {e}")
        # Drop the partial temp file; the previous state file is still intact.
        try:
            tmp.unlink()
        except OSError:
            pass
|
||
|
||
def add_audit(state: dict, action: str, agent: str, details: str = ""):
    """Append a timestamped entry to ``state["audit_log"]``, creating the list if needed."""
    entry = {
        "timestamp": utcnow_iso(),
        "action": action,
        "agent": agent,
        "details": details,
        "source": "foxy-autopilot",
    }
    journal = state.setdefault("audit_log", [])
    journal.append(entry)
|
||
|
||
def mark_status(state_file: Path, state: dict, new_status: str, agent: str):
    """Set the project status, record the change in the audit log and save the state.

    Args:
        state_file: Path of the project_state.json to write.
        state: State dict, updated in place.
        new_status: Status to store.
        agent: Author recorded in the audit entry.
    """
    previous = state.get("status", "?")
    state.update(status=new_status, updated_at=utcnow_iso())
    add_audit(state, "STATUS_CHANGED", agent, f"{previous} → {new_status}")
    save_state(state_file, state)
    log.info(f" 📋 Statut: {previous} → {new_status}")
|
||
|
||
# ─── PROBE SYNTAXE OPENCLAW ────────────────────────────────────────────────────
|
||
|
||
def _run_help(args: list[str], timeout: int = 8) -> str:
    """Run a command and return its stdout followed by stderr, lower-cased.

    Returns an empty string when the command times out (a warning is logged)
    or cannot be run at all.
    """
    child_env = dict(os.environ)
    child_env["HOME"] = "/home/openclaw"
    try:
        finished = subprocess.run(
            args,
            capture_output=True,
            text=True,
            timeout=timeout,
            env=child_env,
        )
    except subprocess.TimeoutExpired:
        log.warning(f" ⚠️ Timeout({timeout}s) sur: {' '.join(args[:4])}")
        return ""
    except Exception:
        return ""
    combined = finished.stdout + finished.stderr
    return combined.lower()
|
||
|
||
def probe_openclaw_syntax() -> list[str]|None:
    """Detect which openclaw command line can be used to launch an agent.

    Each candidate pairs a ``--help`` command with a spawn template and a list
    of keywords; the first candidate whose help output contains all of its
    keywords wins. Help output is cached per command, so a help command shared
    by several candidates (and by the diagnostic dump) is executed only once.

    Returns:
        The spawn template, a list of arguments containing the ``{agent}`` and
        ``{task}`` placeholders, or None when openclaw is not in PATH or no
        candidate matched.
    """
    which = subprocess.run(["which", "openclaw"], capture_output=True, text=True)
    if which.returncode != 0:
        log.error("❌ 'openclaw' introuvable dans PATH.")
        return None
    log.info(f"✅ openclaw trouvé : {which.stdout.strip()}")

    # (help command, spawn template, keywords expected in the help text)
    candidates = [
        (
            ["openclaw", "agent", "--help"],
            ["openclaw", "agent", "--agent", "{agent}", "--task", "{task}"],
            ["agent", "task"],
        ),
        (
            ["openclaw", "agent", "--help"],
            ["openclaw", "agent", "--agent", "{agent}", "--message", "{task}"],
            ["agent", "message"],
        ),
        (
            ["openclaw", "agent", "--help"],
            ["openclaw", "agent", "{agent}", "--task", "{task}"],
            ["agent"],
        ),
        (
            ["openclaw", "agents", "run", "--help"],
            ["openclaw", "agents", "run", "--agent", "{agent}", "--task", "{task}"],
            ["agent", "task"],
        ),
        (
            ["openclaw", "agents", "spawn", "--help"],
            ["openclaw", "agents", "spawn", "--agent", "{agent}", "--task", "{task}"],
            ["agent", "task"],
        ),
        (
            ["openclaw", "clawbot", "--help"],
            ["openclaw", "clawbot", "run", "--agent", "{agent}", "--task", "{task}"],
            ["agent", "task"],
        ),
    ]

    help_cache: dict[tuple, str] = {}

    def cached_help(cmd: list[str]) -> str:
        """Return the help text of ``cmd``, running the command at most once."""
        key = tuple(cmd)
        if key not in help_cache:
            help_cache[key] = _run_help(cmd, timeout=8)
        return help_cache[key]

    for help_cmd, spawn_template, keywords in candidates:
        output = cached_help(help_cmd)
        if not output:
            continue
        if all(kw in output for kw in keywords):
            log.info(f"✅ Syntaxe openclaw détectée : {' '.join(spawn_template[:5])}")
            return spawn_template

    # Nothing matched: dump the start of the help texts to ease diagnosis.
    log.warning("⚠️ Aucune syntaxe connue détectée.")
    log.warning(" Lance: openclaw agent --help pour voir la syntaxe réelle")
    for dbg_cmd in [["openclaw", "agent", "--help"], ["openclaw", "agents", "--help"]]:
        out = cached_help(dbg_cmd)
        if out:
            log.warning(f" --- {' '.join(dbg_cmd)} ---")
            for line in out.splitlines()[:20]:
                log.warning(f" {line}")
    return None
|
||
|
||
def build_spawn_cmd(template: list[str], agent_label: str, task_msg: str) -> list[str]:
    """Return a copy of ``template`` with ``{agent}`` and ``{task}`` substituted.

    ``{agent}`` is replaced first, then ``{task}``; the template itself is
    not modified.
    """
    command = []
    for part in template:
        with_agent = part.replace("{agent}", agent_label)
        command.append(with_agent.replace("{task}", task_msg))
    return command
|
||
|
||
# ─── SESSION ENV ───────────────────────────────────────────────────────────────
|
||
|
||
def _get_session_env() -> dict:
    """Build the environment passed to spawned agents.

    Starts from the daemon's own environment (with HOME forced to
    /home/openclaw) and copies the desktop-session variables from the
    environment of the running ``openclaw-gateway`` process, read from
    ``/proc/<pid>/environ``. If XDG_RUNTIME_DIR or DBUS_SESSION_BUS_ADDRESS
    are still missing afterwards, defaults under ``/run/user/<uid>`` are
    filled in.

    Returns:
        A new dict suitable for ``subprocess.Popen(env=...)``.
    """
    SESSION_VARS = {
        "DBUS_SESSION_BUS_ADDRESS", "XDG_RUNTIME_DIR", "DISPLAY",
        "XAUTHORITY", "XDG_SESSION_TYPE", "XDG_CURRENT_DESKTOP",
    }
    env = {**os.environ, "HOME": "/home/openclaw"}

    # PID of the gateway: first line printed by pgrep, if any.
    try:
        gw_result = subprocess.run(
            ["pgrep", "-u", "openclaw", "-x", "openclaw-gateway"],
            capture_output=True, text=True
        )
        gw_pid = gw_result.stdout.strip().splitlines()[0] if gw_result.stdout.strip() else None
    except Exception:
        gw_pid = None

    if gw_pid:
        try:
            with open(f"/proc/{gw_pid}/environ", "rb") as f:
                raw = f.read()
            injected = []
            # /proc/<pid>/environ is a sequence of NUL-separated KEY=VALUE items.
            for item in raw.split(b"\x00"):
                if b"=" not in item:
                    continue
                key, _, val = item.partition(b"=")
                key_str = key.decode(errors="replace")
                if key_str in SESSION_VARS:
                    env[key_str] = val.decode(errors="replace")
                    injected.append(key_str)
            if injected:
                log.debug(f" ENV injecté depuis gateway PID {gw_pid}: {', '.join(injected)}")
        except PermissionError:
            log.warning(f" ⚠️ Accès refusé à /proc/{gw_pid}/environ")
        except Exception as e:
            log.warning(f" ⚠️ Impossible de lire l'env du gateway: {e}")
    else:
        log.warning(" ⚠️ openclaw-gateway introuvable via pgrep")

    # os.getuid() replaces the former `id -u` subprocess: no fork, and no
    # FileNotFoundError that the caller would misreport as a missing openclaw.
    uid = os.getuid()
    env.setdefault("XDG_RUNTIME_DIR", f"/run/user/{uid}")
    env.setdefault("DBUS_SESSION_BUS_ADDRESS", f"unix:path=/run/user/{uid}/bus")

    return env
|
||
|
||
# ─── TASK BUILDER ──────────────────────────────────────────────────────────────
|
||
|
||
def build_task_for_agent(agent_name: str, state: dict, state_file: Path) -> str:
    """Build the instruction message handed to an agent when it is spawned.

    The message starts with a common preamble (agent name, project name,
    state-file path, optional test-mode notice) followed by the mission text
    of the given agent. Foxy-Dev and Foxy-UIUX also receive the JSON list of
    the PENDING tasks assigned to them (case-insensitive match).

    A ``tasks`` value of None and tasks whose ``assigned_to`` is missing,
    None or not a string are tolerated: such tasks simply never match.

    Args:
        agent_name: Display name of the agent, e.g. "Foxy-Dev".
        state: Parsed project state.
        state_file: Path of the state file, quoted in the message.

    Returns:
        The full message; unknown agents get the preamble plus a generic mission.
    """
    project = state.get("project_name", "Projet Inconnu")
    state_path = str(state_file)
    tasks = state.get("tasks") or []
    test_mode = state.get("test_mode", False)

    wanted = agent_name.lower()
    pending = [
        t for t in tasks
        if isinstance(t, dict)
        and t.get("status") == "PENDING"
        # str(... or "") guards against "assigned_to": null in the JSON.
        and str(t.get("assigned_to") or "").lower() == wanted
    ]
    pending_json = json.dumps(pending, ensure_ascii=False)

    base = (
        f"Tu es {agent_name}. "
        f"Projet actif : {project}. "
        f"Fichier d'état : {state_path}. "
        f"{'MODE TEST : simule ton travail sans produire de code réel. ' if test_mode else ''}"
        f"Lis ce fichier IMMÉDIATEMENT, exécute ta mission, "
        f"puis mets à jour project_state.json avec tes résultats et le nouveau statut. "
    )

    instructions = {
        "Foxy-Conductor": (
            base +
            "MISSION : Analyse la demande dans project_state.json. "
            "Crée les tâches initiales dans tasks[], "
            "puis change status à 'AWAITING_ARCHITECT'. "
            "Ajoute ton entrée dans audit_log."
        ),
        "Foxy-Architect": (
            base +
            "MISSION : Lis project_state.json. "
            "Produis l'architecture technique (ADR), "
            "découpe en tickets dans tasks[] avec assigned_to, acceptance_criteria, depends_on. "
            "Change status à 'AWAITING_DEV' ou 'AWAITING_UIUX'. "
            "Mets à jour project_state.json."
        ),
        "Foxy-Dev": (
            base +
            "MISSION : Prends la première tâche PENDING assignée à toi. "
            f"Tâches en attente : {pending_json}. "
            "Écris le code, commit sur branche task/TASK-XXX via Gitea. "
            "Change statut tâche → 'IN_REVIEW', projet → 'AWAITING_QA'. "
            "Mets à jour project_state.json."
        ),
        "Foxy-UIUX": (
            base +
            "MISSION : Prends la première tâche UI/PENDING assignée à toi. "
            f"Tâches en attente : {pending_json}. "
            "Crée les composants React/TypeScript, commit sur branche task/TASK-XXX-ui. "
            "Change statut tâche → 'IN_REVIEW', projet → 'AWAITING_QA'. "
            "Mets à jour project_state.json."
        ),
        "Foxy-QA": (
            base +
            "MISSION : Audite toutes les tâches 'IN_REVIEW'. "
            "Si APPROUVÉ → statut tâche = 'READY_FOR_DEPLOY'. "
            "Si REJETÉ → statut tâche = 'PENDING' + qa_feedback + reassign agent original. "
            "Si toutes READY_FOR_DEPLOY → status projet = 'AWAITING_DEPLOY'. "
            "Sinon → status = 'AWAITING_DEV' ou 'AWAITING_UIUX'. "
            "Mets à jour project_state.json."
        ),
        "Foxy-Admin": (
            base +
            "MISSION : Déploie toutes les tâches 'READY_FOR_DEPLOY'. "
            "Backup avant déploiement. Change chaque tâche → 'DONE'. "
            "Si tout DONE → status projet = 'COMPLETED' + génère final_report. "
            "Mets à jour project_state.json."
        ),
    }

    return instructions.get(agent_name, base + "Exécute ta mission et mets à jour project_state.json.")
|
||
|
||
# ─── SPAWN AGENT ───────────────────────────────────────────────────────────────
|
||
|
||
def spawn_agent(agent_name: str, task_message: str, project_slug: str) -> bool:
    """Start an openclaw agent and register it in ``_process_tracker``.

    The command is built from the probed template. After a 5-second grace
    period a process that has already exited with a non-zero code is treated
    as a failed spawn and its stderr is logged.

    Args:
        agent_name: Display name of the agent (key of AGENT_LABELS).
        task_message: Instruction text substituted for ``{task}``.
        project_slug: Key under which the process is tracked.

    Returns:
        True when the process was started and tracked, False otherwise.
    """
    global _OPENCLAW_SPAWN_CMD, _process_tracker

    if _OPENCLAW_SPAWN_CMD is None:
        log.error("❌ Syntaxe openclaw non initialisée.")
        return False

    agent_label = AGENT_LABELS.get(agent_name, agent_name.lower().replace(" ", "-"))
    cmd = build_spawn_cmd(_OPENCLAW_SPAWN_CMD, agent_label, task_message)

    log.info(f" 🚀 Spawn: {agent_name} (agent: {agent_label})")
    # Long arguments (the task text) are shortened in the log line.
    cmd_display = " ".join(c if len(c) < 50 else c[:47] + "..." for c in cmd)
    log.info(f" CMD: {cmd_display}")

    try:
        session_env = _get_session_env()
        proc = subprocess.Popen(
            cmd,
            # stdout is never read by the daemon; leaving it on a PIPE would
            # block the agent as soon as the pipe buffer fills up.
            stdout=subprocess.DEVNULL,
            # NOTE(review): stderr stays a PIPE because the process watcher
            # reads it after exit; a very verbose stderr could still block.
            stderr=subprocess.PIPE,
            env=session_env,
            cwd=str(WORKSPACE),
        )
        # Grace period to catch commands that fail right away.
        time.sleep(5)
        if proc.poll() is not None and proc.returncode != 0:
            _, stderr = proc.communicate(timeout=5)
            err_msg = (stderr or b"").decode(errors="replace")[:400]
            log.error(f" ❌ Spawn échoué immédiatement (code {proc.returncode}):")
            for line in err_msg.splitlines():
                log.error(f" {line}")
            log.error(" 💡 Lance: openclaw agent --help pour voir la syntaxe réelle")
            return False

        log.info(f" ✅ {agent_name} spawné (PID: {proc.pid})")
        _process_tracker[project_slug] = (proc, agent_name)
        return True

    except FileNotFoundError:
        log.error(" ❌ 'openclaw' introuvable dans PATH.")
        return False
    except Exception as e:
        log.error(f" ❌ Erreur spawn {agent_name}: {e}")
        return False
|
||
|
||
def is_agent_running(agent_name: str, project_slug: str) -> bool:
    """Tell whether ``agent_name`` is the tracked, still-alive process of this project.

    Only ``_process_tracker`` is consulted (no pgrep), so processes started
    outside this daemon are not seen.
    """
    entry = _process_tracker.get(project_slug)
    if entry is None:
        return False
    proc, tracked_agent = entry
    return tracked_agent == agent_name and proc.poll() is None
|
||
|
||
# ─── PROCESS WATCHER ───────────────────────────────────────────────────────────
|
||
|
||
def check_finished_agents(state_file: Path):
    """Handle a tracked agent process that has exited.

    Nothing happens when the project has no tracked process or the process is
    still alive. Otherwise:

    * if the project is still in a ``*_RUNNING`` status, the agent ended
      without updating the state: the failure counter is incremented, the
      status is reset to the matching ``AWAITING_*`` value and a Telegram
      notification is sent; once MAX_CONSECUTIVE_FAILURES is reached the
      project is flagged for suspension;
    * otherwise the agent did its job and the failure counter is reset.

    In every case the process is removed from ``_process_tracker``.
    """
    # Local import: escapes dynamic text embedded in Telegram HTML messages.
    from html import escape

    project_slug = state_file.parent.name
    if project_slug not in _process_tracker:
        return

    proc, agent_name = _process_tracker[project_slug]
    if proc.poll() is None:
        return  # still alive

    state = load_state(state_file)
    if not state:
        del _process_tracker[project_slug]
        return

    status = state.get("status", "")
    project_name = state.get("project_name", project_slug)
    exit_code = proc.returncode

    if status in RUNNING_STATUSES and status not in ("COMPLETED", "FAILED"):
        # Map "X_RUNNING" back to the "AWAITING_X" status that leads to it.
        awaiting = {v[1]: k for k, v in STATUS_TRANSITIONS.items()}
        reset_to = awaiting.get(status, "AWAITING_CONDUCTOR")

        try:
            _, stderr = proc.communicate(timeout=2)
            # errors="replace": one invalid byte must not discard the whole snippet.
            err_snippet = (stderr or b"").decode(errors="replace")[:300].strip()
        except Exception:
            err_snippet = ""

        _failure_counter[project_slug] = _failure_counter.get(project_slug, 0) + 1
        nb = _failure_counter[project_slug]

        if exit_code != 0:
            log.error(f" ❌ {agent_name} terminé en erreur (code {exit_code}):")
            for line in err_snippet.splitlines():
                log.error(f" {line}")

        log.error(f" 🔄 Reset {project_name}: {status} → {reset_to} (échec #{nb})")
        mark_status(state_file, state, reset_to, "foxy-autopilot-process-watcher")

        # Free text must be escaped: a raw "<" or "&" makes Telegram reject
        # an HTML-mode message, and the notification would be lost.
        safe_name = escape(str(project_name), quote=False)
        safe_err = escape(err_snippet[:150], quote=False)

        if nb >= MAX_CONSECUTIVE_FAILURES:
            # 0 = "suspension requested"; process_project computes the resume cycle.
            _suspended_until[project_slug] = 0
            log.error(
                f" 🚫 {project_name}: {nb} échecs consécutifs — SUSPENDU "
                f"({SUSPEND_DURATION_CYCLES} cycles).\n"
                f" 💡 Vérifie: systemctl --user status openclaw-gateway"
            )
            notify(
                f"🦊 🚫 <b>Projet SUSPENDU</b>\n"
                f"📋 {safe_name}\n"
                f"❌ {nb} échecs consécutifs de {agent_name}\n"
                f"⏸️ Pause de {SUSPEND_DURATION_CYCLES * POLL_INTERVAL}s\n"
                f"<code>{safe_err}</code>"
            )
        else:
            notify(
                f"🦊 ⚠️ <b>Agent échoué ({nb}/{MAX_CONSECUTIVE_FAILURES})</b>\n"
                f"📋 {safe_name} — {agent_name} (code {exit_code})\n"
                f"🔄 Reset → {reset_to}"
            )
    else:
        _failure_counter[project_slug] = 0
        log.info(f" ✅ {agent_name} terminé proprement (code {exit_code}), statut: {status}")

    del _process_tracker[project_slug]
|
||
|
||
# ─── WATCHDOG ──────────────────────────────────────────────────────────────────
|
||
|
||
def check_stuck_agents(state_file: Path):
    """Reset a project that has stayed in a ``*_RUNNING`` status for too long.

    The age is measured from ``updated_at`` (falling back to ``created_at``).
    When it exceeds SPAWN_TIMEOUT seconds the status is set back to the
    matching ``AWAITING_*`` value and a Telegram notification is sent.
    Timestamps without timezone information are interpreted as UTC.

    NOTE(review): the tracked agent process, if any, is not terminated here;
    confirm whether a stuck process should be killed before the reset.
    """
    state = load_state(state_file)
    if not state:
        return

    status = state.get("status", "")
    if status not in RUNNING_STATUSES or status in ("COMPLETED", "FAILED"):
        return

    updated_at_str = state.get("updated_at", state.get("created_at", ""))
    if not updated_at_str:
        return

    try:
        # fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
        updated_at = datetime.fromisoformat(updated_at_str.replace("Z", "+00:00"))
        if updated_at.tzinfo is None:
            # Subtracting a naive datetime from an aware one raises TypeError,
            # which used to disable the watchdog silently for such files.
            updated_at = updated_at.replace(tzinfo=UTC)
        elapsed = (utcnow() - updated_at).total_seconds()
    except Exception:
        # Unparseable timestamp: no basis for a timeout decision.
        return

    if elapsed > SPAWN_TIMEOUT:
        project_name = state.get("project_name", state_file.parent.name)
        log.warning(f" ⚠️ {project_name} bloqué depuis {elapsed/3600:.1f}h — reset")
        # Map "X_RUNNING" back to the "AWAITING_X" status that leads to it.
        awaiting = {v[1]: k for k, v in STATUS_TRANSITIONS.items()}
        reset_to = awaiting.get(status, "AWAITING_CONDUCTOR")
        mark_status(state_file, state, reset_to, "foxy-autopilot-watchdog")
        notify(
            f"🦊 ⚠️ <b>Watchdog</b>\n"
            f"📋 {project_name}\n"
            f"⏱️ Bloqué depuis {elapsed/3600:.1f}h → Reset {reset_to}"
        )
|
||
|
||
# ─── BOUCLE PRINCIPALE ─────────────────────────────────────────────────────────
|
||
|
||
def find_project_states() -> list[Path]:
    """List the ``project_state.json`` file of every project directory in WORKSPACE.

    Only direct sub-directories are inspected; those without a state file are
    skipped. The order is the one produced by ``WORKSPACE.iterdir()``.
    """
    return [
        candidate
        for entry in WORKSPACE.iterdir()
        if entry.is_dir() and (candidate := entry / "project_state.json").exists()
    ]
|
||
|
||
def process_project(state_file: Path, current_cycle: int = 0):
    """Advance one project by one step of the pipeline.

    Order of operations:

    1. Load the state; give up if it cannot be read.
    2. Honour a suspension (set after too many failures) until its resume cycle.
    3. Let the process watcher handle a finished agent. This now happens for
       every status, not only ``*_RUNNING``: an agent that exited after
       updating the state is reaped and the failure counter is reset, so that
       only truly consecutive failures lead to a suspension.
    4. If the status is a waiting status and the agent is not already running,
       spawn the agent and move the project to its ``*_RUNNING`` status.

    Args:
        state_file: Path of the project's project_state.json.
        current_cycle: Number of the current polling cycle.
    """
    state = load_state(state_file)
    if not state:
        return

    status = state.get("status", "")
    project_slug = state_file.parent.name
    project_name = state.get("project_name", project_slug)

    # Suspension handling
    if project_slug in _suspended_until:
        resume_at = _suspended_until[project_slug]
        if resume_at == 0:
            # Suspension was just requested: compute the resume cycle now.
            _suspended_until[project_slug] = current_cycle + SUSPEND_DURATION_CYCLES
            log.warning(f" 🚫 {project_name}: suspendu jusqu'au cycle #{current_cycle + SUSPEND_DURATION_CYCLES}")
            return
        elif current_cycle < resume_at:
            log.warning(f" 🚫 {project_name}: suspendu encore {resume_at - current_cycle} cycle(s)")
            return
        else:
            log.info(f" ▶️ {project_name}: suspension levée, reprise")
            del _suspended_until[project_slug]
            _failure_counter[project_slug] = 0

    if status in RUNNING_STATUSES:
        # Running or terminal status: nothing to spawn. The watcher resets the
        # project if its agent died, or simply reaps a finished process.
        check_finished_agents(state_file)
        return

    if project_slug in _process_tracker:
        # The status already moved on: a finished agent is reaped here and the
        # failure counter cleared; a still-running one is left alone.
        check_finished_agents(state_file)

    if status not in STATUS_TRANSITIONS:
        log.debug(f" ℹ️ {project_name}: statut '{status}' non géré")
        return

    agent_name, running_status = STATUS_TRANSITIONS[status]
    log.info(f"📋 Projet: {project_name} | Statut: {status} → Agent: {agent_name}")

    if is_agent_running(agent_name, project_slug):
        log.info(f" ⏳ {agent_name} déjà actif pour {project_slug}, on attend...")
        return

    task_msg = build_task_for_agent(agent_name, state, state_file)
    success = spawn_agent(agent_name, task_msg, project_slug=project_slug)

    if success:
        mark_status(state_file, state, running_status, "foxy-autopilot")
        notify(
            f"🦊 <b>Foxy Dev Team</b>\n"
            f"📋 <b>{project_name}</b>\n"
            f"🤖 {agent_name} lancé\n"
            f"📊 {status} → {running_status}"
        )
    else:
        log.error(f" ❌ Échec spawn {agent_name} pour {project_name}")
        notify(
            f"🦊 ⚠️ <b>Échec spawn</b>\n"
            f"📋 {project_name}\n"
            f"❌ Impossible de lancer {agent_name}\n"
            f"Vérifie les logs : {LOG_FILE}"
        )
|
||
|
||
# ─── DAEMON ────────────────────────────────────────────────────────────────────
|
||
|
||
def run_daemon():
    """Run the polling loop until a stop signal is received.

    Startup: take the PID lock (exit code 1 if another instance holds it),
    probe the openclaw command syntax (exit code 1 if none is found) and send
    a start notification. Each cycle then processes every project of the
    workspace and runs the stuck-agent watchdog on it.

    Each project is handled in its own try/except so that an error in one
    project no longer prevents the following ones from being processed.
    """
    global _OPENCLAW_SPAWN_CMD

    if not acquire_pid_lock():
        sys.exit(1)

    log.info("=" * 60)
    log.info("🦊 FOXY AUTO-PILOT DAEMON v2.3 — DÉMARRÉ")
    log.info(f" Workspace : {WORKSPACE}")
    log.info(f" Polling : {POLL_INTERVAL}s")
    log.info(f" Log : {LOG_FILE}")
    log.info("=" * 60)

    log.info("🔍 Détection syntaxe openclaw...")
    _OPENCLAW_SPAWN_CMD = probe_openclaw_syntax()
    if _OPENCLAW_SPAWN_CMD is None:
        log.error("❌ Impossible de détecter la syntaxe openclaw — daemon arrêté.")
        sys.exit(1)

    notify(
        "🦊 <b>Foxy Auto-Pilot v2.3 démarré</b>\n"
        f"⏱️ Polling toutes les {POLL_INTERVAL}s"
    )

    cycle = 0
    while _running:
        cycle += 1
        log.info(f"🔍 Cycle #{cycle} — {utcnow().strftime('%H:%M:%S')} UTC")
        try:
            state_files = find_project_states()
        except Exception as e:
            log.error(f"Erreur cycle #{cycle}: {e}", exc_info=True)
            state_files = None

        if state_files is not None and not state_files:
            log.info(" (aucun projet dans le workspace)")

        for sf in state_files or []:
            try:
                process_project(sf, current_cycle=cycle)
                check_stuck_agents(sf)
            except Exception as e:
                # Isolate failures: the other projects still get their turn.
                log.error(f"Erreur cycle #{cycle} ({sf.parent.name}): {e}", exc_info=True)

        log.info(f"⏳ Prochaine vérification dans {POLL_INTERVAL}s...\n")
        # Sleep one second at a time so a stop signal is honoured quickly.
        for _ in range(POLL_INTERVAL):
            if not _running:
                break
            time.sleep(1)

    log.info("🛑 Daemon arrêté proprement.")
    notify("🛑 <b>Foxy Auto-Pilot arrêté</b>")
|
||
|
||
# ─── ENTRY POINT ───────────────────────────────────────────────────────────────
|
||
|
||
if __name__ == "__main__":
    # The first argument selects the mode; with no recognised flag the daemon runs.
    cli_mode = sys.argv[1] if len(sys.argv) > 1 else None

    if cli_mode == "--submit":
        # Create a new project directory with an initial state file.
        if len(sys.argv) < 3:
            print("Usage: python3 foxy-autopilot.py --submit 'Description'")
            sys.exit(1)
        description = " ".join(sys.argv[2:])
        project_slug = "proj-" + utcnow().strftime("%Y%m%d-%H%M%S")
        proj_dir = WORKSPACE / project_slug
        proj_dir.mkdir(parents=True, exist_ok=True)
        state_file = proj_dir / "project_state.json"
        initial_state = {
            "project_name": project_slug,
            "description": description,
            "status": "AWAITING_CONDUCTOR",
            "created_at": utcnow_iso(),
            "updated_at": utcnow_iso(),
            "tasks": [],
            "audit_log": [
                {
                    "timestamp": utcnow_iso(),
                    "action": "PROJECT_SUBMITTED",
                    "agent": "user",
                    "details": description[:200],
                }
            ],
        }
        with open(state_file, "w") as out:
            json.dump(initial_state, out, indent=2, ensure_ascii=False)
        print(f"✅ Projet soumis : {project_slug}")
        print(f"📁 State file : {state_file}")

    elif cli_mode == "--probe":
        # Only report which openclaw syntax would be used.
        print("🔍 Probe syntaxe openclaw...")
        cmd = probe_openclaw_syntax()
        if cmd:
            print(f"✅ Syntaxe détectée : {' '.join(cmd)}")
        else:
            print("❌ Aucune syntaxe détectée.")

    elif cli_mode == "--reset-running":
        # Manual unblock: put every *_RUNNING project back to its AWAITING_* status.
        print("🔄 Reset de tous les projets RUNNING → AWAITING...")
        awaiting_map = {running: waiting for waiting, (_, running) in STATUS_TRANSITIONS.items()}
        count = 0
        for sf in find_project_states():
            state = load_state(sf)
            if not state:
                continue
            status = state.get("status", "")
            if status in ("COMPLETED", "FAILED") or status not in RUNNING_STATUSES:
                continue
            reset_to = awaiting_map.get(status, "AWAITING_CONDUCTOR")
            project_name = state.get("project_name", sf.parent.name)
            print(f" {project_name}: {status} → {reset_to}")
            mark_status(sf, state, reset_to, "foxy-autopilot-manual-reset")
            count += 1
        print(f"✅ {count} projet(s) remis en attente.")

    else:
        run_daemon()
|