ics-simlab-config-gen-claude/tools/build_config.py

#!/usr/bin/env python3
"""
Build and validate ICS-SimLab configuration.

This is the config pipeline entrypoint that:
1. Loads raw JSON and validates/normalizes it with Pydantic v2 (type coercion,
   unless --strict is given)
2. Enriches with monitors/controllers (calls existing enrich_config),
   sanitizes connection IDs, and re-validates the enriched config
3. Optionally repairs the enriched config (--repair) and re-validates it
4. Runs semantic validation (unless --skip-semantic is given)
5. Writes the enriched config to <out-dir>/configuration.json (source of truth)

Usage:
    python3 -m tools.build_config \\
        --config examples/water_tank/configuration.json \\
        --out-dir outputs/test_config \\
        --overwrite

    # Strict mode (no type coercion, fail on type mismatch):
    python3 -m tools.build_config \\
        --config examples/water_tank/configuration.json \\
        --out-dir outputs/test_config \\
        --strict
"""

import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Any, Dict

from models.ics_simlab_config_v2 import Config, set_strict_mode
from tools.enrich_config import enrich_plc_connections, enrich_hmi_connections
from tools.semantic_validation import validate_all_semantics, SemanticError
from tools.repair_config import repair_orphan_devices, repair_boolean_types, repair_plc_local_registers, repair_hmi_controller_registers, repair_target_device_registers
from services.patches import patch_sanitize_connection_ids

# Root logging setup: INFO and above, rendered as "LEVEL: message".
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)


def load_and_normalize(raw_path: Path) -> Config:
    """
    Load JSON and validate with Pydantic, normalizing types.

    Every failure mode is reported the same way (a SystemExit carrying an
    "ERROR: ..." message) so the CLI never shows a raw traceback for a bad
    input file.

    Args:
        raw_path: Path to configuration.json

    Returns:
        Validated Config object

    Raises:
        SystemExit: If the file cannot be read or is not valid UTF-8, if it
            does not contain valid JSON, or if Pydantic validation fails.
    """
    # Reading can fail for reasons unrelated to the content (missing file,
    # directory instead of file, permissions, wrong encoding).
    try:
        raw_text = raw_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        raise SystemExit(f"ERROR: Cannot read {raw_path}: {e}") from e

    try:
        raw_data = json.loads(raw_text)
    except json.JSONDecodeError as e:
        raise SystemExit(f"ERROR: Invalid JSON in {raw_path}: {e}") from e

    # Kept broad on purpose: any exception raised while building the model is
    # reported as a validation failure.
    try:
        return Config.model_validate(raw_data)
    except Exception as e:
        raise SystemExit(f"ERROR: Pydantic validation failed:\n{e}") from e


def config_to_dict(cfg: Config) -> Dict[str, Any]:
    """Dump a validated config model to a JSON-serializable dict.

    Fields whose value is None are left out of the result
    (exclude_none=True). This prevents ICS-SimLab runtime errors such as
    'identity': None causing a TypeError when PLC code checks
    'if "identity" in configs'.
    """
    dump_options = {"mode": "json", "exclude_none": True}
    return cfg.model_dump(**dump_options)


def _print_banner(title: str) -> None:
    """Print *title* framed above and below by a 60-character '=' rule."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def _run_repairs(config_dict: Dict[str, Any]) -> tuple:
    """Run every repair pass in order, printing what each one changed.

    Args:
        config_dict: Enriched configuration as a plain dict.

    Returns:
        A ``(config_dict, actions)`` pair: the dict returned by the last
        repair pass, and the concatenation of the actions reported by all
        passes (empty when nothing was repaired).
    """
    # (banner title, repair function, message printed when nothing changed).
    # The table is built inside the function so the repair helpers are only
    # looked up when --repair is actually requested.
    passes = (
        (
            "Step 3a: Repairing orphan devices",
            repair_orphan_devices,
            "  No orphan devices found",
        ),
        (
            "Step 3b: Repairing boolean register types",
            repair_boolean_types,
            "  No boolean type issues found",
        ),
        (
            "Step 3c: Repairing PLC local register coherence",
            repair_plc_local_registers,
            "  No PLC local register issues found",
        ),
        (
            "Step 3d: Repairing HMI controller registers",
            repair_hmi_controller_registers,
            "  No HMI controller register issues found",
        ),
        (
            # Targets are actuators, sensors and PLCs.
            "Step 3e: Repairing target device registers",
            repair_target_device_registers,
            "  No target device register issues found",
        ),
    )

    all_actions = []
    for title, repair, nothing_to_do in passes:
        print()
        _print_banner(title)

        # Each pass receives the output of the previous one.
        config_dict, actions = repair(config_dict)
        all_actions.extend(actions)

        if actions:
            for action in actions:
                print(f"  REPAIRED: {action}")
        else:
            print(nothing_to_do)

    return config_dict, all_actions


def main() -> None:
    """Command-line entry point: build, validate and write the configuration.

    Pipeline: load/normalize -> enrich + sanitize connection IDs ->
    (optional) repair -> (optional) semantic validation -> write
    ``<out-dir>/configuration.json``.

    Raises:
        SystemExit: With an "ERROR: ..." message when the input is missing or
            not a file, the output already exists without --overwrite, or any
            validation stage fails. With --json-errors, semantic validation
            failures instead print a JSON document and exit with code 2.
    """
    parser = argparse.ArgumentParser(
        description="Build and validate ICS-SimLab configuration"
    )
    parser.add_argument(
        "--config",
        required=True,
        help="Input configuration.json path"
    )
    parser.add_argument(
        "--out-dir",
        required=True,
        help="Output directory for normalized and enriched configs"
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite existing output files"
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Strict mode: disable type coercion, fail on type mismatch"
    )
    parser.add_argument(
        "--skip-semantic",
        action="store_true",
        help="Skip semantic validation (for debugging)"
    )
    parser.add_argument(
        "--json-errors",
        action="store_true",
        help="Output semantic errors as JSON to stdout (for programmatic use)"
    )
    parser.add_argument(
        "--repair",
        action="store_true",
        help="Auto-repair orphan devices and boolean type issues"
    )
    args = parser.parse_args()

    config_path = Path(args.config)
    out_dir = Path(args.out_dir)

    if not config_path.exists():
        raise SystemExit(f"ERROR: Config file not found: {config_path}")
    # exists() is also true for directories; reject them here with a clear
    # message instead of failing later while reading.
    if not config_path.is_file():
        raise SystemExit(f"ERROR: Config path is not a file: {config_path}")

    # Enable strict mode if requested
    if args.strict:
        set_strict_mode(True)

    # Prepare output path (single file: configuration.json = enriched version)
    output_path = out_dir / "configuration.json"

    # Checked before creating the directory so a refused run leaves no trace.
    if output_path.exists() and not args.overwrite:
        raise SystemExit(f"ERROR: Output file exists: {output_path} (use --overwrite)")

    # Ensure output directory exists
    out_dir.mkdir(parents=True, exist_ok=True)

    # =========================================================================
    # Step 1: Load and normalize with Pydantic
    # =========================================================================
    _print_banner("Step 1: Loading and normalizing configuration")

    config = load_and_normalize(config_path)

    print(f"  Source: {config_path}")
    print(f"  PLCs: {len(config.plcs)}")
    print(f"  HILs: {len(config.hils)}")
    print(f"  Sensors: {len(config.sensors)}")
    print(f"  Actuators: {len(config.actuators)}")
    print(f"  HMIs: {len(config.hmis)}")
    print("  Pydantic validation: OK")

    # =========================================================================
    # Step 2: Enrich configuration
    # =========================================================================
    print()
    _print_banner("Step 2: Enriching configuration")

    # Work with dict for enrichment (existing enrich_config expects dict)
    config_dict = config_to_dict(config)
    enriched_dict = enrich_plc_connections(dict(config_dict))
    enriched_dict = enrich_hmi_connections(enriched_dict)

    # Sanitize connection IDs to docker-safe format [a-z0-9_]
    print()
    print("  Sanitizing connection IDs...")
    enriched_dict, conn_id_errors = patch_sanitize_connection_ids(enriched_dict)
    if conn_id_errors:
        for err in conn_id_errors:
            logger.warning("Connection ID patch error: %s", err)
        # Do not claim "OK" when the patch reported problems.
        print("  Connection IDs sanitized: completed with warnings")
    else:
        print("  Connection IDs sanitized: OK")

    # Re-validate enriched config with Pydantic
    print()
    print("  Re-validating enriched config...")
    try:
        enriched_config = Config.model_validate(enriched_dict)
    except Exception as e:
        raise SystemExit(
            f"ERROR: Enriched config failed Pydantic validation:\n{e}"
        ) from e
    print("  Enriched config validation: OK")

    # =========================================================================
    # Step 3: Repair (optional)
    # =========================================================================
    if args.repair:
        enriched_dict, all_repair_actions = _run_repairs(enriched_dict)

        # Re-validate only when a repair reported an action; otherwise the
        # model validated in step 2 is kept as-is.
        if all_repair_actions:
            print()
            print("  Re-validating after repairs...")
            try:
                enriched_config = Config.model_validate(enriched_dict)
            except Exception as e:
                raise SystemExit(
                    f"ERROR: Repair produced invalid config:\n{e}"
                ) from e
            print("  Post-repair validation: OK")

    # =========================================================================
    # Step 4: Semantic validation (P0 checks)
    # =========================================================================
    if not args.skip_semantic:
        print()
        _print_banner("Step 4: Semantic validation (P0 checks)")

        errors = validate_all_semantics(enriched_config)

        if errors:
            if args.json_errors:
                # Output errors as JSON for programmatic consumption.
                # NOTE(review): the step banners above are printed to stdout
                # as well, so consumers must locate the JSON document within
                # the output rather than parse stdout as a whole.
                error_list = [
                    {"entity": err.entity, "message": err.message}
                    for err in errors
                ]
                print(json.dumps({"semantic_errors": error_list}, indent=2))
                sys.exit(2)  # Exit code 2 = semantic validation failure

            print()
            print("SEMANTIC VALIDATION ERRORS:")
            for err in errors:
                print(f"  - {err}")
            print()
            # Only suggest --repair when it has not already been tried.
            if args.repair:
                hint = "Fix the configuration and retry."
            else:
                hint = (
                    "Fix the configuration and retry, "
                    "or use --repair to auto-fix orphans."
                )
            raise SystemExit(
                f"ERROR: Semantic validation failed with {len(errors)} error(s). "
                f"{hint}"
            )

        print("  HMI monitors/controllers: OK")
        print("  PLC monitors/controllers: OK")
        print("  Orphan devices: OK")
        print("  Boolean type rules: OK")
        print("  PLC local register coherence: OK")
    else:
        print()
        _print_banner("Step 4: Semantic validation (SKIPPED)")

    # =========================================================================
    # Step 5: Write final configuration
    # =========================================================================
    print()
    _print_banner("Step 5: Writing configuration.json")

    # Serialize from the validated model (not the working dict) so the file
    # contains exactly what passed validation, with None fields dropped.
    final_dict = config_to_dict(enriched_config)
    output_path.write_text(
        json.dumps(final_dict, indent=2, ensure_ascii=False),
        encoding="utf-8"
    )
    print(f"  Written: {output_path}")

    # =========================================================================
    # Summary
    # =========================================================================
    print()
    print("#" * 60)
    print("# SUCCESS: Configuration built and validated")
    print("#" * 60)
    print()
    print(f"Output: {output_path}")
    print()

    # Summarize enrichment
    for plc in enriched_config.plcs:
        n_conn = len(plc.outbound_connections)
        n_mon = len(plc.monitors)
        n_ctrl = len(plc.controllers)
        print(f"  {plc.name}: {n_conn} connections, {n_mon} monitors, {n_ctrl} controllers")

    for hmi in enriched_config.hmis:
        n_mon = len(hmi.monitors)
        n_ctrl = len(hmi.controllers)
        print(f"  {hmi.name}: {n_mon} monitors, {n_ctrl} controllers")


# Run the pipeline only when this module is executed as a script
# (e.g. `python3 -m tools.build_config`), not when it is imported.
if __name__ == "__main__":
    main()