ics-simlab-config-gen-claude/tools/generate_process_spec.py

#!/usr/bin/env python3
"""
Generate process_spec.json from a textual prompt using an LLM.

Uses structured output (json_schema) to ensure a valid ProcessSpec.

Usage:
    python3 -m tools.generate_process_spec \
        --prompt examples/water_tank/prompt.txt \
        --config outputs/configuration.json \
        --out outputs/process_spec.json
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from models.process_spec import ProcessSpec, get_process_spec_json_schema

SYSTEM_PROMPT = """\
You are an expert in process control and physics modeling for ICS simulations.
Your task is to generate a ProcessSpec JSON object that describes the physics of a water tank system.
The ProcessSpec must match this exact schema and contain realistic physical parameters.
Guidelines:
1. model: must be "water_tank_v1"
2. dt: simulation time step in seconds (typically 0.05 to 0.5)
3. params:
- level_min: minimum level in meters (typically 0)
- level_max: maximum level in meters (e.g., 1.0 to 10.0)
- level_init: initial level (must be between min and max)
- area: tank cross-sectional area in m^2 (e.g., 0.5 to 10.0)
- q_in_max: maximum inflow rate in m^3/s when valve fully open (e.g., 0.001 to 0.1)
- k_out: outflow coefficient in m^2.5/s (Q_out = k_out * sqrt(level))
4. signals: map logical names to actual HIL physical_values keys from the config
The signals must use keys that exist in the HIL's physical_values in the provided configuration.
Output ONLY the JSON object, no explanations.
"""


def build_user_prompt(scenario_text: str, config_json: str) -> str:
    """Build the user prompt with scenario and config context."""
    return f"""\
Scenario description:
{scenario_text}

Current configuration.json (use physical_values keys from hils[]):
{config_json}

Generate a ProcessSpec JSON for the water tank physics in this scenario.
Map the signals to the correct physical_values keys from the HIL configuration.
"""


def generate_process_spec(
    client: OpenAI,
    model: str,
    prompt_text: str,
    config_text: str,
    max_output_tokens: int = 1000,
) -> ProcessSpec:
    """Generate a ProcessSpec via the LLM, using structured output."""
    schema = get_process_spec_json_schema()
    user_prompt = build_user_prompt(prompt_text, config_text)
    req = {
        "model": model,
        "input": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "max_output_tokens": max_output_tokens,
        "text": {
            "format": {
                "type": "json_schema",
                "name": "process_spec",
                "strict": True,
                "schema": schema,
            },
        },
    }
    # GPT-5 models: use reasoning instead of temperature
    if model.startswith("gpt-5"):
        req["reasoning"] = {"effort": "minimal"}
    else:
        req["temperature"] = 0
    resp = client.responses.create(**req)
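    # With a strict json_schema format the text should parse cleanly, but a
    # response truncated at max_output_tokens would make json.loads() below
    # raise; checking resp.status == "incomplete" first is a possible guard
    # (a suggested hardening, not part of the original flow).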
    # Extract JSON from response
    raw_text = resp.output_text
    spec_dict = json.loads(raw_text)
    return ProcessSpec.model_validate(spec_dict)
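
# Programmatic use (a sketch mirroring main() below; the model name and file
# paths are illustrative):
#
#   client = OpenAI()
#   spec = generate_process_spec(
#       client=client,
#       model="gpt-4o-mini",
#       prompt_text=Path("examples/water_tank/prompt.txt").read_text(),
#       config_text=Path("outputs/configuration.json").read_text(),
#   )
#   print(json.dumps(spec.model_dump(), indent=2))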


def main() -> None:
    load_dotenv()
    parser = argparse.ArgumentParser(
        description="Generate process_spec.json from a textual prompt using an LLM"
    )
    parser.add_argument(
        "--prompt",
        required=True,
        help="Path to prompt text file describing the scenario",
    )
    parser.add_argument(
        "--config",
        default="outputs/configuration.json",
        help="Path to configuration.json (for HIL physical_values context)",
    )
    parser.add_argument(
        "--out",
        default="outputs/process_spec.json",
        help="Output path for process_spec.json",
    )
    parser.add_argument(
        "--model",
        default="gpt-4o-mini",
        help="OpenAI model to use",
    )
    args = parser.parse_args()

    if not os.getenv("OPENAI_API_KEY"):
        raise SystemExit("OPENAI_API_KEY not set. Run: export OPENAI_API_KEY='...'")

    prompt_path = Path(args.prompt)
    config_path = Path(args.config)
    out_path = Path(args.out)
    if not prompt_path.exists():
        raise SystemExit(f"Prompt file not found: {prompt_path}")
    if not config_path.exists():
        raise SystemExit(f"Config file not found: {config_path}")

    prompt_text = prompt_path.read_text(encoding="utf-8")
    config_text = config_path.read_text(encoding="utf-8")

    print(f"Generating process spec from: {prompt_path}")
    print(f"Using config context from: {config_path}")
    print(f"Model: {args.model}")

    client = OpenAI()
    spec = generate_process_spec(
        client=client,
        model=args.model,
        prompt_text=prompt_text,
        config_text=config_text,
    )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(
        json.dumps(spec.model_dump(), indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    print(f"Wrote: {out_path}")


if __name__ == "__main__":
    main()