#!/usr/bin/env python3
"""
Generate process_spec.json from a textual prompt using LLM.

Uses structured output (json_schema) to ensure valid ProcessSpec.

Usage:
    python3 -m tools.generate_process_spec \
        --prompt examples/water_tank/prompt.txt \
        --config outputs/configuration.json \
        --out outputs/process_spec.json
"""
from __future__ import annotations

import argparse
import json
import os
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI

from models.process_spec import ProcessSpec, get_process_spec_json_schema

SYSTEM_PROMPT = """\
You are an expert in process control and physics modeling for ICS simulations.

Your task is to generate a ProcessSpec JSON object that describes the physics
of a water tank system. The ProcessSpec must match this exact schema and
contain realistic physical parameters.

Guidelines:
1. model: must be "water_tank_v1"
2. dt: simulation time step in seconds (typically 0.05 to 0.5)
3. params:
   - level_min: minimum level in meters (typically 0)
   - level_max: maximum level in meters (e.g., 1.0 to 10.0)
   - level_init: initial level (must be between min and max)
   - area: tank cross-sectional area in m^2 (e.g., 0.5 to 10.0)
   - q_in_max: maximum inflow rate in m^3/s when valve fully open (e.g., 0.001 to 0.1)
   - k_out: outflow coefficient in m^2.5/s (Q_out = k_out * sqrt(level))
4. signals: map logical names to actual HIL physical_values keys from the config

The signals must use keys that exist in the HIL's physical_values in the
provided configuration.

Output ONLY the JSON object, no explanations.
"""


def build_user_prompt(scenario_text: str, config_json: str) -> str:
    """Build the user prompt with scenario and config context."""
    return f"""\
Scenario description:
{scenario_text}

Current configuration.json (use physical_values keys from hils[]):
{config_json}

Generate a ProcessSpec JSON for the water tank physics in this scenario.
Map the signals to the correct physical_values keys from the HIL configuration.
"""
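
# For illustration only: a ProcessSpec satisfying the guidelines in
# SYSTEM_PROMPT might look like the sketch below. The "signals" values here
# are hypothetical; the real keys must come from physical_values in the
# HILs of the loaded configuration.json.
#
# {
#   "model": "water_tank_v1",
#   "dt": 0.1,
#   "params": {
#     "level_min": 0.0,
#     "level_max": 2.0,
#     "level_init": 0.5,
#     "area": 1.0,
#     "q_in_max": 0.01,
#     "k_out": 0.005
#   },
#   "signals": {
#     "level": "tank_level",
#     "inlet_valve": "valve_in_cmd"
#   }
# }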
""" def generate_process_spec( client: OpenAI, model: str, prompt_text: str, config_text: str, max_output_tokens: int = 1000, ) -> ProcessSpec: """Generate ProcessSpec using LLM with structured output.""" schema = get_process_spec_json_schema() user_prompt = build_user_prompt(prompt_text, config_text) req = { "model": model, "input": [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}, ], "max_output_tokens": max_output_tokens, "text": { "format": { "type": "json_schema", "name": "process_spec", "strict": True, "schema": schema, }, }, } # GPT-5 models: use reasoning instead of temperature if model.startswith("gpt-5"): req["reasoning"] = {"effort": "minimal"} else: req["temperature"] = 0 resp = client.responses.create(**req) # Extract JSON from response raw_text = resp.output_text spec_dict = json.loads(raw_text) return ProcessSpec.model_validate(spec_dict) def main() -> None: load_dotenv() parser = argparse.ArgumentParser( description="Generate process_spec.json from textual prompt using LLM" ) parser.add_argument( "--prompt", required=True, help="Path to prompt text file describing the scenario", ) parser.add_argument( "--config", default="outputs/configuration.json", help="Path to configuration.json (for HIL physical_values context)", ) parser.add_argument( "--out", default="outputs/process_spec.json", help="Output path for process_spec.json", ) parser.add_argument( "--model", default="gpt-4o-mini", help="OpenAI model to use", ) args = parser.parse_args() if not os.getenv("OPENAI_API_KEY"): raise SystemExit("OPENAI_API_KEY not set. Run: export OPENAI_API_KEY='...'") prompt_path = Path(args.prompt) config_path = Path(args.config) out_path = Path(args.out) if not prompt_path.exists(): raise SystemExit(f"Prompt file not found: {prompt_path}") if not config_path.exists(): raise SystemExit(f"Config file not found: {config_path}") prompt_text = prompt_path.read_text(encoding="utf-8") config_text = config_path.read_text(encoding="utf-8") print(f"Generating process spec from: {prompt_path}") print(f"Using config context from: {config_path}") print(f"Model: {args.model}") client = OpenAI() spec = generate_process_spec( client=client, model=args.model, prompt_text=prompt_text, config_text=config_text, ) out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text( json.dumps(spec.model_dump(), indent=2, ensure_ascii=False), encoding="utf-8", ) print(f"Wrote: {out_path}") if __name__ == "__main__": main()