ics-simlab-config-gen-claude/tools/semantic_validation.py

772 lines
29 KiB
Python

#!/usr/bin/env python3
"""
Semantic validation for ICS-SimLab configuration.
Validates that HMI monitors and controllers correctly reference:
1. Valid outbound_connection_id in HMI's outbound_connections
2. Reachable target device (by IP)
3. Existing register on target device (by id)
4. Matching value_type and address
5. Network configuration: no duplicate IPs, valid subnets
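Reference and network checks are deterministic - no guessing.
If something cannot be verified, it fails with a clear error.
The one exception is validate_boolean_type_rules, which flags registers using name-based heuristics.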
"""
import ipaddress
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple, Union
from models.ics_simlab_config_v2 import (
Config,
HMI,
PLC,
Sensor,
Actuator,
RegisterBlock,
TCPConnection,
IPNetwork,
)
@dataclass
class SemanticError:
    """One finding reported by a semantic validation pass.

    ``str()`` renders the finding as ``"<entity>: <message>"``.
    """

    # Location of the problem inside the config, e.g. "hmi1.monitors[0]".
    entity: str
    # Human-readable explanation of what is wrong.
    message: str

    def __str__(self) -> str:
        return "{}: {}".format(self.entity, self.message)
# Device kinds that _build_device_by_ip indexes by IP address. The validators
# in this module read `.name` and `.registers` from these objects.
Device = Union[PLC, Sensor, Actuator]
def _build_device_by_ip(config: Config) -> Dict[str, Tuple[str, Device]]:
    """
    Index PLCs, sensors and actuators by their network IP address.

    Devices are visited in the order PLCs, sensors, actuators. A device is
    indexed only when it has a network section with a non-empty ip, so
    devices without one (e.g. RTU devices on serial ports) are left out.
    If several devices share an IP, the one visited last wins.

    Args:
        config: Validated Config object
    Returns:
        Dict mapping IP -> (device_type, device_object), where device_type
        is "plc", "sensor" or "actuator"
    """
    groups = (
        ("plc", config.plcs),
        ("sensor", config.sensors),
        ("actuator", config.actuators),
    )
    index: Dict[str, Tuple[str, Device]] = {}
    for kind, members in groups:
        for member in members:
            net = member.network
            if net and net.ip:
                index[net.ip] = (kind, member)
    return index
def _find_register_in_block(
    registers: RegisterBlock,
    register_id: str,
) -> Optional[Tuple[str, int, int]]:
    """
    Look up a register in a RegisterBlock by id or physical_value.

    The four tables are searched in the order coil, discrete_input,
    holding_register, input_register, and the first hit is returned.

    Args:
        registers: The RegisterBlock to search
        register_id: Value compared against each register's id and
            physical_value (sensors use physical_value)
    Returns:
        (value_type, address, count) of the first match, or None when no
        register matches
    """
    tables = (
        ("coil", registers.coil),
        ("discrete_input", registers.discrete_input),
        ("holding_register", registers.holding_register),
        ("input_register", registers.input_register),
    )
    # Lazy generator: address/count are only read for the register that hits.
    hits = (
        (table_name, entry.address, entry.count)
        for table_name, entries in tables
        for entry in entries
        if entry.id == register_id or entry.physical_value == register_id
    )
    return next(hits, None)
def _check_hmi_reference(
    hmi_name: str,
    kind: str,
    index: int,
    item,
    conn_to_ip: Dict[str, str],
    device_by_ip: Dict[str, Tuple[str, Device]],
) -> List[SemanticError]:
    """
    Validate one HMI monitor or controller entry.

    Checks run in order and stop at the first one that makes later checks
    impossible: connection id -> target IP -> register on target. When the
    register is found, value_type and address are both compared, so one
    entry can yield two errors.

    Args:
        hmi_name: Name of the owning HMI (used in the error entity)
        kind: "monitor" or "controller"; used verbatim in entity and messages
        index: Position of the entry in the HMI's monitors/controllers list
        item: The monitor/controller object (id, outbound_connection_id,
            value_type and address are read)
        conn_to_ip: TCP connection id -> target IP for this HMI
        device_by_ip: IP -> (device_type, device) index
    Returns:
        List of SemanticError objects for this entry (empty if valid)
    """
    entity = f"{hmi_name}.{kind}s[{index}] (id='{item.id}')"

    # 1. The referenced connection must be one of the HMI's TCP connections.
    if item.outbound_connection_id not in conn_to_ip:
        return [SemanticError(
            entity=entity,
            message=(
                f"outbound_connection_id '{item.outbound_connection_id}' "
                f"not found in HMI outbound_connections. "
                f"Available: {sorted(conn_to_ip)}"
            ),
        )]
    target_ip = conn_to_ip[item.outbound_connection_id]

    # 2. Some device must live at the connection's IP.
    if target_ip not in device_by_ip:
        return [SemanticError(
            entity=entity,
            message=(
                f"Target IP '{target_ip}' not found in any device. "
                f"Available IPs: {sorted(device_by_ip)}"
            ),
        )]
    device_type, device = device_by_ip[target_ip]

    # 3. The target device must expose a register with this id.
    reg_info = _find_register_in_block(device.registers, item.id)
    if reg_info is None:
        return [SemanticError(
            entity=entity,
            message=(
                f"Register '{item.id}' not found on {device_type} "
                f"'{device.name}' (IP: {target_ip})"
            ),
        )]
    expected_type, expected_addr, _ = reg_info  # count is not compared

    # 4. value_type and address must match exactly (no guessing).
    errors: List[SemanticError] = []
    if item.value_type != expected_type:
        errors.append(SemanticError(
            entity=entity,
            message=(
                f"value_type mismatch: {kind} has '{item.value_type}' "
                f"but {device.name}.{item.id} is '{expected_type}'"
            ),
        ))
    if item.address != expected_addr:
        errors.append(SemanticError(
            entity=entity,
            message=(
                f"address mismatch: {kind} has {item.address} "
                f"but {device.name}.{item.id} is at address {expected_addr}"
            ),
        ))
    return errors


def validate_hmi_semantics(config: Config) -> List[SemanticError]:
    """
    Validate HMI monitors and controllers semantically.

    For each monitor/controller:
    1. Verify outbound_connection_id exists in HMI's outbound_connections
       (only TCP connections with an id are considered)
    2. Verify target device (by IP) exists
    3. Verify register exists on target device
    4. Verify value_type and address match target register

    Errors are reported per HMI, monitors first and controllers second.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects (empty if all valid)
    """
    errors: List[SemanticError] = []
    device_by_ip = _build_device_by_ip(config)
    for hmi in config.hmis:
        # connection_id -> target_ip (TCP connections only)
        conn_to_ip: Dict[str, str] = {
            conn.id: conn.ip
            for conn in hmi.outbound_connections
            if isinstance(conn, TCPConnection) and conn.id
        }
        # Monitors and controllers follow the same rules; only the label differs.
        for kind, items in (("monitor", hmi.monitors), ("controller", hmi.controllers)):
            for i, item in enumerate(items):
                errors.extend(_check_hmi_reference(
                    hmi.name, kind, i, item, conn_to_ip, device_by_ip,
                ))
    return errors
def _check_plc_reference(
    plc_name: str,
    kind: str,
    index: int,
    item,
    conn_to_ip: Dict[str, str],
    known_conn_ids: Set[str],
    device_by_ip: Dict[str, Tuple[str, Device]],
) -> List[SemanticError]:
    """
    Validate one PLC monitor or controller entry.

    Args:
        plc_name: Name of the owning PLC (used in the error entity)
        kind: "monitor" or "controller"; used to build the entity label
        index: Position of the entry in the PLC's monitors/controllers list
        item: The monitor/controller object (id and outbound_connection_id
            are read)
        conn_to_ip: TCP connection id -> target IP for this PLC
        known_conn_ids: ids of all outbound connections of this PLC,
            whatever their type
        device_by_ip: IP -> (device_type, device) index
    Returns:
        List of SemanticError objects for this entry (empty if valid or if
        the entry uses a non-TCP connection)
    """
    entity = f"{plc_name}.{kind}s[{index}] (id='{item.id}')"
    conn_id = item.outbound_connection_id

    if conn_id not in conn_to_ip:
        if conn_id in known_conn_ids:
            # Declared but not TCP (e.g. RTU): there is no IP to resolve,
            # so there is nothing to verify here.
            return []
        # The id matches no connection at all - a dangling reference that
        # must not pass silently.
        return [SemanticError(
            entity=entity,
            message=(
                f"outbound_connection_id '{conn_id}' "
                f"not found in PLC outbound_connections. "
                f"Available: {sorted(known_conn_ids, key=str)}"
            ),
        )]
    target_ip = conn_to_ip[conn_id]

    if target_ip not in device_by_ip:
        return [SemanticError(
            entity=entity,
            message=(
                f"Target IP '{target_ip}' not found in any device. "
                f"Available IPs: {sorted(device_by_ip)}"
            ),
        )]
    device_type, device = device_by_ip[target_ip]

    if _find_register_in_block(device.registers, item.id) is None:
        return [SemanticError(
            entity=entity,
            message=(
                f"Register '{item.id}' not found on {device_type} "
                f"'{device.name}' (IP: {target_ip})"
            ),
        )]
    return []


def validate_plc_semantics(config: Config) -> List[SemanticError]:
    """
    Validate PLC monitors and controllers semantically.

    Similar to HMI validation but for PLC-to-sensor/actuator connections.
    For each monitor/controller:
    1. If its outbound_connection_id matches no outbound connection of the
       PLC, report an error
    2. If it matches a non-TCP connection, skip it (no IP lookup possible)
    3. Otherwise verify the target device (by IP) exists and exposes a
       register with the entry's id

    Unlike the HMI check, value_type and address are not compared.

    NOTE(review): step 1 assumes non-TCP (RTU) connection objects carry an
    ``id`` attribute like TCP ones - confirm against the connection models.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects (empty if all valid)
    """
    errors: List[SemanticError] = []
    device_by_ip = _build_device_by_ip(config)
    for plc in config.plcs:
        known_conn_ids: Set[str] = set()
        conn_to_ip: Dict[str, str] = {}
        for conn in plc.outbound_connections:
            # getattr: only TCPConnection is known here to define `id`.
            conn_id = getattr(conn, "id", None)
            if not conn_id:
                continue
            known_conn_ids.add(conn_id)
            if isinstance(conn, TCPConnection):
                conn_to_ip[conn_id] = conn.ip

        # Monitors are reported before controllers for each PLC.
        for kind, items in (("monitor", plc.monitors), ("controller", plc.controllers)):
            for i, item in enumerate(items):
                errors.extend(_check_plc_reference(
                    plc.name, kind, i, item,
                    conn_to_ip, known_conn_ids, device_by_ip,
                ))
    return errors
def validate_orphan_devices(config: Config) -> List[SemanticError]:
    """
    Report sensors and actuators that no PLC talks to.

    P0 Issue: Open-loop sensors/actuators are useless and indicate config error.
    Rules:
    - A sensor is covered when at least one PLC monitor uses a TCP
      outbound connection whose IP equals the sensor's IP
    - An actuator is covered when at least one PLC controller uses a TCP
      outbound connection whose IP equals the actuator's IP

    Only sensors/actuators that have a network IP are checked. Sensor errors
    are listed before actuator errors.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects for orphan devices
    """
    # ip -> device name, for every device that has a network IP
    sensor_ips: Dict[str, str] = {
        sensor.network.ip: sensor.name
        for sensor in config.sensors
        if sensor.network and sensor.network.ip
    }
    actuator_ips: Dict[str, str] = {
        actuator.network.ip: actuator.name
        for actuator in config.actuators
        if actuator.network and actuator.network.ip
    }

    # IPs reached by any PLC monitor / controller through a TCP connection
    monitored_ips: set = set()
    controlled_ips: set = set()
    for plc in config.plcs:
        conn_to_ip: Dict[str, str] = {
            conn.id: conn.ip
            for conn in plc.outbound_connections
            if isinstance(conn, TCPConnection) and conn.id
        }
        monitored_ips.update(
            conn_to_ip[monitor.outbound_connection_id]
            for monitor in plc.monitors
            if monitor.outbound_connection_id in conn_to_ip
        )
        controlled_ips.update(
            conn_to_ip[controller.outbound_connection_id]
            for controller in plc.controllers
            if controller.outbound_connection_id in conn_to_ip
        )

    errors: List[SemanticError] = []
    # Sensors that no PLC monitor reads
    errors.extend(
        SemanticError(
            entity=f"sensors['{sensor_name}']",
            message=(
                f"Orphan sensor: no PLC monitor references IP {sensor_ip}. "
                f"Add a PLC outbound_connection and monitor for this sensor."
            ),
        )
        for sensor_ip, sensor_name in sensor_ips.items()
        if sensor_ip not in monitored_ips
    )
    # Actuators that no PLC controller drives
    errors.extend(
        SemanticError(
            entity=f"actuators['{actuator_name}']",
            message=(
                f"Orphan actuator: no PLC controller references IP {actuator_ip}. "
                f"Add a PLC outbound_connection and controller for this actuator."
            ),
        )
        for actuator_ip, actuator_name in actuator_ips.items()
        if actuator_ip not in controlled_ips
    )
    return errors
def validate_boolean_type_rules(config: Config) -> List[SemanticError]:
    """
    Flag boolean-looking signals stored in 16-bit Modbus register tables.

    P0 Issue: Boolean signals mapped to input_register/holding_register are incorrect.
    Modbus type rules:
    - Commanded boolean (write) -> coil (function code 5/15)
    - Measured boolean (read-only) -> discrete_input (function code 2)
    - input_register/holding_register are for 16-bit integers, not booleans

    Detection is a name heuristic only: a signal counts as boolean when its
    lower-cased name contains any substring listed in ``boolean_patterns``
    below. Register count and address are not consulted.

    Tables scanned, in reporting order:
    - sensors: input_register, matched on physical_value
    - actuators: holding_register, matched on physical_value
    - PLCs: input_register then holding_register, matched on id

    NOTE(review): plain substring matching can also hit analog names (for
    example "heat_exchanger_temp" contains "at_") - confirm whether such
    hits are acceptable before tightening the patterns.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects for type rule violations
    """
    # Substrings (compared case-insensitively) that suggest a boolean signal.
    boolean_patterns = (
        "switch", "state", "status", "at_", "is_", "_on", "_off",
        "enable", "active", "running", "alarm", "fault", "ready",
        "open", "close", "start", "stop", "button", "flag",
    )

    def looks_like_boolean(name: str) -> bool:
        """Check if a signal name suggests boolean semantics."""
        if not name:
            return False
        lowered = name.lower()
        return any(pattern in lowered for pattern in boolean_patterns)

    errors: List[SemanticError] = []

    def scan(owner: str, registers, reg_type: str, key: str, advice: str) -> None:
        """Append an error for each register in one table whose name looks boolean.

        ``key`` is the attribute holding the signal name ("physical_value" or
        "id"); ``advice`` completes the sentence "... should use <advice>".
        """
        for reg in registers:
            label = getattr(reg, key) or ""
            if looks_like_boolean(label):
                errors.append(SemanticError(
                    entity=f"{owner}.registers.{reg_type} ({key}='{label}')",
                    message=f"Boolean signal '{label}' should use {advice}",
                ))

    # Sensors measure: a boolean reading belongs in discrete_input.
    for sensor in config.sensors:
        scan(
            f"sensors['{sensor.name}']",
            sensor.registers.input_register,
            "input_register",
            "physical_value",
            "discrete_input, not input_register. "
            "Move this register to discrete_input for proper Modbus function code.",
        )

    # Actuators are commanded: a boolean command belongs in coil.
    for actuator in config.actuators:
        scan(
            f"actuators['{actuator.name}']",
            actuator.registers.holding_register,
            "holding_register",
            "physical_value",
            "coil, not holding_register. "
            "Move this register to coil for proper Modbus function code.",
        )

    # PLCs hold both directions, so the advice names both alternatives.
    for plc in config.plcs:
        owner = f"plcs['{plc.name}']"
        scan(
            owner,
            plc.registers.input_register,
            "input_register",
            "id",
            "discrete_input (for input) or coil (for output), not input_register.",
        )
        scan(
            owner,
            plc.registers.holding_register,
            "holding_register",
            "id",
            "coil (for output) or discrete_input (for input), not holding_register.",
        )
    return errors
def validate_plc_local_register_coherence(config: Config) -> List[SemanticError]:
    """
    Check that every PLC monitor/controller has a matching local register.

    Native ICS-SimLab pattern requires that:
    - A PLC monitor with id=X and value_type=T is backed by a local register
      in plc.registers[T] with id=X and io="input"
    - A PLC controller with id=Y and value_type=T is backed by a local
      register in plc.registers[T] with id=Y and io="output"

    Per entry, exactly one of these outcomes applies: unknown value_type,
    missing local register, wrong io direction, or no error. A local
    register whose io is unset is accepted for either direction.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects for coherence violations
    """
    register_types = ("coil", "discrete_input", "holding_register", "input_register")
    errors: List[SemanticError] = []

    for plc in config.plcs:
        plc_name = plc.name
        # value_type -> {register id -> declared io, "" when unset}
        existing_regs: Dict[str, Dict[str, str]] = {
            reg_type: {
                reg.id: reg.io or ""
                for reg in getattr(plc.registers, reg_type)
                if reg.id
            }
            for reg_type in register_types
        }

        # Monitors cache remote values locally, so they need io="input".
        for i, monitor in enumerate(plc.monitors):
            monitor_id = monitor.id
            value_type = monitor.value_type  # e.g., "input_register"
            where = f"{plc_name}.monitors[{i}] (id='{monitor_id}')"
            by_id = existing_regs.get(value_type)
            if by_id is None:
                errors.append(SemanticError(
                    entity=where,
                    message=f"Unknown value_type '{value_type}'"
                ))
                continue
            if monitor_id not in by_id:
                errors.append(SemanticError(
                    entity=where,
                    message=(
                        f"Missing local register: {plc_name}.registers.{value_type} "
                        f"should contain a register with id='{monitor_id}' and io='input' "
                        f"to cache monitored values (native pattern)"
                    )
                ))
                continue
            actual_io = by_id[monitor_id]
            if actual_io and actual_io != "input":
                errors.append(SemanticError(
                    entity=where,
                    message=(
                        f"Register io mismatch: {plc_name}.registers.{value_type}['{monitor_id}'] "
                        f"has io='{actual_io}' but monitors require io='input'"
                    )
                ))

        # Controllers source values to remote devices, so they need io="output".
        for i, controller in enumerate(plc.controllers):
            controller_id = controller.id
            value_type = controller.value_type
            where = f"{plc_name}.controllers[{i}] (id='{controller_id}')"
            by_id = existing_regs.get(value_type)
            if by_id is None:
                errors.append(SemanticError(
                    entity=where,
                    message=f"Unknown value_type '{value_type}'"
                ))
                continue
            if controller_id not in by_id:
                errors.append(SemanticError(
                    entity=where,
                    message=(
                        f"Missing local register: {plc_name}.registers.{value_type} "
                        f"should contain a register with id='{controller_id}' and io='output' "
                        f"to source controlled values (native pattern)"
                    )
                ))
                continue
            actual_io = by_id[controller_id]
            if actual_io and actual_io != "output":
                errors.append(SemanticError(
                    entity=where,
                    message=(
                        f"Register io mismatch: {plc_name}.registers.{value_type}['{controller_id}'] "
                        f"has io='{actual_io}' but controllers require io='output'"
                    )
                ))
    return errors
def validate_network_config(config: Config) -> List[SemanticError]:
    """
    Check device network settings: unique IPs, known networks, IP in subnet.

    P0 Issue: ICS-SimLab docker-compose fails with "Address already in use"
    when multiple devices share the same IP on the same docker_network.

    Checks performed (errors are emitted in this order):
    1. No duplicate network.ip within the same docker_network
    2. Each device.network.docker_network exists in ip_networks[]
    3. Each device IP is within the declared subnet for that network

    The UI, HMIs, PLCs, sensors and actuators are inspected when they have a
    network section. A missing docker_network is treated as "default", which
    is exempt from check 2.

    Args:
        config: Validated Config object
    Returns:
        List of SemanticError objects for network issues
    """
    # docker_name -> (name, subnet)
    networks: Dict[str, Tuple[str, str]] = {
        net.docker_name: (net.name, net.subnet) for net in config.ip_networks
    }

    # One (entity_name, device_type, ip, docker_network) row per networked device
    devices_with_network: List[Tuple[str, str, str, str]] = []
    if config.ui and config.ui.network:
        ui_net = config.ui.network
        devices_with_network.append(
            ("ui", "ui", ui_net.ip, ui_net.docker_network or "default")
        )
    # Note: HILs do NOT have network config (they're simulation-only)
    device_groups = (
        ("hmi", config.hmis),
        ("plc", config.plcs),
        ("sensor", config.sensors),
        ("actuator", config.actuators),
    )
    for group_type, members in device_groups:
        for member in members:
            if member.network:
                devices_with_network.append((
                    member.name,
                    group_type,
                    member.network.ip,
                    member.network.docker_network or "default",
                ))

    errors: List[SemanticError] = []

    # Check 1: Duplicate IPs within same docker_network
    # docker_net -> ip -> [(name, type)], both levels in first-seen order
    occupancy: Dict[str, Dict[str, List[Tuple[str, str]]]] = {}
    for name, dev_type, ip, docker_net in devices_with_network:
        occupancy.setdefault(docker_net, {}).setdefault(ip, []).append((name, dev_type))
    for docker_net, ip_to_devices in occupancy.items():
        for ip, device_list in ip_to_devices.items():
            if len(device_list) <= 1:
                continue
            colliders = ", ".join(f"{name} ({dtype})" for name, dtype in device_list)
            errors.append(SemanticError(
                entity=f"network[{docker_net}]",
                message=(
                    f"Duplicate IP {ip}: {colliders}. "
                    f"Each device must have a unique IP within the same docker_network."
                )
            ))

    # Check 2: docker_network exists in ip_networks[]
    for name, dev_type, ip, docker_net in devices_with_network:
        if docker_net == "default" or docker_net in networks:
            continue
        available = sorted(networks.keys()) if networks else ["(none)"]
        errors.append(SemanticError(
            entity=f"{name} ({dev_type})",
            message=(
                f"docker_network '{docker_net}' not found in ip_networks. "
                f"Available: {available}"
            )
        ))

    # Check 3: IP is within subnet
    for name, dev_type, ip, docker_net in devices_with_network:
        if docker_net not in networks:
            continue  # Already reported in check 2
        _, subnet_str = networks[docker_net]
        try:
            # strict=False accepts a subnet written with host bits set
            network = ipaddress.ip_network(subnet_str, strict=False)
            ip_addr = ipaddress.ip_address(ip)
        except ValueError as e:
            # Invalid IP or subnet format
            errors.append(SemanticError(
                entity=f"{name} ({dev_type})",
                message=f"Invalid IP/subnet format: {e}"
            ))
            continue
        if ip_addr not in network:
            errors.append(SemanticError(
                entity=f"{name} ({dev_type})",
                message=(
                    f"IP {ip} is not within subnet {subnet_str} "
                    f"for docker_network '{docker_net}'"
                )
            ))
    return errors
def validate_all_semantics(config: Config) -> List[SemanticError]:
    """
    Run every semantic validator and concatenate their findings.

    Validators run in a fixed order and their errors are appended in that
    order; a validator reporting errors does not stop the ones after it.

    Args:
        config: Validated Config object
    Returns:
        List of all SemanticError objects
    """
    validators = (
        # P0: Network validation first (docker-compose fails if IPs collide)
        validate_network_config,
        validate_hmi_semantics,
        validate_plc_semantics,
        validate_orphan_devices,
        validate_boolean_type_rules,
        validate_plc_local_register_coherence,
    )
    return [error for validate in validators for error in validate(config)]