#!/usr/bin/env python3
"""
Semantic validation for ICS-SimLab configuration.

Validates that HMI monitors and controllers correctly reference:
1. Valid outbound_connection_id in HMI's outbound_connections
2. Reachable target device (by IP)
3. Existing register on target device (by id)
4. Matching value_type and address
5. Network configuration: no duplicate IPs, valid subnets

This is deterministic validation - no guessing or heuristics.
If something cannot be verified, it fails with a clear error.

NOTE: validate_boolean_type_rules() is the one exception: it flags
registers by substring-matching their names and is therefore heuristic.
"""

import ipaddress
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union

from models.ics_simlab_config_v2 import (
    Config,
    HMI,
    PLC,
    Sensor,
    Actuator,
    RegisterBlock,
    TCPConnection,
    IPNetwork,
)


@dataclass
class SemanticError:
    """A semantic validation error."""

    entity: str  # e.g., "hmi1.monitors[0]"
    message: str

    def __str__(self) -> str:
        return f"{self.entity}: {self.message}"


Device = Union[PLC, Sensor, Actuator]

# Modbus register tables, in the order they are searched.
_REGISTER_TYPES: Tuple[str, ...] = (
    "coil",
    "discrete_input",
    "holding_register",
    "input_register",
)

# Substrings (matched case-insensitively) that mark a register name as
# boolean-looking for validate_boolean_type_rules().
_BOOLEAN_PATTERNS: Tuple[str, ...] = (
    "switch", "state", "status", "at_", "is_", "_on", "_off",
    "enable", "active", "running", "alarm", "fault", "ready",
    "open", "close", "start", "stop", "button", "flag",
)


def _build_device_by_ip(config: Config) -> Dict[str, Tuple[str, Device]]:
    """
    Build mapping from IP address to (device_type, device_object).

    Only devices with a network IP are indexed (RTU devices use serial
    ports). If two devices share an IP, the later one wins; PLCs are
    indexed first, then sensors, then actuators.
    """
    mapping: Dict[str, Tuple[str, Device]] = {}
    groups = (
        ("plc", config.plcs),
        ("sensor", config.sensors),
        ("actuator", config.actuators),
    )
    for device_type, devices in groups:
        for device in devices:
            if device.network and device.network.ip:
                mapping[device.network.ip] = (device_type, device)
    return mapping


def _find_register_in_block(
    registers: RegisterBlock,
    register_id: str,
) -> Optional[Tuple[str, int, int]]:
    """
    Find a register by id in a RegisterBlock.

    Args:
        registers: The RegisterBlock to search
        register_id: The register id to find

    Returns:
        (value_type, address, count) if found, None otherwise
    """
    for reg_type in _REGISTER_TYPES:
        for reg in getattr(registers, reg_type):
            # Match by id or physical_value (sensors use physical_value)
            if register_id in (reg.id, reg.physical_value):
                return (reg_type, reg.address, reg.count)
    return None


def _tcp_targets_by_connection_id(connections: Iterable[Any]) -> Dict[str, str]:
    """Map connection id -> target IP for TCP connections that have an id."""
    return {
        conn.id: conn.ip
        for conn in connections
        if isinstance(conn, TCPConnection) and conn.id
    }


def _check_target_register(
    entity: str,
    kind: str,
    item: Any,
    target_ip: str,
    device_by_ip: Dict[str, Tuple[str, Device]],
    *,
    verify_type_and_address: bool,
) -> List[SemanticError]:
    """
    Check that a monitor/controller points at a real register.

    Args:
        entity: Label used as SemanticError.entity
        kind: "monitor" or "controller" (used in mismatch messages)
        item: The monitor/controller; its id, value_type and address are read
        target_ip: IP resolved from the item's outbound connection
        device_by_ip: Index produced by _build_device_by_ip()
        verify_type_and_address: Also compare value_type and address
            against the register found on the target device

    Returns:
        List of SemanticError objects (empty if the reference is valid)
    """
    if target_ip not in device_by_ip:
        return [SemanticError(
            entity=entity,
            message=(
                f"Target IP '{target_ip}' not found in any device. "
                f"Available IPs: {sorted(device_by_ip)}"
            ),
        )]

    device_type, device = device_by_ip[target_ip]
    reg_info = _find_register_in_block(device.registers, item.id)
    if reg_info is None:
        return [SemanticError(
            entity=entity,
            message=(
                f"Register '{item.id}' not found on {device_type} "
                f"'{device.name}' (IP: {target_ip})"
            ),
        )]

    if not verify_type_and_address:
        return []

    expected_type, expected_addr, _count = reg_info
    errors: List[SemanticError] = []

    # Verify value_type matches (no guessing - must match exactly)
    if item.value_type != expected_type:
        errors.append(SemanticError(
            entity=entity,
            message=(
                f"value_type mismatch: {kind} has '{item.value_type}' "
                f"but {device.name}.{item.id} is '{expected_type}'"
            ),
        ))

    # Verify address matches
    if item.address != expected_addr:
        errors.append(SemanticError(
            entity=entity,
            message=(
                f"address mismatch: {kind} has {item.address} "
                f"but {device.name}.{item.id} is at address {expected_addr}"
            ),
        ))

    return errors


def validate_hmi_semantics(config: Config) -> List[SemanticError]:
    """
    Validate HMI monitors and controllers semantically.

    For each monitor/controller:
    1. Verify outbound_connection_id exists in HMI's outbound_connections
    2. Verify target device (by IP) exists
    3. Verify register exists on target device
    4. Verify value_type and address match target register

    Only TCP connections are considered; a monitor/controller whose
    connection id is not a TCP connection of the HMI is reported.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects (empty if all valid)
    """
    errors: List[SemanticError] = []
    device_by_ip = _build_device_by_ip(config)

    for hmi in config.hmis:
        conn_to_ip = _tcp_targets_by_connection_id(hmi.outbound_connections)

        # Monitors first, then controllers, so the error order is stable.
        for kind, items in (("monitor", hmi.monitors), ("controller", hmi.controllers)):
            for i, item in enumerate(items):
                entity = f"{hmi.name}.{kind}s[{i}] (id='{item.id}')"
                conn_id = item.outbound_connection_id

                if conn_id not in conn_to_ip:
                    errors.append(SemanticError(
                        entity=entity,
                        message=(
                            f"outbound_connection_id '{conn_id}' "
                            f"not found in HMI outbound_connections. "
                            f"Available: {sorted(conn_to_ip)}"
                        ),
                    ))
                    continue

                errors.extend(_check_target_register(
                    entity,
                    kind,
                    item,
                    conn_to_ip[conn_id],
                    device_by_ip,
                    verify_type_and_address=True,
                ))

    return errors


def validate_plc_semantics(config: Config) -> List[SemanticError]:
    """
    Validate PLC monitors and controllers semantically.

    Similar to HMI validation but for PLC-to-sensor/actuator connections.
    Differences from the HMI check:
    - A monitor/controller whose connection id is not a TCP connection of
      the PLC is skipped silently (it could be an RTU connection).
    - Only the target device and register existence are checked;
      value_type and address are not compared.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects (empty if all valid)
    """
    errors: List[SemanticError] = []
    device_by_ip = _build_device_by_ip(config)

    for plc in config.plcs:
        conn_to_ip = _tcp_targets_by_connection_id(plc.outbound_connections)

        for kind, items in (("monitor", plc.monitors), ("controller", plc.controllers)):
            for i, item in enumerate(items):
                conn_id = item.outbound_connection_id
                if conn_id not in conn_to_ip:
                    # Could be RTU connection - skip silently for PLCs
                    continue

                errors.extend(_check_target_register(
                    f"{plc.name}.{kind}s[{i}] (id='{item.id}')",
                    kind,
                    item,
                    conn_to_ip[conn_id],
                    device_by_ip,
                    verify_type_and_address=False,
                ))

    return errors


def validate_orphan_devices(config: Config) -> List[SemanticError]:
    """
    Validate that all sensors and actuators are referenced by at least one PLC.

    P0 Issue: Open-loop sensors/actuators are useless and indicate config error.

    Rules:
    - Each sensor must be referenced by at least one PLC monitor
      (outbound_connection IP match)
    - Each actuator must be referenced by at least one PLC controller
      (outbound_connection IP match)

    Sensors/actuators without a network IP are not checked.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects for orphan devices
    """
    # ip -> device name, for devices that have a network IP
    sensor_ips: Dict[str, str] = {
        sensor.network.ip: sensor.name
        for sensor in config.sensors
        if sensor.network and sensor.network.ip
    }
    actuator_ips: Dict[str, str] = {
        actuator.network.ip: actuator.name
        for actuator in config.actuators
        if actuator.network and actuator.network.ip
    }

    # IPs reached by any PLC monitor / controller over a TCP connection
    plc_monitor_target_ips: Set[str] = set()
    plc_controller_target_ips: Set[str] = set()
    for plc in config.plcs:
        conn_to_ip = _tcp_targets_by_connection_id(plc.outbound_connections)
        plc_monitor_target_ips.update(
            conn_to_ip[monitor.outbound_connection_id]
            for monitor in plc.monitors
            if monitor.outbound_connection_id in conn_to_ip
        )
        plc_controller_target_ips.update(
            conn_to_ip[controller.outbound_connection_id]
            for controller in plc.controllers
            if controller.outbound_connection_id in conn_to_ip
        )

    errors: List[SemanticError] = []

    # Check for orphan sensors (not monitored by any PLC)
    for sensor_ip, sensor_name in sensor_ips.items():
        if sensor_ip not in plc_monitor_target_ips:
            errors.append(SemanticError(
                entity=f"sensors['{sensor_name}']",
                message=(
                    f"Orphan sensor: no PLC monitor references IP {sensor_ip}. "
                    f"Add a PLC outbound_connection and monitor for this sensor."
                ),
            ))

    # Check for orphan actuators (not controlled by any PLC)
    for actuator_ip, actuator_name in actuator_ips.items():
        if actuator_ip not in plc_controller_target_ips:
            errors.append(SemanticError(
                entity=f"actuators['{actuator_name}']",
                message=(
                    f"Orphan actuator: no PLC controller references IP {actuator_ip}. "
                    f"Add a PLC outbound_connection and controller for this actuator."
                ),
            ))

    return errors


def _looks_like_boolean(name: str) -> bool:
    """Check if a register name contains any of the boolean-looking substrings."""
    if not name:
        return False
    name_lower = name.lower()
    return any(pattern in name_lower for pattern in _BOOLEAN_PATTERNS)


def validate_boolean_type_rules(config: Config) -> List[SemanticError]:
    """
    Validate that boolean signals use correct Modbus register types.

    P0 Issue: Boolean signals mapped to input_register/holding_register
    are incorrect.

    Modbus type rules:
    - Commanded boolean (write) -> coil (function code 5/15)
    - Measured boolean (read-only) -> discrete_input (function code 2)
    - input_register/holding_register are for 16-bit integers, not booleans

    Heuristic for detecting boolean signals: the register name contains
    (case-insensitively) one of the substrings in _BOOLEAN_PATTERNS, e.g.
    "switch", "state", "status", "_on", "_off", "enable", "alarm".
    The name inspected is:
    - physical_value for sensor input_register entries
    - physical_value for actuator holding_register entries
    - id for PLC input_register and holding_register entries

    Because this is substring matching, names such as "flow_setpoint_open"
    are flagged as well; count and address are not inspected.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects for type rule violations
    """
    errors: List[SemanticError] = []

    # Check sensors - boolean values should use discrete_input, not input_register
    for sensor in config.sensors:
        for reg in sensor.registers.input_register:
            pv = reg.physical_value or ""
            if not _looks_like_boolean(pv):
                continue
            errors.append(SemanticError(
                entity=f"sensors['{sensor.name}'].registers.input_register (physical_value='{pv}')",
                message=(
                    f"Boolean signal '{pv}' should use discrete_input, not input_register. "
                    f"Move this register to discrete_input for proper Modbus function code."
                ),
            ))

    # Check actuators - boolean values should use coil, not holding_register
    for actuator in config.actuators:
        for reg in actuator.registers.holding_register:
            pv = reg.physical_value or ""
            if not _looks_like_boolean(pv):
                continue
            errors.append(SemanticError(
                entity=f"actuators['{actuator.name}'].registers.holding_register (physical_value='{pv}')",
                message=(
                    f"Boolean signal '{pv}' should use coil, not holding_register. "
                    f"Move this register to coil for proper Modbus function code."
                ),
            ))

    # Check PLCs - boolean inputs should be discrete_input, boolean outputs should be coil
    for plc in config.plcs:
        for reg in plc.registers.input_register:
            reg_id = reg.id or ""
            if not _looks_like_boolean(reg_id):
                continue
            errors.append(SemanticError(
                entity=f"plcs['{plc.name}'].registers.input_register (id='{reg_id}')",
                message=(
                    f"Boolean signal '{reg_id}' should use discrete_input (for input) "
                    f"or coil (for output), not input_register."
                ),
            ))

        for reg in plc.registers.holding_register:
            reg_id = reg.id or ""
            if not _looks_like_boolean(reg_id):
                continue
            errors.append(SemanticError(
                entity=f"plcs['{plc.name}'].registers.holding_register (id='{reg_id}')",
                message=(
                    f"Boolean signal '{reg_id}' should use coil (for output) "
                    f"or discrete_input (for input), not holding_register."
                ),
            ))

    return errors


def _check_local_registers(
    plc_name: str,
    kind: str,
    items: Iterable[Any],
    required_io: str,
    purpose: str,
    existing_regs: Dict[str, Dict[str, str]],
) -> List[SemanticError]:
    """
    Check that each monitor/controller of a PLC has a matching local register.

    Args:
        plc_name: Name of the PLC (used in messages)
        kind: "monitor" or "controller"
        items: The PLC's monitors or controllers
        required_io: io direction the local register must have
        purpose: Trailing explanation for the "missing register" message
        existing_regs: value_type -> register id -> io ("" when io is unset)

    Returns:
        List of SemanticError objects (empty if all coherent)
    """
    errors: List[SemanticError] = []

    for i, item in enumerate(items):
        item_id = item.id
        value_type = item.value_type  # e.g., "input_register"
        entity = f"{plc_name}.{kind}s[{i}] (id='{item_id}')"

        if value_type not in existing_regs:
            errors.append(SemanticError(
                entity=entity,
                message=f"Unknown value_type '{value_type}'",
            ))
            continue

        if item_id not in existing_regs[value_type]:
            errors.append(SemanticError(
                entity=entity,
                message=(
                    f"Missing local register: {plc_name}.registers.{value_type} "
                    f"should contain a register with id='{item_id}' and io='{required_io}' "
                    f"{purpose}"
                ),
            ))
            continue

        # A register with no io set is accepted; only a conflicting
        # direction is reported.
        actual_io = existing_regs[value_type][item_id]
        if actual_io and actual_io != required_io:
            errors.append(SemanticError(
                entity=entity,
                message=(
                    f"Register io mismatch: {plc_name}.registers.{value_type}['{item_id}'] "
                    f"has io='{actual_io}' but {kind}s require io='{required_io}'"
                ),
            ))

    return errors


def validate_plc_local_register_coherence(config: Config) -> List[SemanticError]:
    """
    Validate PLC local register coherence with monitors/controllers.

    Native ICS-SimLab pattern requires that:
    - For each PLC monitor with id=X and value_type=T, there should be a
      local register in plc.registers[T] with id=X and io="input"
    - For each PLC controller with id=Y and value_type=T, there should be a
      local register in plc.registers[T] with id=Y and io="output"

    This ensures the PLC has local registers to cache monitored values
    and source controlled values, matching native example patterns.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects for coherence violations
    """
    errors: List[SemanticError] = []

    for plc in config.plcs:
        # value_type -> register id -> io; registers without an id are ignored
        existing_regs: Dict[str, Dict[str, str]] = {
            reg_type: {
                reg.id: reg.io or ""
                for reg in getattr(plc.registers, reg_type)
                if reg.id
            }
            for reg_type in _REGISTER_TYPES
        }

        # Check monitors: each monitor.id should have a local register with io="input"
        errors.extend(_check_local_registers(
            plc.name,
            "monitor",
            plc.monitors,
            "input",
            "to cache monitored values (native pattern)",
            existing_regs,
        ))

        # Check controllers: each controller.id should have a local register with io="output"
        errors.extend(_check_local_registers(
            plc.name,
            "controller",
            plc.controllers,
            "output",
            "to source controlled values (native pattern)",
            existing_regs,
        ))

    return errors


def validate_network_config(config: Config) -> List[SemanticError]:
    """
    Validate network configuration: no duplicate IPs, valid subnets.

    P0 Issue: ICS-SimLab docker-compose fails with "Address already in use"
    when multiple devices share the same IP on the same docker_network.

    Checks performed:
    1. No duplicate network.ip within the same docker_network
    2. Each device.network.docker_network exists in ip_networks[]
    3. Each device IP is within the declared subnet for that network

    A device with no docker_network is placed on the implicit "default"
    network, which is exempt from check 2 and, unless ip_networks declares
    a network with docker_name "default", from check 3 as well.

    Args:
        config: Validated Config object

    Returns:
        List of SemanticError objects for network issues
    """
    errors: List[SemanticError] = []

    # docker_name -> subnet
    networks: Dict[str, str] = {
        net.docker_name: net.subnet for net in config.ip_networks
    }

    # Collect all devices with network config
    # Structure: List[(entity_name, device_type, ip, docker_network)]
    devices_with_network: List[Tuple[str, str, str, str]] = []

    # UI has network config
    if config.ui and config.ui.network:
        ui_net = config.ui.network
        devices_with_network.append(
            ("ui", "ui", ui_net.ip, ui_net.docker_network or "default")
        )

    # Note: HILs do NOT have network config (they're simulation-only)
    groups = (
        ("hmi", config.hmis),
        ("plc", config.plcs),
        ("sensor", config.sensors),
        ("actuator", config.actuators),
    )
    for dev_type, devices in groups:
        for device in devices:
            if device.network:
                devices_with_network.append((
                    device.name,
                    dev_type,
                    device.network.ip,
                    device.network.docker_network or "default",
                ))

    # Check 1: Duplicate IPs within same docker_network
    # docker_net -> ip -> [(name, type)], both levels in first-seen order
    sharers_by_net: Dict[str, Dict[str, List[Tuple[str, str]]]] = {}
    for name, dev_type, ip, docker_net in devices_with_network:
        sharers_by_net.setdefault(docker_net, {}).setdefault(ip, []).append(
            (name, dev_type)
        )

    for docker_net, sharers_by_ip in sharers_by_net.items():
        for ip, device_list in sharers_by_ip.items():
            if len(device_list) > 1:
                colliders = ", ".join(f"{name} ({dtype})" for name, dtype in device_list)
                errors.append(SemanticError(
                    entity=f"network[{docker_net}]",
                    message=(
                        f"Duplicate IP {ip}: {colliders}. "
                        f"Each device must have a unique IP within the same docker_network."
                    ),
                ))

    # Check 2: docker_network exists in ip_networks[]
    for name, dev_type, ip, docker_net in devices_with_network:
        if docker_net != "default" and docker_net not in networks:
            available = sorted(networks) if networks else ["(none)"]
            errors.append(SemanticError(
                entity=f"{name} ({dev_type})",
                message=(
                    f"docker_network '{docker_net}' not found in ip_networks. "
                    f"Available: {available}"
                ),
            ))

    # Check 3: IP is within subnet
    for name, dev_type, ip, docker_net in devices_with_network:
        if docker_net not in networks:
            # Unknown networks were reported by check 2; an undeclared
            # "default" network has no subnet to check against.
            continue

        subnet_str = networks[docker_net]
        try:
            # strict=False accepts a subnet written with host bits set
            network = ipaddress.ip_network(subnet_str, strict=False)
            ip_addr = ipaddress.ip_address(ip)
            if ip_addr not in network:
                errors.append(SemanticError(
                    entity=f"{name} ({dev_type})",
                    message=(
                        f"IP {ip} is not within subnet {subnet_str} "
                        f"for docker_network '{docker_net}'"
                    ),
                ))
        except ValueError as e:
            # Invalid IP or subnet format
            errors.append(SemanticError(
                entity=f"{name} ({dev_type})",
                message=f"Invalid IP/subnet format: {e}",
            ))

    return errors


def validate_all_semantics(config: Config) -> List[SemanticError]:
    """
    Run all semantic validations.

    Validators run in a fixed order and their errors are concatenated:
    network, HMI, PLC, orphan devices, boolean type rules, PLC local
    register coherence.

    Args:
        config: Validated Config object

    Returns:
        List of all SemanticError objects
    """
    validators = (
        # P0: Network validation first (docker-compose fails if IPs collide)
        validate_network_config,
        validate_hmi_semantics,
        validate_plc_semantics,
        validate_orphan_devices,
        validate_boolean_type_rules,
        validate_plc_local_register_coherence,
    )

    errors: List[SemanticError] = []
    for validator in validators:
        errors.extend(validator(config))
    return errors