ics-simlab-config-gen-claude/scripts/e2e.sh

#!/bin/bash
#
# E2E Test for ICS-SimLab scenario
#
# Handles the operator_hmi startup race condition by:
# 1. Starting simlab
# 2. Waiting for PLCs to be ready (listening on port 502)
# 3. Restarting operator_hmi once PLCs are reachable
# 4. Verifying logs for successful reads
# 5. Saving logs and stopping simlab
#
# Usage:
#   ./scripts/e2e.sh [--no-stop]
#
#   --no-stop: Don't stop simlab at the end (for manual inspection)

set -e
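# Note: with "set -e" the script aborts on the first failing command; commands
# that are expected to fail sometimes (docker compose down, docker logs, grep)
# are guarded with "|| true".
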
# Configuration
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
SCENARIO_DIR="$REPO_DIR/outputs/scenario_run"
SIMLAB_DIR="/home/stefano/projects/ICS-SimLab-main/curtin-ics-simlab"
RUN_DIR="$REPO_DIR/outputs/run_$(date +%Y%m%d_%H%M%S)"
# Timeouts (seconds)
STARTUP_TIMEOUT=120   # currently not referenced below
PLC_READY_TIMEOUT=60
HMI_VERIFY_DURATION=15
# Parse args
NO_STOP=false
for arg in "$@"; do
    case $arg in
        --no-stop) NO_STOP=true ;;
    esac
done
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
cleanup() {
    if [ "$NO_STOP" = false ]; then
        log_info "Stopping simlab..."
        cd "$SIMLAB_DIR" && docker compose down 2>/dev/null || true
    else
        log_info "Leaving simlab running (--no-stop)"
    fi
}
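
# The EXIT trap ensures cleanup runs on every exit path, including early exits
# triggered by "set -e".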
trap cleanup EXIT
# Create run directory
mkdir -p "$RUN_DIR"
log_info "Run directory: $RUN_DIR"
# ==============================================================================
# Step 0: Verify prerequisites
# ==============================================================================
log_info "Step 0: Verifying prerequisites"
if [ ! -f "$SCENARIO_DIR/configuration.json" ]; then
log_error "Scenario not found: $SCENARIO_DIR/configuration.json"
log_info "Run: python3 build_scenario.py --out outputs/scenario_run --overwrite"
exit 1
fi
if [ ! -f "$SIMLAB_DIR/start.sh" ]; then
log_error "ICS-SimLab not found: $SIMLAB_DIR"
exit 1
fi
log_info "Prerequisites OK"
# ==============================================================================
# Step 1: Stop any existing containers
# ==============================================================================
log_info "Step 1: Stopping any existing containers"
cd "$SIMLAB_DIR"
docker compose down 2>/dev/null || true
sleep 2
# ==============================================================================
# Step 2: Start simlab in background
# ==============================================================================
log_info "Step 2: Starting ICS-SimLab (this may take a while)..."
# Remove old simulation directory
rm -rf "$SIMLAB_DIR/simulation" 2>/dev/null || true
# Clean up dangling Docker resources (note: "docker system prune -f" also
# removes stopped containers and unused networks system-wide)
docker system prune -f >/dev/null 2>&1
# Activate venv and build
source "$SIMLAB_DIR/.venv/bin/activate"
python3 "$SIMLAB_DIR/main.py" "$SCENARIO_DIR" > "$RUN_DIR/setup.log" 2>&1
# Build containers
docker compose build >> "$RUN_DIR/setup.log" 2>&1
# Start in background
docker compose up -d >> "$RUN_DIR/setup.log" 2>&1
log_info "Simlab started (containers launching in background)"
# ==============================================================================
# Step 3: Wait for PLCs to be ready
# ==============================================================================
log_info "Step 3: Waiting for PLCs to be ready (timeout: ${PLC_READY_TIMEOUT}s)..."
wait_for_plc() {
    local plc_name=$1
    local plc_ip=$2
    local timeout=$3
    local elapsed=0
    while [ $elapsed -lt $timeout ]; do
        # Check if container is running
        if ! docker ps --format '{{.Names}}' | grep -q "^${plc_name}$"; then
            log_warn "$plc_name container not running yet..."
            sleep 2
            elapsed=$((elapsed + 2))
            continue
        fi
        # Check if port 502 is reachable from within the container
        if docker exec "$plc_name" timeout 2 bash -c "echo > /dev/tcp/$plc_ip/502" 2>/dev/null; then
            log_info "$plc_name ready at $plc_ip:502"
            return 0
        fi
        sleep 2
        elapsed=$((elapsed + 2))
    done
    log_error "$plc_name not ready after ${timeout}s"
    return 1
}
# Extract PLC IPs from the configuration; fall back to the defaults if jq is
# unavailable or the field is missing.
PLC1_IP=$(jq -r '.plcs[0].network.ip // "192.168.100.21"' "$SCENARIO_DIR/configuration.json" 2>/dev/null || echo "192.168.100.21")
PLC2_IP=$(jq -r '.plcs[1].network.ip // "192.168.100.22"' "$SCENARIO_DIR/configuration.json" 2>/dev/null || echo "192.168.100.22")
# Wait for each PLC
if ! wait_for_plc "plc1" "$PLC1_IP" "$PLC_READY_TIMEOUT"; then
log_error "PLC1 failed to start. Check logs: $RUN_DIR/plc1.log"
docker logs plc1 > "$RUN_DIR/plc1.log" 2>&1 || true
exit 1
fi
if ! wait_for_plc "plc2" "$PLC2_IP" "$PLC_READY_TIMEOUT"; then
log_error "PLC2 failed to start. Check logs: $RUN_DIR/plc2.log"
docker logs plc2 > "$RUN_DIR/plc2.log" 2>&1 || true
exit 1
fi
# ==============================================================================
# Step 4: Restart operator_hmi
# ==============================================================================
log_info "Step 4: Restarting operator_hmi to recover from startup race condition"
docker compose restart operator_hmi
sleep 3
log_info "operator_hmi restarted"
# ==============================================================================
# Step 4.5: Run Modbus probe
# ==============================================================================
log_info "Step 4.5: Running Modbus probe..."
# Wait a moment for connections to stabilize
sleep 3
# Run probe from within the operator_hmi container (has pymodbus and network access)
PROBE_SCRIPT="$REPO_DIR/tools/probe_modbus.py"
if [ -f "$PROBE_SCRIPT" ]; then
# Copy probe script and config to container
docker cp "$PROBE_SCRIPT" operator_hmi:/tmp/probe_modbus.py
docker cp "$SCENARIO_DIR/configuration.json" operator_hmi:/tmp/configuration.json
# Run probe inside container
docker exec operator_hmi python3 /tmp/probe_modbus.py \
--config /tmp/configuration.json \
> "$RUN_DIR/probe.txt" 2>&1 || true
log_info "Probe results saved to $RUN_DIR/probe.txt"
# Show summary
if grep -q "Modbus OK: 0/" "$RUN_DIR/probe.txt" 2>/dev/null; then
log_warn "Probe: ALL Modbus reads FAILED"
elif grep -q "Modbus OK:" "$RUN_DIR/probe.txt" 2>/dev/null; then
PROBE_SUMMARY=$(grep "Modbus OK:" "$RUN_DIR/probe.txt" | head -1)
log_info "Probe: $PROBE_SUMMARY"
fi
else
log_warn "Probe script not found: $PROBE_SCRIPT"
fi
# ==============================================================================
# Step 5: Verify operator_hmi logs
# ==============================================================================
log_info "Step 5: Monitoring operator_hmi for ${HMI_VERIFY_DURATION}s..."
# Let the system run for the verification window before capturing logs
sleep "$HMI_VERIFY_DURATION"
# Save logs from all components
log_info "Saving logs..."
docker logs plc1 > "$RUN_DIR/plc1.log" 2>&1 || true
docker logs plc2 > "$RUN_DIR/plc2.log" 2>&1 || true
docker logs operator_hmi > "$RUN_DIR/operator_hmi.log" 2>&1 || true
docker logs physical_io_hil > "$RUN_DIR/physical_io_hil.log" 2>&1 || true
docker logs ui > "$RUN_DIR/ui.log" 2>&1 || true
docker logs water_tank_level_sensor > "$RUN_DIR/water_tank_level_sensor.log" 2>&1 || true
docker logs bottle_fill_level_sensor > "$RUN_DIR/bottle_fill_level_sensor.log" 2>&1 || true
docker logs bottle_at_filler_sensor > "$RUN_DIR/bottle_at_filler_sensor.log" 2>&1 || true
# Check for success indicators (default to 0 if a log file is missing)
HMI_ERRORS=$(grep -c "couldn't read values" "$RUN_DIR/operator_hmi.log" 2>/dev/null || true)
HMI_ERRORS=${HMI_ERRORS:-0}
PLC1_CRASHES=$(grep -Ec "Exception|Traceback" "$RUN_DIR/plc1.log" 2>/dev/null || true)
PLC1_CRASHES=${PLC1_CRASHES:-0}
PLC2_CRASHES=$(grep -Ec "Exception|Traceback" "$RUN_DIR/plc2.log" 2>/dev/null || true)
PLC2_CRASHES=${PLC2_CRASHES:-0}
# Extract probe summary if available
PROBE_TCP=$(grep "TCP reachable:" "$RUN_DIR/probe.txt" 2>/dev/null || echo "N/A")
PROBE_MODBUS=$(grep "Modbus OK:" "$RUN_DIR/probe.txt" 2>/dev/null || echo "N/A")
# ==============================================================================
# Step 6: Generate summary
# ==============================================================================
log_info "Step 6: Generating summary"
cat > "$RUN_DIR/summary.txt" << EOF
E2E Test Run: $(date)
Scenario: $SCENARIO_DIR
Results:
- PLC1 exceptions: $PLC1_CRASHES
- PLC2 exceptions: $PLC2_CRASHES
- HMI read errors: $HMI_ERRORS
Modbus Probe:
- $PROBE_TCP
- $PROBE_MODBUS
Container Status:
$(docker ps --format "{{.Names}}: {{.Status}}" | grep -E "plc|hmi|hil|sensor|actuator" | sort)
Notes:
- Some initial HMI read errors are expected due to startup race condition
- Errors after HMI restart indicate deeper connectivity/configuration issues
- See probe.txt for detailed Modbus diagnostics
- Check individual logs in this directory for details
EOF
cat "$RUN_DIR/summary.txt"
# ==============================================================================
# Determine exit status
# ==============================================================================
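# Only PLC exceptions fail the run; HMI read errors and probe failures are
# reported in summary.txt but tolerated (see the Notes section there).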
EXIT_CODE=0
if [ "$PLC1_CRASHES" -gt 0 ]; then
log_error "PLC1 has exceptions - check $RUN_DIR/plc1.log"
EXIT_CODE=1
fi
if [ "$PLC2_CRASHES" -gt 0 ]; then
log_error "PLC2 has exceptions - check $RUN_DIR/plc2.log"
EXIT_CODE=1
fi
if [ "$EXIT_CODE" -eq 0 ]; then
log_info "E2E test completed successfully"
else
log_error "E2E test completed with errors"
fi
log_info "Logs saved to: $RUN_DIR"
exit $EXIT_CODE