6.5 Troubleshooting Tools & Commands


This page provides diagnostic commands, log locations, and debugging techniques for troubleshooting issues with the Australian Dataspace Testbed Platform.

System Status Commands

Check Instance Status

# View system uptime and load
uptime

# Check system resource usage (if installed)
htop
# Press 'q' to quit

# Alternative resource monitor
top
# Press 'q' to quit

# Check memory usage
free -h

# Check disk space
df -h

# Check disk usage by directory
du -sh /home/ec2-user/*

Check Running Services

# Check the status of a service (replace <service-name> with the unit name)
sudo systemctl status <service-name>

# Check specific service status
sudo systemctl status code-server

# List all running services
sudo systemctl list-units --type=service --state=running

Network Diagnostics

# Check listening ports
sudo netstat -tulpn
# or
sudo ss -tulpn

# Test connectivity to external services
ping -c 4 google.com

# Check DNS resolution
nslookup google.com

# Test specific port connectivity (if installed)
telnet localhost 8000
# Press Ctrl+] then type 'quit' to exit

# Alternative: show which process is using a specific port
sudo lsof -i :8000

# Check firewall rules
sudo firewall-cmd --list-all

# View network interfaces
ip addr show

Docker Diagnostics

Container Status

# List running containers
sudo docker ps

# List all containers (including stopped)
sudo docker ps -a

# Check container resource usage
sudo docker stats
# Press Ctrl+C to quit

# View container details
sudo docker inspect <container-name>

# Check Docker service status
sudo systemctl status docker

Container Logs

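# View logs for all services
# (run docker compose commands from the directory containing
# docker-compose.yml, e.g. /home/ec2-user/IDS-testbed)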
sudo docker compose logs

# View logs for specific service
sudo docker compose logs omejdn
sudo docker compose logs omejdn-server
sudo docker compose logs connectora
sudo docker compose logs connectorb
sudo docker compose logs broker-reverseproxy
sudo docker compose logs dashboard
sudo docker compose logs rda-broker

# Follow logs in real-time
sudo docker compose logs -f

# View last 100 lines
sudo docker compose logs --tail=100

# View logs with timestamps
sudo docker compose logs -t

Docker Management

# Restart all containers
sudo docker compose restart

# Restart specific container
sudo docker compose restart connectora

# Stop all containers
sudo docker compose stop

# Start all containers
sudo docker compose start

# Stop and remove all containers
sudo docker compose down

# Completely stop and remove all containers and volumes
# (WARNING: permanently deletes all data stored in those volumes)
sudo docker compose down -v

# Recreate and restart containers (add --build to also rebuild images)
sudo docker compose up -d --force-recreate

# Check Docker disk usage
sudo docker system df

# Clean up unused resources
# (WARNING: -a removes ALL images not used by a container, not just dangling ones)
sudo docker system prune -a

Log File Locations

Bootstrap & Installation Logs

# Cloud-init output (bootstrap script execution)
sudo tail -f /var/log/cloud-init-output.log

# Cloud-init logs
sudo tail -f /var/log/cloud-init.log
# Press Ctrl+C to quit

# View entire bootstrap log
sudo less /var/log/cloud-init-output.log
# Press 'q' to quit

System Logs

# View system journal
sudo journalctl -xe

# View logs for specific service
sudo journalctl -u code-server
sudo journalctl -u dashboard-operator
sudo journalctl -u docker

# Follow logs in real-time
sudo journalctl -f

# View logs since last boot
sudo journalctl -b

# View logs for specific time period
sudo journalctl --since "2024-01-01 00:00:00"
sudo journalctl --since "1 hour ago"

Application Logs

# Dashboard operator logs
sudo journalctl -u dashboard-operator -f

# Code-server logs
sudo journalctl -u code-server -f

# Docker service logs
sudo journalctl -u docker -f

Dataspace Component Diagnostics

DAPS

# Check DAPS containers
sudo docker ps | grep omejdn

# View DAPS logs
sudo docker compose logs omejdn-server

# View DAPS UI logs
sudo docker compose logs omejdn-ui

Connectors

# Check connector status
sudo docker ps | grep connector

# View Connector A logs
sudo docker compose logs connectora

# View Connector B logs
sudo docker compose logs connectorb

# Test connector endpoint
curl -k https://localhost/connectora/
curl -k https://localhost/connectorb/

Broker

# Check broker containers
sudo docker ps | grep broker

# View broker logs
sudo docker compose logs broker-reverseproxy
sudo docker compose logs rda-broker

Dashboard

# Check dashboard container
sudo docker ps | grep dashboard

# View dashboard logs
sudo docker compose logs dashboard

# Check dashboard operator service
sudo systemctl status dashboard-operator

# View dashboard operator logs
sudo journalctl -u dashboard-operator -n 50

Network Troubleshooting

Port Connectivity

# Check if port is listening
sudo netstat -tulpn | grep :443
sudo netstat -tulpn | grep :8000
sudo netstat -tulpn | grep :8080

# Test local port connectivity
curl -k https://localhost:443
curl -k https://localhost:8000

# Check firewall status
sudo firewall-cmd --state

# List open ports
sudo firewall-cmd --list-ports

# Test external connectivity (from another machine)
curl -k https://<instance-ip>:443

Security Group Verification

# From your local machine, test connectivity
telnet <instance-ip> 22
telnet <instance-ip> 443

# Test HTTPS connectivity
curl -k https://<instance-ip>/dashboard

# Check if your IP is allowed (view from cloud provider console)
# AWS: EC2 > Security Groups > Inbound Rules
# Azure: VM > Networking > Inbound port rules
# Nectar: Network > Security Groups > Manage Rules

Performance Analysis

CPU Usage

# Real-time CPU monitoring
top
# Press '1' to show individual cores
# Press 'P' to sort by CPU usage

# Check CPU information
lscpu

# Sample live CPU usage once per second, 10 times (requires the sysstat package)
sar -u 1 10

Memory Analysis

# Detailed memory usage
free -h

# Memory usage by process
ps aux --sort=-%mem | head -n 10

# Check for memory pressure
sudo dmesg | grep -i "out of memory"

# View memory statistics
vmstat 1 10

Disk I/O

# Check disk I/O statistics
iostat -x 1 10

# Monitor disk activity
sudo iotop

# Check for disk errors
sudo dmesg | grep -i error

Process Analysis

# List processes by resource usage
ps aux --sort=-%cpu | head -n 10
ps aux --sort=-%mem | head -n 10

# View process tree
pstree -p

# Check specific process
ps aux | grep docker
ps aux | grep code-server

Certificate Verification

Check Certificate Details

# View certificate information
openssl x509 -in /path/to/certificate.pem -text -noout

# Check certificate expiration
openssl x509 -in /path/to/certificate.pem -noout -dates

# Verify certificate chain
openssl verify /path/to/certificate.pem

# Test SSL/TLS connection
openssl s_client -connect localhost:443 -showcerts

Code-Server Certificate

# Check code-server certificate
openssl x509 -in /home/ec2-user/.config/code-server/certs/cert.pem -text -noout

# Verify certificate validity
openssl x509 -in /home/ec2-user/.config/code-server/certs/cert.pem -noout -dates

Configuration Verification

Environment Variables

# Check user environment
env

# Check specific variable
echo $PATH
echo $HOME

# View service environment
sudo systemctl show code-server --property=Environment

Configuration Files

# View code-server config
cat /home/ec2-user/.config/code-server/config.yaml

# View Docker Compose configuration
cat /home/ec2-user/IDS-testbed/docker-compose.yml

# Check systemd service files
sudo cat /etc/systemd/system/code-server.service
sudo cat /etc/systemd/system/dashboard-operator.service

Advanced Debugging

Network Packet Capture

# Capture traffic on specific port
sudo tcpdump -i any port 443 -w capture.pcap

# View captured packets
sudo tcpdump -r capture.pcap

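# Monitor HTTPS traffic on port 443
# (payload is TLS-encrypted, so -A shows mostly unreadable bytes; use port 80 for plain HTTP)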
sudo tcpdump -i any -A port 443

Process Tracing

# Trace system calls for a process
sudo strace -p <process-id>

# Trace file operations
sudo strace -e trace=file <command>

# Monitor file access
sudo lsof -p <process-id>

Quick Diagnostic Script

Create a diagnostic script to gather common information:

#!/bin/bash
# Save as: diagnostic.sh
#
# Collects a one-shot snapshot of system, Docker, service, network, journal
# and bootstrap state for troubleshooting.
#
# Every step is best-effort: a failing command (for example a service that is
# not installed) must not prevent the remaining sections from running, so
# `set -e` is deliberately NOT used here.
#
# Environment:
#   COMPOSE_DIR  Directory containing docker-compose.yml
#                (default: /home/ec2-user/IDS-testbed).

# Merge stderr into stdout so that error messages from failing commands end up
# in the same report when the output is redirected to a file.
exec 2>&1

compose_dir="${COMPOSE_DIR:-/home/ec2-user/IDS-testbed}"

# The trailing [[:space:]] anchors the port number so that, for example,
# ":22" does not also match ":2222".
port_pattern=':(22|443|8000|8080|8081|8443)[[:space:]]'

# Print a section banner preceded by a blank line.
#   $1 - section title
section() {
  printf '\n=== %s ===\n' "$1"
}

printf '=== %s ===\n' 'System Information'
uname -a
uptime
free -h
df -h

section 'Docker Status'
sudo docker ps
# `docker compose` locates its project via the working directory, so run it
# from the compose directory when that directory exists.
if [[ -d "$compose_dir" ]]; then
  (cd "$compose_dir" && sudo docker compose ps)
else
  printf 'warning: %s not found; running "docker compose ps" in %s\n' \
    "$compose_dir" "$PWD"
  sudo docker compose ps
fi

section 'Service Status'
for service in code-server dashboard-operator docker; do
  sudo systemctl status "$service" --no-pager
done

section 'Network Status'
# netstat (net-tools) is not installed everywhere; ss is the replacement.
if command -v netstat >/dev/null 2>&1; then
  sudo netstat -tulpn | grep -E "$port_pattern"
else
  sudo ss -tulpn | grep -E "$port_pattern"
fi

section 'Recent Errors'
sudo journalctl -p err -n 20 --no-pager

section 'Bootstrap Status'
sudo tail -n 50 /var/log/cloud-init-output.log

Run with:

chmod +x diagnostic.sh
./diagnostic.sh > diagnostic-output.txt

Getting Help

If these tools don’t resolve your issue, gather the following information before contacting support:

  1. Output from relevant diagnostic commands
  2. Recent log entries showing errors
  3. Screenshots of error messages
  4. Steps to reproduce the issue
  5. Instance details (cloud provider, size, IP address)

See Getting Support for contact information.