I recently had an issue with a new Linux syslog server that was using Azure Arc and had the AMA (Azure Monitor Agent) service enabled by a data collection rule in Sentinel.
I could see the Sentinel DCR (data collection rule) had been pushed out but the AMA agent wasn’t forwarding logs back up to Sentinel.
I suspected traffic was getting blocked but I wasn’t sure how to validate it.
This script will extract the Sentinel Workspace ID and perform a network connection test that simulates the connection from AMA to the data collection point, or ODS (operational data store).
If the script fails, it means you need to talk to your firewall admin to open a connection to *.ods.opinsights.azure.com.
If you’re good at reading curl, you don’t need the script, just curl to
https://<workspaceid>.ods.opinsights.azure.com
The script also checks that the AMA service is running and that you’re not out of disk space – two other common issues.
Have fun!
#!/bin/bash
# AMA Agent Validation Script
# Checks common issues with Azure Monitor Agent on Linux

# Stop at the first command that fails outside of a condition.
set -e

# ANSI colour escape sequences; they are stored as literal backslash
# sequences and only turned into control codes by `echo -e`.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Banner showing when this run started.
echo -e "${BLUE}=== Azure Monitor Agent Validation Script ===${NC}"
echo "Started at: $(date)"
echo
# Function to check endpoint connectivity
#
# Arguments: $1 - URL to request
#            $2 - label printed in the progress line
# Outputs:   "Testing <label>... " followed by a coloured OK/FAILED
# Returns:   0 when curl exits successfully, 1 otherwise
check_endpoint() {
  local target=$1
  local label=$2
  local rc=0

  echo -n "Testing $label... "

  # Only curl's exit status matters; the response body and any error text
  # are discarded.
  curl -s --connect-timeout 10 --max-time 30 "$target" >/dev/null 2>&1 || rc=$?

  if [ "$rc" -ne 0 ]; then
    echo -e "${RED}FAILED${NC}"
    return 1
  fi

  echo -e "${GREEN}OK${NC}"
  return 0
}
# Function to check SSL handshake specifically
#
# Arguments: $1 - host name (connected to on port 443 and also sent as SNI)
#            $2 - label printed in the progress line
# Outputs:   "Testing SSL handshake for <label>... " plus a coloured OK/FAILED
# Returns:   0 when the openssl command succeeds within 10 seconds, else 1
check_ssl_handshake() {
  local server=$1
  local label=$2

  echo -n "Testing SSL handshake for $label... "

  # stdin comes from /dev/null so s_client has nothing to wait for; all of
  # its output is discarded and only the exit status is used.
  if ! timeout 10 openssl s_client -connect "$server:443" -servername "$server" \
    </dev/null >/dev/null 2>&1; then
    echo -e "${RED}FAILED${NC}"
    return 1
  fi

  echo -e "${GREEN}OK${NC}"
  return 0
}
# 1. Check AMA service status
# Asks systemd whether the azuremonitoragent unit is active. When it is, the
# unit's ActiveEnterTimestamp is printed on the "Service uptime" line.
echo -e "${BLUE}1. AMA Service Status${NC}"
if ! systemctl is-active --quiet azuremonitoragent; then
  echo -e "Service status: ${RED}NOT RUNNING${NC}"
  echo "Try: systemctl status azuremonitoragent"
else
  echo -e "Service status: ${GREEN}RUNNING${NC}"
  echo "Service uptime: $(systemctl show azuremonitoragent --property=ActiveEnterTimestamp --value)"
fi
echo
# 2. Check disk space
# Reports how full the filesystem holding the AMA data directory is:
# above 90% is CRITICAL, above 80% is WARNING, anything else is OK.
echo -e "${BLUE}2. Disk Space Check${NC}"
AMA_PATH="/var/opt/microsoft/azuremonitoragent"
if [ -d "$AMA_PATH" ]; then
  # -P selects the POSIX output format (one line per filesystem), so the
  # capacity is reliably field 5 of line 2 even for long device names.
  DISK_USAGE=$(df -P "$AMA_PATH" | awk 'NR==2 {sub(/%/, "", $5); print $5}')
  case "$DISK_USAGE" in
    '' | *[!0-9]*)
      # Without this guard a non-numeric value makes the numeric tests below
      # error out and fall through to the green "(OK)" branch.
      echo -e "Disk usage: ${YELLOW}UNKNOWN (could not parse df output)${NC}"
      ;;
    *)
      if [ "$DISK_USAGE" -gt 90 ]; then
        echo -e "Disk usage: ${RED}${DISK_USAGE}% (CRITICAL)${NC}"
        echo "Free space needed in $(df -P "$AMA_PATH" | awk 'NR==2 {print $1}')"
        # Show the five largest entries under the events directory.
        du -sh "$AMA_PATH/events"/* 2>/dev/null | sort -hr | head -5
      elif [ "$DISK_USAGE" -gt 80 ]; then
        echo -e "Disk usage: ${YELLOW}${DISK_USAGE}% (WARNING)${NC}"
      else
        echo -e "Disk usage: ${GREEN}${DISK_USAGE}% (OK)${NC}"
      fi
      ;;
  esac
else
  echo -e "${RED}AMA directory not found${NC}"
fi
echo
# 3. Extract endpoints from config
# Builds the ENDPOINTS array: the workspace-specific ODS URL (when a workspace
# ID can be found in the AMA config cache) followed by the standard endpoints.
echo -e "${BLUE}3. Extracting Configured Endpoints${NC}"
CONFIG_DIR="/etc/opt/microsoft/azuremonitoragent/config-cache"
WORKSPACE_ID=""
ENDPOINTS=()

# Print the first workspace ID found under directory $1, or nothing.
# The 36-character ID is the host-name prefix of "<id>.ods.opinsights.azure.com".
# The pattern is applied to every matching line before taking the first hit,
# so an earlier line that mentions the domain without an ID cannot mask a
# later line that has one.
extract_workspace_id() {
  local dir=$1
  grep -rho '[a-f0-9-]\{36\}\.ods\.opinsights\.azure\.com' "$dir" 2>/dev/null \
    | head -n 1 \
    | cut -d'.' -f1
}

if [ -d "$CONFIG_DIR" ]; then
  # Extract workspace ID; a miss is not an error, so never trip `set -e`.
  WORKSPACE_ID=$(extract_workspace_id "$CONFIG_DIR") || true
  if [ -n "$WORKSPACE_ID" ]; then
    echo "Workspace ID: $WORKSPACE_ID"
    ENDPOINTS+=("https://${WORKSPACE_ID}.ods.opinsights.azure.com")
  fi
else
  echo -e "${RED}Config directory not found${NC}"
fi

# Standard endpoints are tested whether or not the config directory exists.
ENDPOINTS+=(
  "https://global.handler.control.monitor.azure.com"
  "https://centralus.monitoring.azure.com"
  "https://management.azure.com"
  "https://login.microsoftonline.com"
  "https://ods.opinsights.azure.com"
)
echo
# 4. Test endpoint connectivity
# Probes every URL in ENDPOINTS and counts the failures in failed_endpoints.
echo -e "${BLUE}4. Network Connectivity Tests${NC}"
failed_endpoints=0
for endpoint in "${ENDPOINTS[@]}"; do
  if ! check_endpoint "$endpoint" "$endpoint"; then
    # Use a plain assignment rather than ((failed_endpoints++)): the
    # post-increment evaluates to 0 on the first failure, which makes (( ))
    # return a non-zero status and causes `set -e` to abort the script.
    failed_endpoints=$((failed_endpoints + 1))
  fi
done
echo
# 5. Test SSL handshakes for critical endpoints
# Checks the workspace ODS host (only when a workspace ID is known) and the
# control-plane host, counting failures in ssl_failed.
echo -e "${BLUE}5. SSL Handshake Tests${NC}"
ssl_failed=0
if [ -n "$WORKSPACE_ID" ]; then
  if ! check_ssl_handshake "${WORKSPACE_ID}.ods.opinsights.azure.com" "Workspace ODS"; then
    # Plain assignment instead of ((ssl_failed++)): the post-increment
    # evaluates to 0 the first time, so (( )) would return non-zero and
    # `set -e` would abort the script.
    ssl_failed=$((ssl_failed + 1))
  fi
fi
if ! check_ssl_handshake "global.handler.control.monitor.azure.com" "Control Plane"; then
  ssl_failed=$((ssl_failed + 1))
fi
echo
# 6. Check for recent AMA errors
# Counts journal lines from the last hour that mention an error, a failure or
# an SSL handshake, and shows the latest SSL and disk-space related entries.
echo -e "${BLUE}6. Recent AMA Errors (last 1 hour)${NC}"
if command -v journalctl >/dev/null; then
  # grep -c always prints the count, and exits 1 when that count is 0.
  # `|| true` only neutralises the exit status; echoing a fallback "0" here
  # would append a second line and break the numeric test below.
  error_count=$(journalctl -u azuremonitoragent --since "1 hour ago" | grep -ci "error\|failed\|ssl handshake" || true)
  error_count=${error_count:-0}
  if [ "$error_count" -gt 0 ]; then
    echo -e "Recent errors: ${RED}$error_count${NC}"
    echo "Recent SSL handshake failures:"
    journalctl -u azuremonitoragent --since "1 hour ago" | grep -i "ssl handshake" | tail -3
    echo "Recent disk space errors:"
    journalctl -u azuremonitoragent --since "1 hour ago" | grep -i "no space left" | tail -3
  else
    echo -e "Recent errors: ${GREEN}0${NC}"
  fi
else
  echo "journalctl not available"
fi
echo
# 7. Check listening ports
# Looks for ":28330" in the list of listening TCP sockets reported by ss.
echo -e "${BLUE}7. AMA Listening Ports${NC}"
port_state="${RED}NOT LISTENING${NC}"
if ss -tlnp | grep -q ":28330"; then
  port_state="${GREEN}LISTENING${NC}"
fi
echo -e "Port 28330 (syslog): ${port_state}"
echo
# 8. System time check (critical for SSL)
# Compares the local clock with the "unixtime" value returned by
# worldtimeapi.org and flags a difference of more than 300 seconds.
echo -e "${BLUE}8. System Time Check${NC}"
current_time=$(date +%s)
time_diff=""
# The timeouts keep a blocked or filtered connection from stalling the script.
# The pipeline's status is that of `cut`, so a failed fetch shows up as an
# empty value rather than an error; `|| true` is only a safety net for set -e.
ntp_time=$(curl -s --connect-timeout 5 --max-time 10 "http://worldtimeapi.org/api/timezone/UTC" 2>/dev/null | grep -o '"unixtime":[0-9]*' | cut -d':' -f2 || true)
case "$ntp_time" in
  '' | *[!0-9]*)
    # No usable reference time. An empty value would be treated as 0 in the
    # arithmetic below and reported as a huge, bogus clock difference.
    ntp_time=""
    echo -e "Time sync: ${YELLOW}UNKNOWN (could not fetch reference time)${NC}"
    ;;
  *)
    time_diff=$((current_time - ntp_time))
    time_diff=${time_diff#-} # absolute value
    if [ "$time_diff" -gt 300 ]; then
      echo -e "Time sync: ${RED}OUT OF SYNC (${time_diff}s difference)${NC}"
      echo "Current: $(date)"
      echo "Consider: ntpdate or chrony sync"
    else
      echo -e "Time sync: ${GREEN}OK${NC}"
    fi
    ;;
esac
echo
# Summary
# Derives an overall verdict from the two failure counters (failed_endpoints
# and ssl_failed); SSL failures take precedence over plain connectivity
# failures. Ends with a static list of log locations and common fixes.
echo -e "${BLUE}=== SUMMARY ===${NC}"
if [ "$failed_endpoints" -eq 0 ] && [ "$ssl_failed" -eq 0 ]; then
  echo -e "Overall status: ${GREEN}HEALTHY${NC}"
  echo "All endpoints accessible and SSL working correctly"
elif [ "$ssl_failed" -gt 0 ]; then
  echo -e "Overall status: ${RED}SSL ISSUES${NC}"
  echo "SSL handshake failures detected - check firewall/proxy settings"
  echo "Contact network team to whitelist Azure Monitor endpoints"
elif [ "$failed_endpoints" -gt 0 ]; then
  echo -e "Overall status: ${YELLOW}CONNECTIVITY ISSUES${NC}"
  echo "Some endpoints unreachable - check network connectivity"
else
  echo -e "Overall status: ${YELLOW}CHECK REQUIRED${NC}"
fi

# Static reference text; the quoted delimiter keeps it literal.
cat <<'EOF'

Log locations:
 - AMA logs: journalctl -u azuremonitoragent
 - Config: /etc/opt/microsoft/azuremonitoragent/config-cache/
 - Events: /var/opt/microsoft/azuremonitoragent/events/

Common fixes:
 - Disk space: Clean /var/opt/microsoft/azuremonitoragent/events/
 - SSL issues: Whitelist *.ods.opinsights.azure.com in firewall
 - Service: systemctl restart azuremonitoragent
EOF