summaryrefslogtreecommitdiff
path: root/watchman.sh
blob: a07e3318d28450d82636ede9f27847efae678326 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env bash
#
# watchman.sh - checks the primary TeamTalk server with ping and, through the
# Cloudflare API, repoints the DNS record at the backup server while the
# primary looks unhealthy (and back at the primary once it recovers).
# A Telegram message is sent whenever the record is switched.

#set -e
# Fixed PATH so the tools are found even when the caller's environment is
# minimal.
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# === Enable Full Debug Logging ===
# From here on everything written to stdout/stderr is appended to the log.
exec >> /home/doc/healthchecks/watchman.log 2>&1
# Print each command as it is run. NOTE: xtrace prints variables fully
# expanded, so any secret used while it is active ends up in the log file.
set -x

# The timestamp must exist before the first message that uses it; without
# this assignment the line below is logged with an empty "[]" prefix.
DATE="$(date '+%Y-%m-%d %H:%M:%S')"
echo "[$DATE] Watchman script executed" >> /var/log/watchman_cron.log

# === Config ===
# Every setting below can be overridden by exporting an environment variable
# of the same name before the script starts; the literal is the default that
# applies when nothing is exported.
PRIMARY_IP="${PRIMARY_IP:-38.102.127.168}"       # Main TeamTalk server
BACKUP_IP="${BACKUP_IP:-172.238.63.162}"         # Backup TeamTalk server
DNS_NAME="${DNS_NAME:-tt.themediahub.org}"       # A record that gets repointed
THRESHOLD_LATENCY="${THRESHOLD_LATENCY:-150}"    # max tolerated ping latency (ms)
THRESHOLD_LOSS="${THRESHOLD_LOSS:-5}"            # packet loss (%) treated as down

# Credentials and API identifiers. xtrace is paused while they are assigned so
# the values are not echoed into the log, then restored to its previous state.
# TODO(review): the defaults are secrets committed to the repository - rotate
# them and supply the real values through the environment instead.
_watchman_cfg_xtrace=0
if [[ $- == *x* ]]; then
  _watchman_cfg_xtrace=1
  { set +x; } 2>/dev/null
fi
CF_ZONE_ID="${CF_ZONE_ID:-c5099d42caa2d9763227267c597cb758}"
CF_RECORD_ID="${CF_RECORD_ID:-7001484a25f0fe5c323845b6695f7544}"
CF_API_TOKEN="${CF_API_TOKEN:-lCz1kH6nBZPJL0EWrNI-xEDwfR0oOLpg05fq6M81}"
BOT_TOKEN="${BOT_TOKEN:-123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11}"
CHAT_ID="${CHAT_ID:-987654321}"
if (( _watchman_cfg_xtrace )); then
  set -x
fi
unset _watchman_cfg_xtrace

# NOTE: LOG_FILE is not read anywhere in this script; the log redirection
# uses the same path as a literal.
LOG_FILE="/home/doc/healthchecks/watchman.log"
# Timestamp prefixed to every log line of this run.
DATE="$(date '+%Y-%m-%d %H:%M:%S')"

# === Current DNS IP ===
# First IPv4 address dig prints for $DNS_NAME; empty when the lookup fails.
CURRENT_IP=$(/usr/bin/dig +short "$DNS_NAME" | grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' | head -1)
echo "[$DATE] Current DNS IP: $CURRENT_IP"
if [[ -z "$CURRENT_IP" ]]; then
  echo "[$DATE] WARNING: could not resolve $DNS_NAME; current DNS target is unknown."
fi

# === Check Primary Server Health ===
echo "[$DATE] 🔎 Checking ping to $PRIMARY_IP..."
# "Ping failed" is appended to the captured text when ping exits non-zero.
PING_OUTPUT=$(/bin/ping -c 4 "$PRIMARY_IP" || echo "Ping failed")
echo "[$DATE] Ping output: $PING_OUTPUT"

# Latency is the 5th '/'-separated field of the "min/avg/max" summary line
# (the average in iputils' "rtt min/avg/max/mdev = a/b/c/d ms" format).
# Matching the line itself, instead of taking the last line, keeps the parse
# correct if anything is printed after the summary. Empty when no summary
# line exists.
LATENCY=$(echo "$PING_OUTPUT" | /usr/bin/awk -F '/' '/min\/avg\/max/ {print $5; exit}')

# Packet loss percentage. A fractional value is accepted and truncated to its
# integer part so the later integer comparison cannot choke on it.
LOSS=$(echo "$PING_OUTPUT" | /bin/grep -oP '\d+(?:\.\d+)?(?=% packet loss)' | head -1)
LOSS=${LOSS%%.*}
# No statistics at all (ping could not run or resolve): report total loss
# instead of an empty value.
LOSS=${LOSS:-100}
echo "[$DATE] Parsed latency: $LATENCY, loss: $LOSS"

# === Failover decision ===

#######################################
# Decide whether the primary must be treated as down.
# Globals:   LATENCY, LOSS, THRESHOLD_LATENCY, THRESHOLD_LOSS (read)
# Returns:   0 when no latency was parsed, when loss >= THRESHOLD_LOSS, or
#            when latency > THRESHOLD_LATENCY; 1 otherwise.
#######################################
primary_unhealthy() {
  local loss=${LOSS%%.*}   # integer part only; [[ -ge ]] rejects fractions
  if [[ -z "$LATENCY" ]]; then
    return 0
  fi
  if [[ "${loss:-0}" -ge "$THRESHOLD_LOSS" ]]; then
    return 0
  fi
  # LATENCY is fractional, which bash arithmetic cannot compare. awk (already
  # required by this script) does it without needing bc to be installed.
  awk -v rtt="$LATENCY" -v limit="$THRESHOLD_LATENCY" \
    'BEGIN { exit !((rtt + 0) > (limit + 0)) }'
}

#######################################
# Pause / resume xtrace around commands whose expanded arguments contain
# secrets, so tokens are not written to the log when the script is traced.
# Globals:   _watchman_xtrace_was_on (written by pause, read by resume)
#######################################
xtrace_pause() {
  _watchman_xtrace_was_on=0
  if [[ $- == *x* ]]; then
    _watchman_xtrace_was_on=1
    { set +x; } 2>/dev/null
  fi
}

xtrace_resume() {
  if (( _watchman_xtrace_was_on )); then
    set -x
  fi
}

#######################################
# Check a Cloudflare API response body for "success": true.
# Arguments: $1 - response body
# Returns:   0 when the body reports success, 1 otherwise (including empty).
#######################################
cf_update_succeeded() {
  grep -Eq '"success"[[:space:]]*:[[:space:]]*true' <<<"$1"
}

#######################################
# Send a Telegram message to CHAT_ID.
# Globals:   BOT_TOKEN, CHAT_ID, DATE (read)
# Arguments: $1 - message text
# Outputs:   logs the API response
#######################################
send_telegram() {
  local text=$1 reply
  xtrace_pause
  # --data-urlencode: the text contains '%', spaces and emoji, which must be
  # percent-encoded in a form body. No -v: it would log the token-bearing URL.
  reply=$(curl -sS --max-time 20 -X POST \
    "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
    -d "chat_id=${CHAT_ID}" --data-urlencode "text=${text}")
  xtrace_resume
  echo "[$DATE] Telegram API response: $reply"
}

#######################################
# Point the DNS A record at the given address through the Cloudflare API.
# Globals:   CF_ZONE_ID, CF_RECORD_ID, CF_API_TOKEN, DNS_NAME, DATE (read),
#            API_RESPONSE (written)
# Arguments: $1 - IPv4 address to publish
# Outputs:   logs the API response
# Returns:   0 only when Cloudflare reports success.
#######################################
update_dns_record() {
  local target_ip=$1 payload
  payload="{\"type\":\"A\",\"name\":\"${DNS_NAME}\",\"content\":\"${target_ip}\",\"ttl\":60,\"proxied\":false}"
  xtrace_pause
  # No -v: verbose mode prints the Authorization header into the log.
  # --max-time keeps a stalled connection from hanging the run.
  API_RESPONSE=$(curl -sS --max-time 30 -X PUT \
    "https://api.cloudflare.com/client/v4/zones/${CF_ZONE_ID}/dns_records/${CF_RECORD_ID}" \
    -H "Authorization: Bearer ${CF_API_TOKEN}" \
    -H "Content-Type: application/json" \
    --data "$payload")
  xtrace_resume
  echo "[$DATE] Cloudflare API response: $API_RESPONSE"
  cf_update_succeeded "$API_RESPONSE"
}

if primary_unhealthy; then
  if [[ "$CURRENT_IP" != "$BACKUP_IP" ]]; then
    echo "[$DATE] 🚨 Primary down! Switching DNS to backup IP ($BACKUP_IP)..."
    MESSAGE="🚨 ALERT: Primary TeamTalk ($PRIMARY_IP) down. Loss: ${LOSS}%, Latency: ${LATENCY}ms. Switching to backup: $BACKUP_IP"
    send_telegram "$MESSAGE"

    echo "[$DATE] 🔄 Sending DNS switch request to Cloudflare..."
    # Report success only when Cloudflare actually confirmed the update.
    if update_dns_record "$BACKUP_IP"; then
      echo "[$DATE] ✅ DNS switched to backup."
    else
      echo "[$DATE] ❌ Cloudflare did not confirm the update; DNS was NOT switched to backup."
    fi
  else
    echo "[$DATE] 🔄 Primary down, but already on backup. No DNS change needed."
  fi
else
  if [[ "$CURRENT_IP" != "$PRIMARY_IP" ]]; then
    echo "[$DATE] ✅ Primary healthy! Switching DNS back to primary IP ($PRIMARY_IP)..."
    MESSAGE="✅ Primary TeamTalk ($PRIMARY_IP) back online. Loss: ${LOSS}%, Latency: ${LATENCY}ms. Switching DNS back to primary."
    send_telegram "$MESSAGE"

    echo "[$DATE] 🔄 Sending DNS switch back to Cloudflare..."
    if update_dns_record "$PRIMARY_IP"; then
      echo "[$DATE] ✅ DNS switched back to primary."
    else
      echo "[$DATE] ❌ Cloudflare did not confirm the update; DNS was NOT switched back to primary."
    fi
  else
    echo "[$DATE] ✅ Primary healthy, already using primary IP. No DNS change needed."
  fi
fi