2026-02-26 20:00:06 +08:00
|
|
|
#!/usr/bin/env bash
#
# Setup for the daily job: strict mode, fixed project paths, and the log
# directory that every later step appends to.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Absolute project root; every other path below is derived from it.
PROJECT_DIR="/home/liam/code/python/jobs_robots"

# Interpreter taken from the project's virtualenv.
PY_BIN="${PROJECT_DIR}/.venv/bin/python"

# Lock file used to keep two runs from overlapping.
LOCK_FILE="${PROJECT_DIR}/.daily_job.lock"

# Directory holding daily_job.log.
LOG_DIR="${PROJECT_DIR}/logs"

# Timestamp captured once at startup and reused by the early log lines.
TS="$(date '+%Y-%m-%d %H:%M:%S')"

# The log directory must exist before the first append.
mkdir -p "${LOG_DIR}"
|
|
|
|
|
|
|
|
|
|
# Prevent overlap if previous run is still active.
#
# Open the lock file on fd 9 and try to take a non-blocking flock on it.
# When the lock cannot be taken, record that in the job log and leave with
# status 0 so the skipped run is not reported as a failure.
exec 9>"$LOCK_FILE"
flock -n 9 || {
  echo "[$TS] another job is running, exit" >> "$LOG_DIR/daily_job.log"
  exit 0
}
|
|
|
|
|
|
|
|
|
|
# Work from the project root: the steps below refer to config.json and the
# Python scripts by relative path.
cd "$PROJECT_DIR"

echo "[$TS] daily job start" >> "$LOG_DIR/daily_job.log"

# Stop early (status 1) when the virtualenv interpreter is missing or not
# executable; the reason is written to the job log first.
[[ -x "$PY_BIN" ]] || {
  echo "[$TS] python not found: $PY_BIN" >> "$LOG_DIR/daily_job.log"
  exit 1
}
|
|
|
|
|
|
2026-02-26 20:00:06 +08:00
|
|
|
# Auto-advance time window to a rolling daily range.
#
# Rewrites config.json in the current directory. When time_window.enabled is
# truthy, time_window.start/end are set to a range of daily_window_days days
# (minimum 1) ending on today's UTC date.
#
# stdout and stderr are appended to the job log like every other step, so the
# "updated time_window" line and any traceback (e.g. malformed config.json)
# end up next to the rest of the run's output instead of being lost.
"$PY_BIN" - >> "$LOG_DIR/daily_job.log" 2>&1 <<'PY'
import json
from datetime import datetime, timezone, timedelta

cfg_path = 'config.json'
with open(cfg_path, 'r', encoding='utf-8') as f:
    cfg = json.load(f)

# Creates an empty time_window section when the config has none.
window = cfg.setdefault('time_window', {})
if window.get('enabled', False):
    # A missing, null or 0 value falls back to 1; negatives are clamped to 1.
    days = int(cfg.get('daily_window_days', 1) or 1)
    if days < 1:
        days = 1
    # The range is inclusive, so a 1-day window has start == end.
    # NOTE(review): the date is taken in UTC while the shell log timestamps
    # use local time - confirm UTC is what the crawler expects.
    end_dt = datetime.now(timezone.utc).date()
    start_dt = end_dt - timedelta(days=days - 1)
    window['start'] = start_dt.strftime('%Y-%m-%d')
    window['end'] = end_dt.strftime('%Y-%m-%d')

# NOTE(review): the write and the print are placed at top level, i.e. they
# run even when the window is disabled. Indentation was not recoverable from
# the reviewed copy - confirm against the original nesting.
with open(cfg_path, 'w', encoding='utf-8') as f:
    json.dump(cfg, f, ensure_ascii=False, indent=2)
    f.write('\n')

print(
    "updated time_window: "
    f"start={window.get('start')} end={window.get('end')} "
    f"daily_window_days={cfg.get('daily_window_days', 1)}"
)
PY
|
|
|
|
|
|
|
|
|
|
# Steps 1 and 2 run back to back with the virtualenv interpreter; each one
# appends its stdout and stderr to the job log:
#   1) main.py                 - crawl TG incremental
#   2) clean_to_structured.py  - clean dejob_official and others into
#                                structured table
# With `set -e` in effect, a failing step stops the script before the next
# one starts.
for step_script in main.py clean_to_structured.py; do
  "$PY_BIN" "$step_script" >> "$LOG_DIR/daily_job.log" 2>&1
done
|
|
|
|
|
|
|
|
|
|
# 3) Sync local MySQL to cloud MySQL (only when mysql_cloud is configured)
#
# A small Python probe reads config.json and reports through its exit status:
#    0  mysql_cloud has host, user, database and a real password
#   10  mysql_cloud is missing, incomplete, or still uses the CHANGE_ME
#       placeholder password
# Any other status means the probe itself failed (an uncaught Python
# exception exits with 1). That case is logged and aborts the job instead of
# being reported as "not configured". The probe's output goes to the job log
# so a traceback is kept.
cloud_check_rc=0
"$PY_BIN" - >> "$LOG_DIR/daily_job.log" 2>&1 <<'PY' || cloud_check_rc=$?
import json
import sys

with open("config.json", "r", encoding="utf-8") as f:
    cfg = json.load(f)

# A missing or null mysql_cloud section is treated as an empty one.
cloud = cfg.get("mysql_cloud") or {}
ok = bool(cloud.get("host") and cloud.get("user") and cloud.get("database"))
ok = ok and bool(cloud.get("password")) and cloud.get("password") != "CHANGE_ME"

# 10 is used for "not configured" so it cannot be confused with the status 1
# that Python uses for an uncaught exception.
sys.exit(0 if ok else 10)
PY

case "$cloud_check_rc" in
  0)
    "$PY_BIN" sync_to_cloud_mysql.py >> "$LOG_DIR/daily_job.log" 2>&1
    ;;
  10)
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] skip cloud sync: mysql_cloud not configured" >> "$LOG_DIR/daily_job.log"
    ;;
  *)
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] cloud sync check failed (exit $cloud_check_rc)" >> "$LOG_DIR/daily_job.log"
    exit "$cloud_check_rc"
    ;;
esac

# Fresh timestamp here: the run may have taken a while since startup.
echo "[$(date '+%Y-%m-%d %H:%M:%S')] daily job done" >> "$LOG_DIR/daily_job.log"
|