#!/usr/bin/env bash
#
# Daily job runner for the jobs_robots project.
#
# Steps, in order:
#   0) Refresh the rolling time window stored in config.json (when enabled).
#   1) Run main.py (incremental TG crawl).
#   2) Run clean_to_structured.py.
#   3) Run sync_to_cloud_mysql.py, only when mysql_cloud is configured.
#
# Progress messages and the output of every step are appended to
# logs/daily_job.log. A non-blocking flock on .daily_job.lock keeps two runs
# from overlapping; a run that cannot take the lock logs that and exits 0.
# Any failing step aborts the job (set -e) with that step's exit status, and
# the failure is recorded in the log by the ERR trap below.
set -euo pipefail

readonly PROJECT_DIR="/home/liam/code/python/jobs_robots"
readonly LOG_DIR="$PROJECT_DIR/logs"
readonly LOG_FILE="$LOG_DIR/daily_job.log"
readonly LOCK_FILE="$PROJECT_DIR/.daily_job.lock"
readonly PY_BIN="$PROJECT_DIR/.venv/bin/python"

#######################################
# Append one timestamped line to the job log.
# Globals:   LOG_FILE (read)
# Arguments: message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" appended to LOG_FILE
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

mkdir -p "$LOG_DIR"

# Without this, a step that fails under `set -e` ends the job with no
# timestamped marker in the log. The ERR trap does not fire for commands
# tested by `if`, nor for the explicit `exit` calls below, and bash restores
# the failing command's status after the trap runs, so the exit code of the
# script is unchanged.
trap 'log "daily job failed: exit status $? near line $LINENO"' ERR

# Prevent overlap if previous run is still active.
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  log "another job is running, exit"
  exit 0
fi

cd "$PROJECT_DIR"
log "daily job start"

if [[ ! -x "$PY_BIN" ]]; then
  log "python not found: $PY_BIN"
  exit 1
fi

# 0) Auto-advance time window to a rolling daily range.
# Output (including any traceback) goes to the job log like the other steps.
"$PY_BIN" - >> "$LOG_FILE" 2>&1 <<'PY'
import json
from datetime import datetime, timezone, timedelta

cfg_path = 'config.json'
with open(cfg_path, 'r', encoding='utf-8') as f:
    cfg = json.load(f)

window = cfg.setdefault('time_window', {})
if window.get('enabled', False):
    # A missing, null, zero or negative daily_window_days falls back to 1 day.
    days = max(1, int(cfg.get('daily_window_days', 1) or 1))
    # The window ends on today's UTC date and spans `days` calendar days,
    # both endpoints included.
    end_dt = datetime.now(timezone.utc).date()
    start_dt = end_dt - timedelta(days=days - 1)
    window['start'] = start_dt.strftime('%Y-%m-%d')
    window['end'] = end_dt.strftime('%Y-%m-%d')

# NOTE(review): the config is rewritten and the summary printed whether or
# not the window is enabled; this placement was reconstructed from a
# whitespace-mangled original - confirm it matches the intended behavior.
with open(cfg_path, 'w', encoding='utf-8') as f:
    json.dump(cfg, f, ensure_ascii=False, indent=2)
    f.write('\n')

print(
    "updated time_window: "
    f"start={window.get('start')} end={window.get('end')} "
    f"daily_window_days={cfg.get('daily_window_days', 1)}"
)
PY

# 1) Crawl TG incremental
"$PY_BIN" main.py >> "$LOG_FILE" 2>&1

# 2) Clean dejob_official and others into structured table
"$PY_BIN" clean_to_structured.py >> "$LOG_FILE" 2>&1

# 3) Sync local MySQL to cloud MySQL (only when mysql_cloud is configured)
# The probe exits 0 only when host, user, database and password are all set
# and the password is not the "CHANGE_ME" placeholder. Any other exit status
# (including a Python error, whose traceback lands in the log) skips the sync.
if "$PY_BIN" - >> "$LOG_FILE" 2>&1 <<'PY'
import json

with open("config.json", "r", encoding="utf-8") as f:
    cfg = json.load(f)

cloud = cfg.get("mysql_cloud") or {}
ok = bool(cloud.get("host") and cloud.get("user") and cloud.get("database"))
ok = ok and bool(cloud.get("password")) and cloud.get("password") != "CHANGE_ME"
raise SystemExit(0 if ok else 1)
PY
then
  "$PY_BIN" sync_to_cloud_mysql.py >> "$LOG_FILE" 2>&1
else
  log "skip cloud sync: mysql_cloud not configured"
fi

log "daily job done"