#!/usr/bin/env bash
#
# Daily job runner.
#
# Steps, in order:
#   1. Take a non-blocking exclusive lock so overlapping runs are skipped.
#   2. If config.json has time_window.enabled set, rewrite time_window.start
#      and time_window.end to a rolling range of `daily_window_days` days
#      ending on today's UTC date.
#   3. Run main.py, then clean_to_structured.py, via `uv run`, appending
#      their stdout and stderr to logs/daily_job.log.
#
# Exit status: 0 on success or when another run holds the lock; otherwise
# the status of the first failing step (the script runs under `set -e`).
set -euo pipefail

readonly PROJECT_DIR="/home/liam/code/python/jobs_robots"
readonly LOG_DIR="$PROJECT_DIR/logs"
readonly LOG_FILE="$LOG_DIR/daily_job.log"
readonly LOCK_FILE="$PROJECT_DIR/.daily_job.lock"

# Timestamp captured once at startup; used for the "start" and
# "another job is running" log lines.
TS="$(date '+%Y-%m-%d %H:%M:%S')"
readonly TS

#######################################
# Append one timestamped line to the job log.
# Globals:   LOG_FILE (read)
# Arguments: $1 - timestamp text, $2 - message
#######################################
log_line() {
  printf '[%s] %s\n' "$1" "$2" >> "$LOG_FILE"
}

#######################################
# EXIT trap: record a failure line when the script ends with a non-zero
# status, so an aborted run is visible in the log. Does not alter the
# script's exit status.
# Globals:   LOG_FILE (read)
#######################################
on_exit() {
  local status=$?
  if (( status != 0 )); then
    # Best effort: a logging failure must not mask the original status.
    printf '[%s] daily job failed (exit %d)\n' \
      "$(date '+%Y-%m-%d %H:%M:%S')" "$status" >> "$LOG_FILE" || true
  fi
}

mkdir -p "$LOG_DIR"
trap on_exit EXIT

# Prevent overlap if previous run is still active. File descriptor 9 stays
# open for the lifetime of the script, which is what keeps the lock held.
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  log_line "$TS" "another job is running, exit"
  exit 0
fi

cd "$PROJECT_DIR"
log_line "$TS" "daily job start"

# Auto-advance time window to a rolling daily range.
# Unlike the steps below, this step's output is not redirected to the log
# file; it goes to the script's own stdout/stderr.
.venv/bin/python - <<'PY'
import json
import os
import shutil
from datetime import datetime, timezone, timedelta

cfg_path = 'config.json'
with open(cfg_path, 'r', encoding='utf-8') as f:
    cfg = json.load(f)

window = cfg.setdefault('time_window', {})
# NOTE(review): the original indentation of this snippet was lost; the
# write-back and the status message are placed inside the "enabled" branch
# so a disabled window leaves config.json untouched -- confirm.
if window.get('enabled', False):
    # A missing, null or zero daily_window_days falls back to 1 day.
    days = int(cfg.get('daily_window_days', 1) or 1)
    if days < 1:
        days = 1
    end_dt = datetime.now(timezone.utc).date()
    # The range is inclusive of both ends, hence days - 1.
    start_dt = end_dt - timedelta(days=days - 1)
    window['start'] = start_dt.strftime('%Y-%m-%d')
    window['end'] = end_dt.strftime('%Y-%m-%d')

    # Write to a sibling temp file and swap it in atomically, so an
    # interrupted run can never leave a truncated config.json behind.
    # realpath() keeps a symlinked config pointing at the same target.
    target = os.path.realpath(cfg_path)
    tmp_path = target + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(cfg, f, ensure_ascii=False, indent=2)
            f.write('\n')
            f.flush()
            os.fsync(f.fileno())
        # Keep the permission bits of the file being replaced.
        shutil.copymode(target, tmp_path)
        os.replace(tmp_path, target)
    except BaseException:
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass
        raise

    print(
        "updated time_window: "
        f"start={window.get('start')} end={window.get('end')} "
        f"daily_window_days={cfg.get('daily_window_days', 1)}"
    )
PY

# 1) Crawl TG incremental
uv run main.py >> "$LOG_FILE" 2>&1

# 2) Clean dejob_official and others into structured table
uv run clean_to_structured.py >> "$LOG_FILE" 2>&1

log_line "$(date '+%Y-%m-%d %H:%M:%S')" "daily job done"