#!/usr/bin/env bash
#
# Daily job runner.
#
# Steps:
#   1. Take a non-blocking exclusive lock so an overlapping run exits quietly.
#   2. Advance config.json's `time_window` to a rolling daily range (only when
#      `time_window.enabled` is true).
#   3. Run main.py, then clean_to_structured.py, via `uv run`.
#
# Output of every step is appended to $LOG_DIR/daily_job.log.

set -euo pipefail

readonly PROJECT_DIR="/home/liam/code/python/jobs_robots"
readonly LOG_DIR="$PROJECT_DIR/logs"
readonly LOG_FILE="$LOG_DIR/daily_job.log"
readonly LOCK_FILE="$PROJECT_DIR/.daily_job.lock"

#######################################
# Append one timestamped line to the job log.
# Globals:   LOG_FILE (read)
# Arguments: $* - message text
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "$LOG_FILE"
}

#######################################
# EXIT handler: record a failure in the log when the script ends with a
# non-zero status (for example when `set -e` aborts on a failing step).
#######################################
on_exit() {
  local status=$?
  if (( status != 0 )); then
    log "daily job failed (exit ${status})"
  fi
}

mkdir -p "$LOG_DIR"
trap on_exit EXIT

# Prevent overlap if previous run is still active. The lock is held on fd 9
# for the lifetime of this shell and released when the script exits.
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  log "another job is running, exit"
  exit 0
fi

# The Python snippet and the `uv run` steps use paths relative to the project.
cd "$PROJECT_DIR"

log "daily job start"

# Auto-advance time window to a rolling daily range.
# Output (including any traceback) goes to the job log like the other steps.
.venv/bin/python - >> "$LOG_FILE" 2>&1 <<'PY'
import json
import os
from datetime import datetime, timedelta, timezone

cfg_path = 'config.json'
with open(cfg_path, 'r', encoding='utf-8') as f:
    cfg = json.load(f)

window = cfg.setdefault('time_window', {})
if window.get('enabled', False):
    # A missing, falsy, or < 1 `daily_window_days` falls back to a 1-day window.
    days = max(int(cfg.get('daily_window_days', 1) or 1), 1)
    # The window ends on today's UTC date and spans `days` calendar days.
    end_dt = datetime.now(timezone.utc).date()
    start_dt = end_dt - timedelta(days=days - 1)
    window['start'] = start_dt.strftime('%Y-%m-%d')
    window['end'] = end_dt.strftime('%Y-%m-%d')

# NOTE(review): the config is rewritten on every run, even when the window is
# disabled -- confirm this is intended.
# Write to a sibling temp file and swap it in, so an interrupted write cannot
# leave a truncated config.json behind.
tmp_path = cfg_path + '.tmp'
with open(tmp_path, 'w', encoding='utf-8') as f:
    json.dump(cfg, f, ensure_ascii=False, indent=2)
    f.write('\n')
os.replace(tmp_path, cfg_path)

print(
    "updated time_window: "
    f"start={window.get('start')} end={window.get('end')} "
    f"daily_window_days={cfg.get('daily_window_days', 1)}"
)
PY

# 1) Crawl TG incremental
uv run main.py >> "$LOG_FILE" 2>&1

# 2) Clean dejob_official and others into structured table
uv run clean_to_structured.py >> "$LOG_FILE" 2>&1

log "daily job done"