first init
This commit is contained in:
58
run_daily_incremental.sh
Executable file
58
run_daily_incremental.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env bash
#
# Daily incremental job runner.
#
# Sets up strict mode, the log directory and a non-blocking lock so that only
# one instance runs at a time, then switches into the project directory and
# records the start of the run in logs/daily_job.log.
#
# -E makes the ERR trap below apply inside functions and subshells as well.
set -Eeuo pipefail

PROJECT_DIR="/home/liam/code/python/jobs_robots"
LOG_DIR="$PROJECT_DIR/logs"
LOCK_FILE="$PROJECT_DIR/.daily_job.lock"
# Captured once; used for the "start" / "already running" log lines.
TS="$(date '+%Y-%m-%d %H:%M:%S')"

mkdir -p "$LOG_DIR"

#######################################
# Append a failure record to the job log.
# Globals:   LOG_DIR (read)
# Arguments: $1 - exit status of the failing command
#            $2 - line number where it failed
# Outputs:   one line appended to $LOG_DIR/daily_job.log
#######################################
log_failure() {
  printf '[%s] daily job failed (exit %s) at line %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" "$1" "$2" >> "$LOG_DIR/daily_job.log"
}

# With `set -e` a failing step terminates the script immediately; without this
# trap the log would show a "start" line and then nothing at all.
trap 'log_failure "$?" "$LINENO"' ERR

# Prevent overlap if previous run is still active.
# fd 9 stays open for the lifetime of the script, so the lock is released
# automatically when the script exits for any reason.
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
  echo "[$TS] another job is running, exit" >> "$LOG_DIR/daily_job.log"
  # Not an error: the earlier run is still doing the work.
  exit 0
fi

# Later steps use paths relative to the project root.
cd "$PROJECT_DIR"

echo "[$TS] daily job start" >> "$LOG_DIR/daily_job.log"
|
||||
|
||||
# Auto-advance time window to a rolling daily range.
#
# Rewrites config.json (relative to the current directory) so that, when
# time_window.enabled is true, time_window.start/end cover the last
# `daily_window_days` UTC calendar days ending today.
#
# Output (status line and any traceback) goes to the job log like every other
# step, so a failure here is visible after an unattended run.
.venv/bin/python - >> "$LOG_DIR/daily_job.log" 2>&1 <<'PY'
import json
import os
from datetime import datetime, timezone, timedelta

cfg_path = 'config.json'
with open(cfg_path, 'r', encoding='utf-8') as f:
    cfg = json.load(f)

window = cfg.setdefault('time_window', {})
if window.get('enabled', False):
    # Missing, null or 0 falls back to 1; anything below 1 is clamped to 1.
    days = max(int(cfg.get('daily_window_days', 1) or 1), 1)
    end_dt = datetime.now(timezone.utc).date()
    # Inclusive range: days=1 means start == end == today (UTC).
    start_dt = end_dt - timedelta(days=days - 1)
    window['start'] = start_dt.strftime('%Y-%m-%d')
    window['end'] = end_dt.strftime('%Y-%m-%d')

# Write to a sibling temp file and rename it over the original. Opening
# config.json with 'w' directly would truncate it first, so an interrupted
# dump would leave a corrupt config behind for every later run.
target_path = os.path.realpath(cfg_path)  # follow a symlinked config
tmp_path = target_path + '.tmp'
try:
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(cfg, f, ensure_ascii=False, indent=2)
        f.write('\n')
        f.flush()
        os.fsync(f.fileno())
    os.replace(tmp_path, target_path)
except BaseException:
    # Do not leave a stale temp file next to the config on failure.
    try:
        os.unlink(tmp_path)
    except FileNotFoundError:
        pass
    raise

print(
    "updated time_window: "
    f"start={window.get('start')} end={window.get('end')} "
    f"daily_window_days={cfg.get('daily_window_days', 1)}"
)
PY
|
||||
|
||||
# Run the pipeline scripts in order; each one appends stdout and stderr to
# the shared job log.
job_log="$LOG_DIR/daily_job.log"
steps=(
  main.py                 # 1) Crawl TG incremental
  clean_to_structured.py  # 2) Clean dejob_official and others into structured table
)
for script in "${steps[@]}"; do
  uv run "$script" >> "$job_log" 2>&1
done

# Fresh timestamp here: the steps above may have run for a long time.
printf '[%s] daily job done\n' "$(date '+%Y-%m-%d %H:%M:%S')" >> "$job_log"
|
||||
Reference in New Issue
Block a user