260327-窗口迭代逻辑修复,不是每天只跑一条
This commit is contained in:
17
main.py
17
main.py
@@ -403,7 +403,8 @@ async def scrape_one_source(
 f"[{source_key}] 窗口增量模式 start={start_dt} end={end_dt} (UTC), "
 f"message_id > {last_id}"
 )
-iterator = client.iter_messages(entity, min_id=last_id, reverse=True)
+# reverse=False: 从新到旧遍历,结合时间窗口可快速终止,避免扫全量历史。
+iterator = client.iter_messages(entity, min_id=last_id, reverse=False)
 else:
 last_id = store.get_last_message_id(source_key)
 logger.info(f"[{source_key}] 增量模式,从 message_id > {last_id} 开始")
@@ -411,6 +412,10 @@ async def scrape_one_source(
 
 async for message in iterator:
 scanned += 1
+msg_id = int(message.id)
+if msg_id > max_seen_id:
+max_seen_id = msg_id
+
 message_dt = message.date.astimezone(timezone.utc)
 
 if window_mode:
@@ -419,16 +424,12 @@ async def scrape_one_source(
 if start_dt and message_dt < start_dt:
 break
 
-msg_id = int(message.id)
 msg_date = message_dt.strftime("%Y-%m-%d %H:%M:%S")
 content = build_message_content(message)
 
 if store.save_message(source_key, chat_id, msg_id, content, msg_date):
 inserted += 1
 
-if msg_id > max_seen_id:
-max_seen_id = msg_id
-
 if scanned % 200 == 0:
 logger.info(f"[{source_key}] 进度: 扫描 {scanned} 条, 新增 {inserted} 条")
 
@@ -441,6 +442,12 @@ async def scrape_one_source(
 if max_seen_id > old_last:
 store.set_last_message_id(source_key, max_seen_id)
 
+if window_mode and scanned <= 1:
+logger.warning(
+f"[{source_key}] 本次仅扫描 {scanned} 条消息,请检查源最近是否活跃,"
+"或确认 time_window 配置与系统时钟是否正确。"
+)
+
 logger.info(f"[{source_key}] 完成: 扫描 {scanned} 条, 新增 {inserted} 条")
 
 
Reference in New Issue
Block a user