feat: add shared progress API and resume/skip support for douyin batch

This commit is contained in:
hello-dd-code
2026-03-07 01:06:40 +08:00
parent 86cf933913
commit e10437cd90
3 changed files with 594 additions and 19 deletions
+311 -14
View File
@@ -1,7 +1,7 @@
// ==UserScript==
// @name Douyin Batch City Search + AutoScroll + Capture
// @namespace http://tampermonkey.net/
// @version 1.0
// @version 1.1
// @description 从 Python 服务获取地区列表,按 city + "律师" 搜索并自动下滑,拦截 /aweme/v1/web/discover/search/ 返回并转发到入库接口。
// @author You
// @match https://www.douyin.com/*
@@ -42,6 +42,13 @@
const SCROLL_BY = 2200;
const WAIT_AFTER_SEARCH_MS = 1000;
// Pause between finishing one city and starting the next one in the batch loop.
const DELAY_BETWEEN_CITIES_MS = 1500;
// Resume-from-checkpoint configuration
// localStorage key under which the local checkpoint (next city index + area signature) is saved.
const PROGRESS_STORAGE_KEY = 'dm_batch_progress_v1';
// localStorage key under which the generated device id is saved.
const DEVICE_ID_STORAGE_KEY = 'dm_batch_device_id_v1';
// When false, remote progress sync and remote progress restore are both skipped.
const PROGRESS_SYNC_ENABLED = true;
// Value sent as `progress_key` with every remote progress request.
const PROGRESS_KEY = 'douyin_batch_default';
// Remote progress endpoint, built from API_BASE (declared outside this section).
// It already carries a query string, so callers append further parameters with '&'.
const PROGRESS_API = `${API_BASE}/api/layer/progress?server=1`;
// Optional: to send only entries that contain a phone number, enable this and adjust the regex
const ONLY_SEND_IF_HAS_PHONE = false;
@@ -50,14 +57,20 @@
/********************* Runtime state *********************/
// Area rows to iterate over; the batch loop fetches the list only while this is empty.
let areaList = [];
let stopFlag = false; // controlled by the UI; true means stop the whole task
let skipCurrentCityFlag = false; // controlled by the UI; true means skip the current city
// Index into areaList of the city being processed; -1 means no city has been started yet.
let currentCityIndex = -1;
// Signature of the current areaList (see buildAreaSignature); a checkpoint is only restored when it matches.
let currentAreaSignature = '';
// Guard that prevents runBatchSearchLoop from being started twice concurrently.
let isLoopRunning = false;
// NOTE(review): presumably the cached search input and search button elements — their use is not shown in this section.
let inputEl = null;
let btnEl = null;
// Identifier for this browser, sent as `device_id` with remote progress requests.
const DEVICE_ID = getOrCreateDeviceId();
// Throttled / de-duplicated sending
let lastSentHash = null;
let lastSentAt = 0;
const SEND_MIN_INTERVAL_MS = 800;
// True while flushRemoteProgressSync is posting; ensures only one flush loop runs at a time.
let progressSyncInFlight = false;
// Most recent payload waiting to be posted; newer payloads overwrite older unsent ones.
let progressSyncPendingPayload = null;
/********************* 工具函数 *********************/
function log(...args) { console.log('[DouyinBatch] ', ...args); }
@@ -71,6 +84,106 @@
}
return h.toString(16);
}
/**
 * Wait for the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Return the device id stored in localStorage, creating and storing one if absent.
 *
 * A new id comes from `window.crypto.randomUUID()` when that function exists,
 * otherwise from a timestamp + random hex string. If anything throws (for
 * example storage access is denied) a timestamp-based id is returned without
 * being persisted.
 *
 * @returns {string} The existing or newly generated device id.
 */
function getOrCreateDeviceId() {
  // Timestamp + random hex id used when randomUUID or storage is unavailable.
  const makeFallbackId = () => `dm-${Date.now()}-${Math.random().toString(16).slice(2, 10)}`;

  try {
    const storedId = localStorage.getItem(DEVICE_ID_STORAGE_KEY);
    if (storedId) {
      return storedId;
    }

    const cryptoApi = window.crypto;
    let freshId;
    if (cryptoApi && typeof cryptoApi.randomUUID === 'function') {
      freshId = cryptoApi.randomUUID();
    } else {
      freshId = makeFallbackId();
    }

    localStorage.setItem(DEVICE_ID_STORAGE_KEY, freshId);
    return freshId;
  } catch (_) {
    return makeFallbackId();
  }
}
/**
 * Extract a display name from an area row.
 *
 * The first truthy value among `city`, `province` and `name` is stringified
 * and trimmed.
 *
 * @param {object} row - Area row; non-object or null input yields ''.
 * @returns {string} Trimmed name, or '' when no field is usable.
 */
function getAreaRowName(row) {
  if (row === null || typeof row !== 'object') {
    return '';
  }

  let picked = '';
  for (const field of ['city', 'province', 'name']) {
    const value = row[field];
    if (value) {
      picked = value;
      break;
    }
  }
  return String(picked).trim();
}
/**
 * Build a signature string for an area list so a saved checkpoint can be
 * matched against the list it was created for.
 *
 * @param {Array<object>} list - Area rows.
 * @returns {string} 'empty' for a missing/empty list, 'unknown' if hashing
 *   throws, otherwise the hash of "<length>|<name>|<name>…" over the
 *   non-empty row names.
 */
function buildAreaSignature(list) {
  try {
    const hasRows = Array.isArray(list) && list.length > 0;
    if (!hasRows) {
      return 'empty';
    }

    const names = [];
    for (const row of list) {
      const name = getAreaRowName(row);
      if (name) {
        names.push(name);
      }
    }
    const fingerprint = `${list.length}|${names.join('|')}`;
    return hashString(fingerprint);
  } catch (e) {
    return 'unknown';
  }
}
/**
 * Read the locally saved checkpoint from localStorage.
 *
 * @returns {object|null} The parsed checkpoint, or null when nothing is
 *   stored, the stored text is not valid JSON, the parsed value is not an
 *   object, or storage access throws.
 */
function readProgress() {
  let parsed;
  try {
    const raw = localStorage.getItem(PROGRESS_STORAGE_KEY);
    if (!raw) {
      return null;
    }
    parsed = JSON.parse(raw);
  } catch (_) {
    return null;
  }
  return parsed !== null && typeof parsed === 'object' ? parsed : null;
}
/**
 * Build the progress payload that is sent to the remote progress API and
 * used as the source for the local checkpoint.
 *
 * @param {number} nextCityIndex - Index of the city to resume from; non-finite
 *   values become 0, negatives are clamped to 0 and fractions are floored.
 * @param {string} [reason=''] - Short tag describing why progress is saved.
 * @returns {object} Payload with progress key, device id, resume index, area
 *   signature/total, current city name, reason, status ('paused' when
 *   stopFlag is set, otherwise 'running') and the page location.
 */
function buildProgressPayload(nextCityIndex, reason = '') {
  let safeIndex = 0;
  if (Number.isFinite(nextCityIndex)) {
    safeIndex = Math.max(0, Math.floor(nextCityIndex));
  }

  // Prefer the row being resumed; fall back to the row currently in progress.
  const fallbackIndex = Math.max(0, currentCityIndex);
  const currentArea = areaList[safeIndex] || areaList[fallbackIndex] || {};

  const areaTotal = Array.isArray(areaList) ? areaList.length : 0;
  const status = stopFlag ? 'paused' : 'running';
  const pagePath = location.pathname || '';
  const pageHref = location.href || '';

  return {
    progress_key: PROGRESS_KEY,
    device_id: DEVICE_ID,
    next_city_index: safeIndex,
    area_signature: currentAreaSignature || '',
    area_total: areaTotal,
    current_city: getAreaRowName(currentArea),
    reason,
    status,
    extra: {
      path: pagePath,
      href: pageHref,
    },
  };
}
/**
 * Save the resume checkpoint locally and queue it for remote sync.
 *
 * The local write and the remote enqueue are isolated from each other: when
 * localStorage is unavailable (quota exceeded, storage disabled) the
 * checkpoint is still queued for the remote progress API, which exists as the
 * backup. This function never throws; failures are logged via err().
 *
 * @param {number} nextCityIndex - Index of the city to resume from.
 * @param {string} [reason=''] - Short tag describing why progress is saved.
 * @returns {void}
 */
function persistProgress(nextCityIndex, reason = '') {
  let payload;
  try {
    payload = buildProgressPayload(nextCityIndex, reason);
  } catch (e) {
    // Without a payload there is nothing to store or sync.
    err('保存进度失败', e);
    return;
  }

  try {
    localStorage.setItem(PROGRESS_STORAGE_KEY, JSON.stringify({
      nextCityIndex: payload.next_city_index,
      areaSignature: payload.area_signature,
      reason: payload.reason,
      updatedAt: Date.now(),
      progressKey: payload.progress_key,
      deviceId: payload.device_id,
    }));
  } catch (e) {
    // Local failure must not prevent the remote backup below.
    err('保存进度失败', e);
  }

  try {
    enqueueRemoteProgressSync(payload);
  } catch (e) {
    err('同步远端进度失败', e);
  }
}
/**
 * Determine the resume index from the locally saved checkpoint.
 *
 * @param {string} areaSignature - Signature of the current area list.
 * @param {number} listLength - Number of rows in the current area list.
 * @returns {number} The saved (floored) index when the checkpoint belongs to
 *   the same area list and lies within [0, listLength); otherwise 0.
 */
function restoreProgress(areaSignature, listLength) {
  const saved = readProgress();
  if (!saved) {
    return 0;
  }

  const savedSignature = saved.areaSignature;
  const sameList = Boolean(savedSignature) && savedSignature === areaSignature;
  if (!sameList) {
    return 0;
  }

  const { nextCityIndex } = saved;
  const candidate = Number.isFinite(nextCityIndex) ? Math.floor(nextCityIndex) : 0;
  const outOfRange = candidate < 0 || candidate >= listLength;
  return outOfRange ? 0 : candidate;
}
/**
 * Remove the local checkpoint and queue a 'clear' request for the remote
 * progress API.
 *
 * Removal from localStorage is best-effort: a storage error is ignored and the
 * remote clear is still queued.
 *
 * @returns {void}
 */
function clearProgress() {
  const clearRequest = {
    action: 'clear',
    progress_key: PROGRESS_KEY,
    device_id: DEVICE_ID,
  };

  try {
    localStorage.removeItem(PROGRESS_STORAGE_KEY);
  } catch (_) {
    // Best effort: storage may be unavailable; the remote clear still goes out.
  }

  enqueueRemoteProgressSync(clearRequest);
}
function gmGetJson(url) {
return new Promise((resolve, reject) => {
@@ -89,6 +202,76 @@
});
});
}
/**
 * POST a JSON body with GM_xmlhttpRequest and parse the JSON response.
 *
 * The returned promise always settles: besides network errors it also rejects
 * on timeout and abort, so a stalled request cannot leave a caller waiting
 * forever. Responses with an HTTP status of 400 or above are rejected instead
 * of being reported as success.
 *
 * @param {string} url - Target URL.
 * @param {object} [data] - Value serialised as the JSON body; falsy values are sent as {}.
 * @param {number} [timeoutMs=15000] - Request timeout in milliseconds.
 * @returns {Promise<object>} Parsed response body ({} when the body is empty).
 *   Rejects with the underlying error/response object on network failure, a
 *   SyntaxError when the body is not valid JSON, or an Error on HTTP error
 *   status, timeout or abort.
 */
function gmPostJson(url, data, timeoutMs = 15000) {
  return new Promise((resolve, reject) => {
    GM_xmlhttpRequest({
      method: 'POST',
      url,
      headers: { 'Content-Type': 'application/json' },
      data: JSON.stringify(data || {}),
      timeout: timeoutMs,
      onload(res) {
        if (res.status >= 400) {
          reject(new Error(`POST ${url} failed with HTTP ${res.status}`));
          return;
        }
        try {
          resolve(JSON.parse(res.responseText || '{}'));
        } catch (e) {
          reject(e);
        }
      },
      // Named `error` so the file-level err() logger is not shadowed.
      onerror(error) { reject(error); },
      ontimeout() { reject(new Error(`POST ${url} timed out after ${timeoutMs} ms`)); },
      onabort() { reject(new Error(`POST ${url} was aborted`)); },
    });
  });
}
/**
 * Queue a payload for the remote progress API and start a flush if none is
 * running.
 *
 * Only the most recent payload is kept: a newer payload replaces any that has
 * not been sent yet. Does nothing when sync is disabled or the payload is not
 * an object.
 *
 * @param {object} payload - Progress payload (or clear request) to send.
 * @returns {void}
 */
function enqueueRemoteProgressSync(payload) {
  const isObjectPayload = Boolean(payload) && typeof payload === 'object';
  if (!PROGRESS_SYNC_ENABLED || !isObjectPayload) {
    return;
  }

  progressSyncPendingPayload = payload;

  // A running flush loop will pick the new payload up on its next iteration.
  if (!progressSyncInFlight) {
    flushRemoteProgressSync();
  }
}
/**
 * Post pending progress payloads to the remote progress API, one at a time.
 *
 * Only one flush runs at a time. Each iteration takes the pending payload,
 * clears the slot and posts it; payloads queued while a post is in progress
 * are sent on the following iteration. The first failed post is logged and
 * ends the flush (the failed payload is not retried).
 *
 * @returns {Promise<void>} Resolves when nothing is left to send or a post failed.
 */
async function flushRemoteProgressSync() {
  if (!PROGRESS_SYNC_ENABLED || progressSyncInFlight) {
    return;
  }

  progressSyncInFlight = true;
  try {
    for (;;) {
      const nextPayload = progressSyncPendingPayload;
      if (!nextPayload) {
        return;
      }
      // Clear the slot before awaiting so a payload queued meanwhile is not lost.
      progressSyncPendingPayload = null;

      try {
        await gmPostJson(PROGRESS_API, nextPayload);
      } catch (syncError) {
        err('同步远端进度失败', syncError);
        return;
      }
    }
  } finally {
    progressSyncInFlight = false;
  }
}
/**
 * Determine the resume index from the checkpoint stored by the remote
 * progress API.
 *
 * @param {string} areaSignature - Signature of the current area list.
 * @param {number} listLength - Number of rows in the current area list.
 * @returns {Promise<number>} The remote (floored) index when the remote
 *   checkpoint carries the same area signature and the index lies within
 *   [0, listLength); otherwise 0. Also 0 when sync is disabled or the request
 *   fails (the failure is logged).
 */
async function restoreRemoteProgress(areaSignature, listLength) {
  if (!PROGRESS_SYNC_ENABLED) {
    return 0;
  }

  try {
    // PROGRESS_API already has a query string, so the key is appended with '&'.
    const query = `progress_key=${encodeURIComponent(PROGRESS_KEY)}`;
    const response = await gmGetJson(`${PROGRESS_API}&${query}`);

    const remote = (response && response.data) || null;
    if (remote === null || typeof remote !== 'object') {
      return 0;
    }

    const remoteSignature = String(remote.area_signature || '');
    if (remoteSignature === '' || remoteSignature !== areaSignature) {
      return 0;
    }

    // The index may arrive as a number or as a numeric string.
    const rawIndex = remote.next_city_index;
    let index;
    if (Number.isFinite(rawIndex)) {
      index = Math.floor(rawIndex);
    } else {
      index = Math.floor(Number(rawIndex || 0));
    }

    const rejected = !Number.isFinite(index) || index < 0 || index >= listLength;
    return rejected ? 0 : index;
  } catch (e) {
    err('读取远端进度失败', e);
    return 0;
  }
}
function setNativeValue(el, value) {
if (!el) return;
@@ -263,6 +446,11 @@
let scrolls = 0;
while (!stopFlag) {
if (skipCurrentCityFlag) {
statusNode.textContent = '收到跳过指令,结束当前地区滚动。';
break;
}
scrolls++;
if (scrolls > maxScrolls) {
statusNode.textContent = `达到单次搜索最大滚动 ${maxScrolls},停止本次自动下滑。`;
@@ -276,7 +464,12 @@
window.scrollTo(0, (document.body.scrollHeight || document.documentElement.scrollHeight));
}
await new Promise(r => setTimeout(r, SCROLL_INTERVAL_MS));
await sleep(SCROLL_INTERVAL_MS);
if (skipCurrentCityFlag) {
statusNode.textContent = '收到跳过指令,结束当前地区滚动。';
break;
}
const curHeight = document.body.scrollHeight || document.documentElement.scrollHeight || 0;
if (curHeight === lastHeight) {
@@ -391,7 +584,9 @@
const css = `
#dm-batch-btn { position: fixed; right: 12px; bottom: 12px; z-index:999999; background: rgba(0,0,0,0.65); color:#fff;
padding:8px 10px; border-radius:8px; font-size:13px; cursor:pointer; user-select:none;}
#dm-batch-status { position: fixed; right:12px; bottom:56px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff;
#dm-batch-skip { position: fixed; right:12px; bottom:50px; z-index:999999; background: rgba(30,30,30,0.72); color:#fff;
padding:7px 10px; border-radius:8px; font-size:12px; cursor:pointer; user-select:none;}
#dm-batch-status { position: fixed; right:12px; bottom:88px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff;
padding:6px 8px; border-radius:6px; font-size:12px; max-width:320px; word-break:break-word;}
`;
const s = document.createElement('style'); s.textContent = css; document.head && document.head.appendChild(s);
@@ -401,6 +596,11 @@
btn.textContent = 'BatchSearch:停止';
btn.dataset.running = '1';
document.body.appendChild(btn);
const skipBtn = document.createElement('div');
skipBtn.id = 'dm-batch-skip';
skipBtn.textContent = 'BatchSearch:跳过当前';
document.body.appendChild(skipBtn);
const status = document.createElement('div');
status.id = 'dm-batch-status';
@@ -411,21 +611,55 @@
const running = btn.dataset.running === '1';
btn.dataset.running = running ? '0' : '1';
btn.textContent = running ? 'BatchSearch:已停止' : 'BatchSearch:停止';
status.textContent = running ? '已手动停止' : '已启动';
status.textContent = running ? '已手动停止(已保存断点)' : '已启动';
stopFlag = running; // if was running and clicked -> set stopFlag true; if restarting, set false
if (running) {
skipCurrentCityFlag = false;
persistProgress(Math.max(currentCityIndex, 0), 'manual_pause');
}
if (!stopFlag) {
// restart loop if needed
runBatchSearchLoop(status).catch(e => err(e));
}
});
skipBtn.addEventListener('click', () => {
if (currentCityIndex < 0) {
status.textContent = '当前还未开始处理城市,稍后再跳过。';
return;
}
skipCurrentCityFlag = true;
const areaName = getAreaRowName(areaList[currentCityIndex] || {});
status.textContent = `收到跳过指令:${areaName || `索引${currentCityIndex}`}`;
});
skipBtn.addEventListener('contextmenu', (event) => {
event.preventDefault();
clearProgress();
currentCityIndex = 0;
status.textContent = '已清除断点。下次将从第 1 个地区开始。';
});
return { btn, status };
return { btn, skipBtn, status };
}
/********************* 主流程:获取城市并循环搜索 *********************/
async function runBatchSearchLoop(statusNode) {
if (isLoopRunning) {
statusNode.textContent = '批量任务已在运行中,请勿重复启动。';
return;
}
isLoopRunning = true;
try {
stopFlag = (document.getElementById('dm-batch-btn') && document.getElementById('dm-batch-btn').dataset.running === '0');
skipCurrentCityFlag = false;
if (stopFlag) {
statusNode.textContent = '当前是暂停状态,点击“BatchSearch:停止”可继续。';
return;
}
// 获取 area list(仅在内存为空时获取)
if (!areaList || !Array.isArray(areaList) || areaList.length === 0) {
statusNode.textContent = '正在获取城市列表...';
@@ -450,6 +684,20 @@
return;
}
}
currentAreaSignature = buildAreaSignature(areaList);
const restoredIndexLocal = restoreProgress(currentAreaSignature, areaList.length);
const restoredIndexRemote = await restoreRemoteProgress(currentAreaSignature, areaList.length);
const restoredIndex = Math.max(restoredIndexLocal, restoredIndexRemote);
const startIndex = (currentCityIndex >= 0 && currentCityIndex < areaList.length)
? currentCityIndex
: restoredIndex;
currentCityIndex = startIndex;
if (startIndex > 0) {
statusNode.textContent = `检测到断点(本地:${restoredIndexLocal + 1} 远端:${restoredIndexRemote + 1}),将从第 ${startIndex + 1}/${areaList.length} 个地区继续。`;
await sleep(500);
}
// 等待搜索输入与按钮可用
try {
@@ -459,13 +707,28 @@
statusNode.textContent = '未找到搜索输入或按钮,脚本仍会监听接口,但无法自动搜索。';
return;
}
let completedAll = true;
// 主循环:对每个 city 执行搜索 -> 下滑 -> 发送结果 -> 下一 city
for (let i = 0; i < areaList.length; i++) {
if (stopFlag) { statusNode.textContent = '已停止'; break; }
for (let i = startIndex; i < areaList.length; i++) {
if (stopFlag) {
completedAll = false;
persistProgress(i, 'manual_stop');
statusNode.textContent = '已停止(断点已保存)。';
break;
}
currentCityIndex = i;
skipCurrentCityFlag = false;
persistProgress(i, 'start_city');
const city = (areaList[i].city || areaList[i].province || '').trim();
if (!city) continue;
if (!city) {
persistProgress(i + 1, 'empty_city');
continue;
}
const keyword = `${city}律师`;
statusNode.textContent = `正在搜索:${keyword} ${i+1}/${areaList.length}`;
log(`开始城市[${i+1}/${areaList.length}] 搜索:`, keyword);
@@ -476,6 +739,8 @@
} catch (e) {
err('刷新搜索控件失败', e);
statusNode.textContent = '刷新搜索控件失败,终止批量搜索。';
completedAll = false;
persistProgress(i, 'search_control_error');
break;
}
@@ -488,6 +753,8 @@
await ensureSearchControls(statusNode);
if (!simulateSearchTrigger()) {
statusNode.textContent = '搜索触发失败,终止批量搜索。';
completedAll = false;
persistProgress(i, 'search_trigger_error');
break;
}
}
@@ -497,25 +764,55 @@
// 自动下滑直到稳定或达到上限
await autoScrollUntilStable(statusNode, MAX_SCROLLS_PER_CITY);
if (skipCurrentCityFlag) {
skipCurrentCityFlag = false;
persistProgress(i + 1, 'skip_city');
statusNode.textContent = `已跳过 ${keyword},继续下一个地区...`;
await sleep(Math.min(DELAY_BETWEEN_CITIES_MS, 800));
continue;
}
if (stopFlag) { statusNode.textContent = '已停止'; break; }
if (stopFlag) {
completedAll = false;
persistProgress(i, 'manual_stop_after_scroll');
statusNode.textContent = '已停止(断点已保存)。';
break;
}
persistProgress(i + 1, 'city_done');
// 等待短暂间隔再进行下一个城市
statusNode.textContent = `完成 ${keyword} 的加载,等待 ${DELAY_BETWEEN_CITIES_MS} ms 后继续...`;
await new Promise(r => setTimeout(r, DELAY_BETWEEN_CITIES_MS));
await sleep(DELAY_BETWEEN_CITIES_MS);
}
if (completedAll && !stopFlag) {
clearProgress();
currentCityIndex = -1;
statusNode.textContent = '批量搜索完成,已清除断点进度。';
log('批量搜索循环结束: completed');
} else {
log('批量搜索循环结束: paused/broken');
}
statusNode.textContent = '批量搜索完成或已停止。';
log('批量搜索循环结束');
} catch (e) {
err('runBatchSearchLoop error', e);
persistProgress(Math.max(currentCityIndex, 0), 'loop_exception');
} finally {
isLoopRunning = false;
}
}
/********************* 启动脚本 *********************/
(function init() {
window.addEventListener('beforeunload', () => {
if (currentCityIndex >= 0) {
persistProgress(Math.max(currentCityIndex, 0), 'page_unload');
}
});
const ui = createUI();
ui.status.textContent = '就绪 - 点击右下按钮可停止/重启批量搜索';
ui.status.textContent = '就绪 - 可暂停/跳过,自动保存断点(右键跳过按钮可清除断点)';
console.log(location.pathname)
// 如果当前为目标页面(/jingxuan/search/),则自动启动;否则仍可在任何页面打开并手动启动。
const isAutoPage = location.pathname && location.pathname.indexOf('/search/') !== -1;