feat: add shared progress API and resume/skip support for douyin batch
This commit is contained in:
+311
-14
@@ -1,7 +1,7 @@
|
||||
// ==UserScript==
|
||||
// @name Douyin Batch City Search + AutoScroll + Capture
|
||||
// @namespace http://tampermonkey.net/
|
||||
// @version 1.0
|
||||
// @version 1.1
|
||||
// @description 从 Python 服务获取地区列表,按 city + "律师" 搜索并自动下滑,拦截 /aweme/v1/web/discover/search/ 返回并转发到入库接口。
|
||||
// @author You
|
||||
// @match https://www.douyin.com/*
|
||||
@@ -42,6 +42,13 @@
|
||||
const SCROLL_BY = 2200;
|
||||
const WAIT_AFTER_SEARCH_MS = 1000;
|
||||
const DELAY_BETWEEN_CITIES_MS = 1500;
|
||||
|
||||
// 断点续跑配置
|
||||
const PROGRESS_STORAGE_KEY = 'dm_batch_progress_v1';
|
||||
const DEVICE_ID_STORAGE_KEY = 'dm_batch_device_id_v1';
|
||||
const PROGRESS_SYNC_ENABLED = true;
|
||||
const PROGRESS_KEY = 'douyin_batch_default';
|
||||
const PROGRESS_API = `${API_BASE}/api/layer/progress?server=1`;
|
||||
|
||||
// 可选:如果希望只发送包含手机号的条目,可在此启用并调整正则
|
||||
const ONLY_SEND_IF_HAS_PHONE = false;
|
||||
@@ -50,14 +57,20 @@
|
||||
/********************* 运行时状态 *********************/
|
||||
let areaList = [];
|
||||
let stopFlag = false; // 由 UI 控制,true 表示停止整个任务
|
||||
let skipCurrentCityFlag = false; // 由 UI 控制,true 表示跳过当前城市
|
||||
let currentCityIndex = -1;
|
||||
let currentAreaSignature = '';
|
||||
let isLoopRunning = false;
|
||||
let inputEl = null;
|
||||
let btnEl = null;
|
||||
const DEVICE_ID = getOrCreateDeviceId();
|
||||
|
||||
// 节流/去重发送
|
||||
let lastSentHash = null;
|
||||
let lastSentAt = 0;
|
||||
const SEND_MIN_INTERVAL_MS = 800;
|
||||
let progressSyncInFlight = false;
|
||||
let progressSyncPendingPayload = null;
|
||||
|
||||
/********************* 工具函数 *********************/
|
||||
function log(...args) { console.log('[DouyinBatch] ', ...args); }
|
||||
@@ -71,6 +84,106 @@
|
||||
}
|
||||
return h.toString(16);
|
||||
}
|
||||
|
||||
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) when the timer fires.
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
||||
|
||||
/**
 * Return the device id stored under DEVICE_ID_STORAGE_KEY, creating one if none exists.
 *
 * A new id comes from window.crypto.randomUUID() when available, otherwise from a
 * timestamp + Math.random() fallback. Persisting the new id is best effort: if
 * localStorage rejects the write, the id that was generated is still returned
 * instead of being thrown away and replaced by a second one.
 *
 * @returns {string} The stored or newly generated device id. Never throws.
 */
function getOrCreateDeviceId() {
    const makeFallbackId = () => `dm-${Date.now()}-${Math.random().toString(16).slice(2, 10)}`;

    let storageReadable = true;
    try {
        const existing = localStorage.getItem(DEVICE_ID_STORAGE_KEY);
        if (existing) return existing;
    } catch (_) {
        // Storage is unavailable (e.g. blocked by the browser). Do not write later either,
        // so a transient read failure can never overwrite an id that is already stored.
        storageReadable = false;
    }

    let deviceId;
    try {
        deviceId = (window.crypto && typeof window.crypto.randomUUID === 'function')
            ? window.crypto.randomUUID()
            : makeFallbackId();
    } catch (_) {
        // window/crypto missing or randomUUID failed: use the non-crypto fallback.
        deviceId = makeFallbackId();
    }

    if (storageReadable) {
        try {
            localStorage.setItem(DEVICE_ID_STORAGE_KEY, deviceId);
        } catch (_) {
            // Best effort: the id stays valid for this page session even if it cannot be saved.
        }
    }
    return deviceId;
}
|
||||
|
||||
/**
 * Pick the display name of an area row.
 *
 * The first truthy value among row.city, row.province and row.name is used,
 * converted to a string and trimmed.
 *
 * @param {*} row - Area record; anything that is not a non-null object yields ''.
 * @returns {string} Trimmed name, or '' when no field is set.
 */
function getAreaRowName(row) {
    const isRecord = Boolean(row) && typeof row === 'object';
    if (!isRecord) {
        return '';
    }
    const candidate = row.city || row.province || row.name || '';
    return String(candidate).trim();
}
|
||||
|
||||
/**
 * Build a signature string for an area list.
 *
 * The signature is hashString() applied to "<row count>|<name1>|<name2>|...",
 * where the names come from getAreaRowName() and empty names are left out.
 *
 * @param {*} list - Area rows.
 * @returns {string} 'empty' for a non-array or empty array, 'unknown' if anything
 *   throws while building the signature, otherwise the hash.
 */
function buildAreaSignature(list) {
    try {
        const hasRows = Array.isArray(list) && list.length > 0;
        if (!hasRows) {
            return 'empty';
        }

        const labels = [];
        for (const row of list) {
            const label = getAreaRowName(row);
            if (label) {
                labels.push(label);
            }
        }

        return hashString(`${list.length}|${labels.join('|')}`);
    } catch (e) {
        return 'unknown';
    }
}
|
||||
|
||||
/**
 * Load the locally saved progress record.
 *
 * Reads the JSON stored under PROGRESS_STORAGE_KEY in localStorage.
 *
 * @returns {object|null} The parsed value when it is a non-null object; null when
 *   nothing is stored, the value is not an object, or reading/parsing throws.
 */
function readProgress() {
    let snapshot = null;
    try {
        const serialized = localStorage.getItem(PROGRESS_STORAGE_KEY);
        if (serialized) {
            const candidate = JSON.parse(serialized);
            if (candidate && typeof candidate === 'object') {
                snapshot = candidate;
            }
        }
    } catch (_) {
        snapshot = null;
    }
    return snapshot;
}
|
||||
|
||||
/**
 * Assemble the progress record that is saved locally and posted to the progress API.
 *
 * @param {number} nextCityIndex - Index to resume from; non-finite values become 0,
 *   other values are floored and clamped to be >= 0.
 * @param {string} [reason=''] - Label describing why the checkpoint is written.
 * @returns {object} Payload with progress_key, device_id, next_city_index,
 *   area_signature, area_total, current_city, reason, status and extra (path, href).
 */
function buildProgressPayload(nextCityIndex, reason = '') {
    let safeIndex = 0;
    if (Number.isFinite(nextCityIndex)) {
        safeIndex = Math.max(0, Math.floor(nextCityIndex));
    }

    // If the resume index has no row (e.g. one past the end), describe the city
    // currently being processed instead.
    const fallbackIndex = Math.max(0, currentCityIndex);
    const currentArea = areaList[safeIndex] || areaList[fallbackIndex] || {};

    const areaTotal = Array.isArray(areaList) ? areaList.length : 0;
    const status = stopFlag ? 'paused' : 'running';
    const extra = {
        path: location.pathname || '',
        href: location.href || '',
    };

    return {
        progress_key: PROGRESS_KEY,
        device_id: DEVICE_ID,
        next_city_index: safeIndex,
        area_signature: currentAreaSignature || '',
        area_total: areaTotal,
        current_city: getAreaRowName(currentArea),
        reason,
        status,
        extra,
    };
}
|
||||
|
||||
/**
 * Save a resume checkpoint locally and queue it for the remote progress API.
 *
 * The local write and the remote sync are independent: a localStorage failure
 * (quota exceeded, storage disabled) is logged but no longer prevents the remote
 * checkpoint from being queued.
 *
 * @param {number} nextCityIndex - Index of the city to resume from.
 * @param {string} [reason=''] - Label describing why the checkpoint is written.
 * @returns {void} Failures are reported through err(), not thrown.
 */
function persistProgress(nextCityIndex, reason = '') {
    let payload;
    try {
        payload = buildProgressPayload(nextCityIndex, reason);
    } catch (e) {
        // Without a payload there is nothing to store or send.
        err('保存进度失败', e);
        return;
    }

    try {
        localStorage.setItem(PROGRESS_STORAGE_KEY, JSON.stringify({
            nextCityIndex: payload.next_city_index,
            areaSignature: payload.area_signature,
            reason: payload.reason,
            updatedAt: Date.now(),
            progressKey: payload.progress_key,
            deviceId: payload.device_id,
        }));
    } catch (e) {
        err('保存进度失败', e);
    }

    try {
        enqueueRemoteProgressSync(payload);
    } catch (e) {
        err('保存进度失败', e);
    }
}
|
||||
|
||||
/**
 * Work out the resume index from the locally saved progress.
 *
 * @param {string} areaSignature - Signature of the area list currently loaded.
 * @param {number} listLength - Number of rows in that list.
 * @returns {number} The saved index when the saved signature matches and the index
 *   is not negative and not >= listLength; otherwise 0.
 */
function restoreProgress(areaSignature, listLength) {
    const saved = readProgress();
    if (!saved) {
        return 0;
    }

    const savedSignature = saved.areaSignature;
    if (!savedSignature || savedSignature !== areaSignature) {
        return 0;
    }

    let resumeAt = 0;
    if (Number.isFinite(saved.nextCityIndex)) {
        resumeAt = Math.floor(saved.nextCityIndex);
    }

    const outOfRange = resumeAt < 0 || resumeAt >= listLength;
    return outOfRange ? 0 : resumeAt;
}
|
||||
|
||||
/**
 * Forget the saved checkpoint.
 *
 * Removes the PROGRESS_STORAGE_KEY entry from localStorage (errors ignored) and
 * queues a payload with action 'clear' for the remote progress API.
 *
 * @returns {void}
 */
function clearProgress() {
    try {
        localStorage.removeItem(PROGRESS_STORAGE_KEY);
    } catch (_) {
        // Best effort: the remote clear request below is still queued.
    }

    const clearRequest = {
        action: 'clear',
        progress_key: PROGRESS_KEY,
        device_id: DEVICE_ID,
    };
    enqueueRemoteProgressSync(clearRequest);
}
|
||||
|
||||
function gmGetJson(url) {
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -89,6 +202,76 @@
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * POST a JSON body with GM_xmlhttpRequest and parse the JSON response.
 *
 * The returned promise always settles: besides load and error it also handles
 * timeout and abort, so a stalled request cannot leave a caller awaiting forever.
 *
 * @param {string} url - Target URL.
 * @param {*} data - Value to serialise as the request body; falsy values are sent as {}.
 * @param {number} [timeoutMs=15000] - Request timeout in milliseconds.
 * @returns {Promise<*>} Resolves with the parsed response body ({} for an empty body).
 *   Rejects with the JSON parse error, the raw detail passed to onerror, or an
 *   Error for timeout/abort.
 */
function gmPostJson(url, data, timeoutMs = 15000) {
    return new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            method: 'POST',
            url,
            headers: { 'Content-Type': 'application/json' },
            data: JSON.stringify(data || {}),
            timeout: timeoutMs,
            onload(res) {
                try {
                    const json = JSON.parse(res.responseText || '{}');
                    resolve(json);
                } catch (e) {
                    reject(e);
                }
            },
            // Parameter is not named `err` so it does not shadow the file's err() logger.
            onerror(detail) { reject(detail); },
            ontimeout() { reject(new Error(`gmPostJson timed out after ${timeoutMs} ms: ${url}`)); },
            onabort() { reject(new Error(`gmPostJson aborted: ${url}`)); },
        });
    });
}
|
||||
|
||||
/**
 * Queue a payload for the remote progress API.
 *
 * Only the most recent payload is kept: each call overwrites
 * progressSyncPendingPayload. A flush is started unless one is already in flight.
 *
 * @param {object} payload - Record to send; ignored when sync is disabled or the
 *   value is not a non-null object.
 * @returns {void}
 */
function enqueueRemoteProgressSync(payload) {
    const isUsablePayload = Boolean(payload) && typeof payload === 'object';
    if (!PROGRESS_SYNC_ENABLED || !isUsablePayload) {
        return;
    }

    progressSyncPendingPayload = payload;

    if (!progressSyncInFlight) {
        flushRemoteProgressSync();
    }
}
|
||||
|
||||
/**
 * Send queued progress payloads to PROGRESS_API, one at a time.
 *
 * Keeps posting while progressSyncPendingPayload is set, so a payload queued during
 * a request is sent right after it. The first failed request is logged and ends the
 * run; the payload that failed is not re-queued. progressSyncInFlight is held true
 * for the whole run and always reset afterwards.
 *
 * @returns {Promise<void>}
 */
async function flushRemoteProgressSync() {
    if (!PROGRESS_SYNC_ENABLED || progressSyncInFlight) {
        return;
    }

    progressSyncInFlight = true;
    try {
        for (;;) {
            const next = progressSyncPendingPayload;
            if (!next) {
                break;
            }
            // Take the payload out of the slot before awaiting so a newer one can be queued.
            progressSyncPendingPayload = null;

            try {
                await gmPostJson(PROGRESS_API, next);
            } catch (e) {
                err('同步远端进度失败', e);
                break;
            }
        }
    } finally {
        progressSyncInFlight = false;
    }
}
|
||||
|
||||
/**
 * Fetch the resume index from the remote progress API.
 *
 * Requests PROGRESS_API with progress_key appended as a query parameter and reads
 * area_signature and next_city_index from the `data` field of the response.
 *
 * @param {string} areaSignature - Signature of the area list currently loaded.
 * @param {number} listLength - Number of rows in that list.
 * @returns {Promise<number>} The remote index when the remote signature matches and
 *   the index is finite, not negative and not >= listLength; otherwise 0. Also 0 when
 *   sync is disabled or the request fails (the failure is logged).
 */
async function restoreRemoteProgress(areaSignature, listLength) {
    if (!PROGRESS_SYNC_ENABLED) {
        return 0;
    }

    try {
        const query = `&progress_key=${encodeURIComponent(PROGRESS_KEY)}`;
        const response = await gmGetJson(`${PROGRESS_API}${query}`);

        let data = null;
        if (response && response.data) {
            data = response.data;
        }
        if (!data || typeof data !== 'object') {
            return 0;
        }

        const remoteSignature = String(data.area_signature || '');
        if (!remoteSignature || remoteSignature !== areaSignature) {
            return 0;
        }

        // The index may arrive as a number or as a numeric string.
        const rawIndex = data.next_city_index;
        let resumeAt;
        if (Number.isFinite(rawIndex)) {
            resumeAt = Math.floor(rawIndex);
        } else {
            resumeAt = Math.floor(Number(rawIndex || 0));
        }

        const unusable = !Number.isFinite(resumeAt) || resumeAt < 0 || resumeAt >= listLength;
        return unusable ? 0 : resumeAt;
    } catch (e) {
        err('读取远端进度失败', e);
        return 0;
    }
}
|
||||
|
||||
function setNativeValue(el, value) {
|
||||
if (!el) return;
|
||||
@@ -263,6 +446,11 @@
|
||||
let scrolls = 0;
|
||||
|
||||
while (!stopFlag) {
|
||||
if (skipCurrentCityFlag) {
|
||||
statusNode.textContent = '收到跳过指令,结束当前地区滚动。';
|
||||
break;
|
||||
}
|
||||
|
||||
scrolls++;
|
||||
if (scrolls > maxScrolls) {
|
||||
statusNode.textContent = `达到单次搜索最大滚动 ${maxScrolls},停止本次自动下滑。`;
|
||||
@@ -276,7 +464,12 @@
|
||||
window.scrollTo(0, (document.body.scrollHeight || document.documentElement.scrollHeight));
|
||||
}
|
||||
|
||||
await new Promise(r => setTimeout(r, SCROLL_INTERVAL_MS));
|
||||
await sleep(SCROLL_INTERVAL_MS);
|
||||
|
||||
if (skipCurrentCityFlag) {
|
||||
statusNode.textContent = '收到跳过指令,结束当前地区滚动。';
|
||||
break;
|
||||
}
|
||||
|
||||
const curHeight = document.body.scrollHeight || document.documentElement.scrollHeight || 0;
|
||||
if (curHeight === lastHeight) {
|
||||
@@ -391,7 +584,9 @@
|
||||
const css = `
|
||||
#dm-batch-btn { position: fixed; right: 12px; bottom: 12px; z-index:999999; background: rgba(0,0,0,0.65); color:#fff;
|
||||
padding:8px 10px; border-radius:8px; font-size:13px; cursor:pointer; user-select:none;}
|
||||
#dm-batch-status { position: fixed; right:12px; bottom:56px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff;
|
||||
#dm-batch-skip { position: fixed; right:12px; bottom:50px; z-index:999999; background: rgba(30,30,30,0.72); color:#fff;
|
||||
padding:7px 10px; border-radius:8px; font-size:12px; cursor:pointer; user-select:none;}
|
||||
#dm-batch-status { position: fixed; right:12px; bottom:88px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff;
|
||||
padding:6px 8px; border-radius:6px; font-size:12px; max-width:320px; word-break:break-word;}
|
||||
`;
|
||||
const s = document.createElement('style'); s.textContent = css; document.head && document.head.appendChild(s);
|
||||
@@ -401,6 +596,11 @@
|
||||
btn.textContent = 'BatchSearch:停止';
|
||||
btn.dataset.running = '1';
|
||||
document.body.appendChild(btn);
|
||||
|
||||
const skipBtn = document.createElement('div');
|
||||
skipBtn.id = 'dm-batch-skip';
|
||||
skipBtn.textContent = 'BatchSearch:跳过当前';
|
||||
document.body.appendChild(skipBtn);
|
||||
|
||||
const status = document.createElement('div');
|
||||
status.id = 'dm-batch-status';
|
||||
@@ -411,21 +611,55 @@
|
||||
const running = btn.dataset.running === '1';
|
||||
btn.dataset.running = running ? '0' : '1';
|
||||
btn.textContent = running ? 'BatchSearch:已停止' : 'BatchSearch:停止';
|
||||
status.textContent = running ? '已手动停止' : '已启动';
|
||||
status.textContent = running ? '已手动停止(已保存断点)' : '已启动';
|
||||
stopFlag = running; // if was running and clicked -> set stopFlag true; if restarting, set false
|
||||
if (running) {
|
||||
skipCurrentCityFlag = false;
|
||||
persistProgress(Math.max(currentCityIndex, 0), 'manual_pause');
|
||||
}
|
||||
if (!stopFlag) {
|
||||
// restart loop if needed
|
||||
runBatchSearchLoop(status).catch(e => err(e));
|
||||
}
|
||||
});
|
||||
|
||||
skipBtn.addEventListener('click', () => {
|
||||
if (currentCityIndex < 0) {
|
||||
status.textContent = '当前还未开始处理城市,稍后再跳过。';
|
||||
return;
|
||||
}
|
||||
skipCurrentCityFlag = true;
|
||||
const areaName = getAreaRowName(areaList[currentCityIndex] || {});
|
||||
status.textContent = `收到跳过指令:${areaName || `索引${currentCityIndex}`}`;
|
||||
});
|
||||
|
||||
skipBtn.addEventListener('contextmenu', (event) => {
|
||||
event.preventDefault();
|
||||
clearProgress();
|
||||
currentCityIndex = 0;
|
||||
status.textContent = '已清除断点。下次将从第 1 个地区开始。';
|
||||
});
|
||||
|
||||
return { btn, status };
|
||||
return { btn, skipBtn, status };
|
||||
}
|
||||
|
||||
/********************* 主流程:获取城市并循环搜索 *********************/
|
||||
async function runBatchSearchLoop(statusNode) {
|
||||
if (isLoopRunning) {
|
||||
statusNode.textContent = '批量任务已在运行中,请勿重复启动。';
|
||||
return;
|
||||
}
|
||||
|
||||
isLoopRunning = true;
|
||||
try {
|
||||
stopFlag = (document.getElementById('dm-batch-btn') && document.getElementById('dm-batch-btn').dataset.running === '0');
|
||||
skipCurrentCityFlag = false;
|
||||
|
||||
if (stopFlag) {
|
||||
statusNode.textContent = '当前是暂停状态,点击“BatchSearch:停止”可继续。';
|
||||
return;
|
||||
}
|
||||
|
||||
// 获取 area list(仅在内存为空时获取)
|
||||
if (!areaList || !Array.isArray(areaList) || areaList.length === 0) {
|
||||
statusNode.textContent = '正在获取城市列表...';
|
||||
@@ -450,6 +684,20 @@
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
currentAreaSignature = buildAreaSignature(areaList);
|
||||
const restoredIndexLocal = restoreProgress(currentAreaSignature, areaList.length);
|
||||
const restoredIndexRemote = await restoreRemoteProgress(currentAreaSignature, areaList.length);
|
||||
const restoredIndex = Math.max(restoredIndexLocal, restoredIndexRemote);
|
||||
const startIndex = (currentCityIndex >= 0 && currentCityIndex < areaList.length)
|
||||
? currentCityIndex
|
||||
: restoredIndex;
|
||||
currentCityIndex = startIndex;
|
||||
|
||||
if (startIndex > 0) {
|
||||
statusNode.textContent = `检测到断点(本地:${restoredIndexLocal + 1} 远端:${restoredIndexRemote + 1}),将从第 ${startIndex + 1}/${areaList.length} 个地区继续。`;
|
||||
await sleep(500);
|
||||
}
|
||||
|
||||
// 等待搜索输入与按钮可用
|
||||
try {
|
||||
@@ -459,13 +707,28 @@
|
||||
statusNode.textContent = '未找到搜索输入或按钮,脚本仍会监听接口,但无法自动搜索。';
|
||||
return;
|
||||
}
|
||||
|
||||
let completedAll = true;
|
||||
|
||||
// 主循环:对每个 city 执行搜索 -> 下滑 -> 发送结果 -> 下一 city
|
||||
for (let i = 0; i < areaList.length; i++) {
|
||||
if (stopFlag) { statusNode.textContent = '已停止'; break; }
|
||||
for (let i = startIndex; i < areaList.length; i++) {
|
||||
if (stopFlag) {
|
||||
completedAll = false;
|
||||
persistProgress(i, 'manual_stop');
|
||||
statusNode.textContent = '已停止(断点已保存)。';
|
||||
break;
|
||||
}
|
||||
|
||||
currentCityIndex = i;
|
||||
skipCurrentCityFlag = false;
|
||||
persistProgress(i, 'start_city');
|
||||
|
||||
const city = (areaList[i].city || areaList[i].province || '').trim();
|
||||
if (!city) continue;
|
||||
if (!city) {
|
||||
persistProgress(i + 1, 'empty_city');
|
||||
continue;
|
||||
}
|
||||
|
||||
const keyword = `${city}律师`;
|
||||
statusNode.textContent = `正在搜索:${keyword} (${i+1}/${areaList.length})`;
|
||||
log(`开始城市[${i+1}/${areaList.length}] 搜索:`, keyword);
|
||||
@@ -476,6 +739,8 @@
|
||||
} catch (e) {
|
||||
err('刷新搜索控件失败', e);
|
||||
statusNode.textContent = '刷新搜索控件失败,终止批量搜索。';
|
||||
completedAll = false;
|
||||
persistProgress(i, 'search_control_error');
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -488,6 +753,8 @@
|
||||
await ensureSearchControls(statusNode);
|
||||
if (!simulateSearchTrigger()) {
|
||||
statusNode.textContent = '搜索触发失败,终止批量搜索。';
|
||||
completedAll = false;
|
||||
persistProgress(i, 'search_trigger_error');
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -497,25 +764,55 @@
|
||||
|
||||
// 自动下滑直到稳定或达到上限
|
||||
await autoScrollUntilStable(statusNode, MAX_SCROLLS_PER_CITY);
|
||||
|
||||
if (skipCurrentCityFlag) {
|
||||
skipCurrentCityFlag = false;
|
||||
persistProgress(i + 1, 'skip_city');
|
||||
statusNode.textContent = `已跳过 ${keyword},继续下一个地区...`;
|
||||
await sleep(Math.min(DELAY_BETWEEN_CITIES_MS, 800));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (stopFlag) { statusNode.textContent = '已停止'; break; }
|
||||
if (stopFlag) {
|
||||
completedAll = false;
|
||||
persistProgress(i, 'manual_stop_after_scroll');
|
||||
statusNode.textContent = '已停止(断点已保存)。';
|
||||
break;
|
||||
}
|
||||
|
||||
persistProgress(i + 1, 'city_done');
|
||||
|
||||
// 等待短暂间隔再进行下一个城市
|
||||
statusNode.textContent = `完成 ${keyword} 的加载,等待 ${DELAY_BETWEEN_CITIES_MS} ms 后继续...`;
|
||||
await new Promise(r => setTimeout(r, DELAY_BETWEEN_CITIES_MS));
|
||||
await sleep(DELAY_BETWEEN_CITIES_MS);
|
||||
}
|
||||
|
||||
if (completedAll && !stopFlag) {
|
||||
clearProgress();
|
||||
currentCityIndex = -1;
|
||||
statusNode.textContent = '批量搜索完成,已清除断点进度。';
|
||||
log('批量搜索循环结束: completed');
|
||||
} else {
|
||||
log('批量搜索循环结束: paused/broken');
|
||||
}
|
||||
|
||||
statusNode.textContent = '批量搜索完成或已停止。';
|
||||
log('批量搜索循环结束');
|
||||
} catch (e) {
|
||||
err('runBatchSearchLoop error', e);
|
||||
persistProgress(Math.max(currentCityIndex, 0), 'loop_exception');
|
||||
} finally {
|
||||
isLoopRunning = false;
|
||||
}
|
||||
}
|
||||
|
||||
/********************* 启动脚本 *********************/
|
||||
(function init() {
|
||||
window.addEventListener('beforeunload', () => {
|
||||
if (currentCityIndex >= 0) {
|
||||
persistProgress(Math.max(currentCityIndex, 0), 'page_unload');
|
||||
}
|
||||
});
|
||||
|
||||
const ui = createUI();
|
||||
ui.status.textContent = '就绪 - 点击右下按钮可停止/重启批量搜索';
|
||||
ui.status.textContent = '就绪 - 可暂停/跳过,自动保存断点(右键跳过按钮可清除断点)';
|
||||
console.log(location.pathname)
|
||||
// 如果当前为目标页面(/jingxuan/search/),则自动启动;否则仍可在任何页面打开并手动启动。
|
||||
const isAutoPage = location.pathname && location.pathname.indexOf('/search/') !== -1;
|
||||
|
||||
Reference in New Issue
Block a user