// ==UserScript==
// @name Douyin Batch City Search + AutoScroll + Capture
// @namespace http://tampermonkey.net/
// @version 1.0
// @description 从 Python 服务获取地区列表,按 city + "律师" 搜索并自动下滑,拦截 /aweme/v1/web/discover/search/ 返回并转发到入库接口。
// @author You
// @match https://www.douyin.com/*
// @grant GM_xmlhttpRequest
// @connect *
// @run-at document-idle
// ==/UserScript==

(function () {
'use strict';

/********************* Configuration (edit as needed) *********************/
// Base URL of the Python service, e.g. http://nas.nepiedg.site:9002
const API_BASE = 'http://127.0.0.1:9002';
// Endpoint that returns the list of cities to search.
const AREA_API = `${API_BASE}/api/layer/get_area?server=1`;
// Endpoints every captured search response is POSTed to.
const SEND_TARGETS = [
  `${API_BASE}/api/layer/index?server=1&save_only=0`
];

// Selectors for the search input and search button (update when the page changes).
// They are tried in order; the first one that matches wins.
const SEARCH_INPUT_SELECTORS = [
  'input[data-e2e="search-input"]',
  'input[data-e2e="searchbar-input"]',
  'form[data-e2e="searchbar"] input',
  'input[placeholder*="搜索"]'
];
const SEARCH_BTN_SELECTORS = [
  '[data-e2e="search-button"]',
  'button[data-e2e="search-button"]',
  'span[data-e2e="search-button"]',
  'button[data-e2e="searchbar-button"]',
  'span.btn-title'
];

// Auto-scroll settings applied to each city's search.
const SCROLL_INTERVAL_MS = 2000;      // pause after each scroll step
const MAX_STABLE_COUNT = 6;           // consecutive unchanged-height checks that end scrolling
const MAX_SCROLLS_PER_CITY = 120;     // hard cap on scroll steps per search
const SCROLL_BY = 2200;               // pixels scrolled per step
const WAIT_AFTER_SEARCH_MS = 1000;    // delay between triggering a search and scrolling
const DELAY_BETWEEN_CITIES_MS = 1500; // delay before moving on to the next city

// Optional: only forward payloads that contain a phone number; adjust the regex as needed.
const ONLY_SEND_IF_HAS_PHONE = false;
// Deliberately NOT global: a `g` regex keeps `lastIndex` between `.test()` calls,
// which makes repeated presence checks give alternating results.
const PHONE_REGEX = /(?:\+?86)?1[3-9]\d{9}/;

/********************* Runtime state *********************/
let areaList = [];
let stopFlag = false; // driven by the UI; true stops the whole task
let currentCityIndex = -1;
let inputEl = null;
let btnEl = null;

// Throttling / de-duplication of outgoing sends.
let lastSentHash = null;
let lastSentAt = 0;
const SEND_MIN_INTERVAL_MS = 800;

/********************* Utility functions *********************/

/** Writes the arguments to console.log behind the script's '[DouyinBatch] ' prefix. */
function log(...args) {
  console.log('[DouyinBatch] ', ...args);
}

/** Writes the arguments to console.error behind the script's '[DouyinBatch] ' prefix. */
function err(...args) {
  console.error('[DouyinBatch] ', ...args);
}

/**
 * Computes a 32-bit FNV-1a style hash over the UTF-16 code units of a string.
 *
 * @param {string} str - Text to hash.
 * @returns {string} The unsigned 32-bit hash as lowercase hex (not zero-padded).
 */
function hashString(str) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);
    // Math.imul keeps the multiply in 32-bit space; >>> 0 makes it unsigned again.
    hash = Math.imul(hash, FNV_PRIME) >>> 0;
  }
  return hash.toString(16);
}

/**
 * Performs a GET through GM_xmlhttpRequest and parses the response body as JSON.
 *
 * @param {string} url - Address to request.
 * @param {number} [timeoutMs=30000] - Request timeout handed to GM_xmlhttpRequest.
 * @returns {Promise<*>} Resolves with the parsed JSON value.
 *   Rejects with an Error (always carrying a `message`) on network failure,
 *   timeout, abort, a non-2xx status, or a body that is not valid JSON.
 */
function gmGetJson(url, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    // Always reject with a real Error so callers can rely on `e.message`.
    const fail = (reason) => reject(new Error(`GET ${url} failed: ${reason}`));

    GM_xmlhttpRequest({
      method: 'GET',
      url,
      timeout: timeoutMs,
      onload(res) {
        if (res.status < 200 || res.status >= 300) {
          fail(`HTTP ${res.status}`);
          return;
        }
        try {
          resolve(JSON.parse(res.responseText));
        } catch (e) {
          fail(`invalid JSON (${e.message})`);
        }
      },
      onerror(res) {
        const detail = res && (res.error || res.statusText);
        fail(detail ? `network error (${detail})` : 'network error');
      },
      // Without these two handlers the promise would stay pending forever.
      ontimeout() {
        fail(`timed out after ${timeoutMs} ms`);
      },
      onabort() {
        fail('aborted');
      },
    });
  });
}

/**
 * Sets `el.value` through the `value` setter found on the element's prototype
 * chain instead of through the instance, so a setter installed on the
 * instance itself is bypassed. Falls back to a plain assignment when no
 * prototype in the chain exposes a `value` setter.
 *
 * @param {object|null} el - Target element; a falsy value is a no-op.
 * @param {string} value - Value to write.
 * @returns {void}
 */
function setNativeValue(el, value) {
  if (!el) return;

  // Walk the whole chain: the nearest prototype may not define `value` itself.
  for (let proto = Object.getPrototypeOf(el); proto; proto = Object.getPrototypeOf(proto)) {
    const descriptor = Object.getOwnPropertyDescriptor(proto, 'value');
    if (!descriptor) continue;
    if (typeof descriptor.set === 'function') {
      descriptor.set.call(el, value);
      return;
    }
    break; // `value` exists but has no setter: use the plain assignment below
  }

  el.value = value;
}

/**
 * Types `keyword` into the cached search input (`inputEl`) by writing the
 * value and dispatching the focus / input / change / blur events in sequence,
 * then waits 80 ms. Returns immediately (without waiting) when there is no
 * cached input. Errors are logged, never thrown.
 *
 * @param {string} keyword - Text to place in the search box.
 * @returns {Promise<void>}
 */
async function simulateSearchInput(keyword) {
  if (!inputEl) return;

  const hasInputEvent = typeof InputEvent === 'function';
  // Builds a bubbling InputEvent where available, otherwise a plain bubbling Event.
  const makeInputEvent = (type, inputType, data) => (hasInputEvent
    ? new InputEvent(type, { bubbles: true, inputType, data })
    : new Event(type, { bubbles: true }));

  try {
    inputEl.focus();
    inputEl.dispatchEvent(new Event('focus', { bubbles: false }));

    // Clear any previous value and announce the deletion.
    if (inputEl.value) {
      setNativeValue(inputEl, '');
      inputEl.dispatchEvent(makeInputEvent('input', 'deleteContentBackward', ''));
    }

    setNativeValue(inputEl, keyword);
    if (hasInputEvent) {
      inputEl.dispatchEvent(makeInputEvent('beforeinput', 'insertText', keyword));
    }
    inputEl.dispatchEvent(makeInputEvent('input', 'insertText', keyword));
    inputEl.dispatchEvent(new Event('change', { bubbles: true }));
    inputEl.dispatchEvent(new Event('blur', { bubbles: false }));
  } catch (e) {
    err('simulateSearchInput error', e);
  }

  await new Promise((resolve) => setTimeout(resolve, 80));
}

/**
 * Starts a search. Prefers a synthetic mousedown / mouseup / click on the
 * cached button (`btnEl`) when it is still attached to the document; if that
 * is not possible or throws, falls back to a synthetic Enter key sequence on
 * the cached input (`inputEl`).
 *
 * @returns {boolean} true when one of the two event sequences was dispatched
 *   without throwing, false otherwise.
 */
function simulateSearchTrigger() {
  if (btnEl && btnEl.isConnected) {
    try {
      btnEl.focus();
      for (const type of ['mousedown', 'mouseup', 'click']) {
        btnEl.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true, view: window }));
      }
      return true;
    } catch (e) {
      err('simulateSearchTrigger click error', e);
    }
  }

  if (inputEl) {
    try {
      const enterInit = { bubbles: true, cancelable: true, key: 'Enter', code: 'Enter', keyCode: 13, which: 13 };
      for (const type of ['keydown', 'keypress', 'keyup']) {
        inputEl.dispatchEvent(new KeyboardEvent(type, enterInit));
      }
      return true;
    } catch (e) {
      err('Enter 触发搜索失败', e);
    }
  }

  return false;
}

/**
 * POSTs a captured payload as JSON to every URL in SEND_TARGETS.
 *
 * The payload is skipped when ONLY_SEND_IF_HAS_PHONE is on and the serialized
 * body contains no PHONE_REGEX match, or when the identical body was already
 * sent less than SEND_MIN_INTERVAL_MS ago. Errors are logged, never thrown.
 *
 * @param {string|object} data - Payload; non-strings are JSON-serialized.
 * @returns {void}
 */
function sendToTargets(data) {
  try {
    const body = typeof data === 'string' ? data : JSON.stringify(data);

    // String#search ignores the regex's `g` flag and `lastIndex`, so this
    // presence check stays correct even when PHONE_REGEX is a global regex.
    if (ONLY_SEND_IF_HAS_PHONE && body.search(PHONE_REGEX) === -1) {
      return; // no phone number in the payload: skip sending
    }

    // NOTE(review): the interceptors in this file put a `ts` timestamp in the
    // payload, so two captures of the same response rarely serialize
    // identically. Confirm whether the de-duplication should ignore that field.
    const hash = hashString(body);
    const now = Date.now();
    if (hash === lastSentHash && now - lastSentAt < SEND_MIN_INTERVAL_MS) {
      return;
    }
    lastSentHash = hash;
    lastSentAt = now;

    for (const target of SEND_TARGETS) {
      GM_xmlhttpRequest({
        method: 'POST',
        url: target,
        headers: { 'Content-Type': 'application/json' },
        data: body,
        onload(res) { log(`sent -> ${target}, status: ${res.status}`); },
        onerror(e) { err(`send error to ${target}`, e); },
        ontimeout(e) { err(`send error to ${target}`, e); },
      });
    }
  } catch (e) {
    err('sendToTargets error', e);
  }
}

/********************* Intercept fetch and XHR (capture responses of the target API) *********************/
const TARGET_PATH = '/aweme/v1/web/discover/search/';

/**
 * Returns the request URL for the first argument of fetch(): a string,
 * an object with a string `url` (e.g. a Request), or an object with a string
 * `href` (e.g. a URL). Anything else yields ''.
 */
function resolveRequestUrl(resource) {
  if (typeof resource === 'string') return resource;
  if (resource && typeof resource.url === 'string') return resource.url;
  if (resource && typeof resource.href === 'string') return resource.href;
  return '';
}

// Wraps window.fetch so that JSON responses from TARGET_PATH are forwarded via
// sendToTargets. The original response is always handed back to the caller.
// NOTE(review): this patches the `window` the userscript sees; whether that is
// the page's own window depends on the userscript manager's sandboxing - confirm.
(function interceptFetch() {
  if (typeof window === 'undefined' || !window.fetch) return;
  const orig = window.fetch.bind(window);

  window.fetch = function (...args) {
    let url = '';
    try {
      url = resolveRequestUrl(args[0]);
    } catch (e) {
      err('fetch wrapper error', e);
    }

    if (!url || !url.includes(TARGET_PATH)) {
      return orig(...args);
    }

    return orig(...args).then((response) => {
      try {
        // Read a clone so the page can still consume the original body.
        response.clone().json().then((json) => {
          if (json && typeof json === 'object') {
            sendToTargets({ source: 'fetch', url, data: json, ts: Date.now(), cityIndex: currentCityIndex });
          }
        }).catch(() => { /* body was not JSON: nothing to forward */ });
      } catch (e) { /* cloning failed: leave the response untouched */ }
      return response;
    });
  };
})();

/**
 * Extracts the JSON payload of a finished XMLHttpRequest.
 * Handles responseType 'json' (already parsed in `response`) as well as
 * '' / 'text' (parsed from `responseText`); reading `responseText` for any
 * other responseType throws, so those are skipped.
 *
 * @returns {*} The parsed value, or undefined when there is no usable JSON.
 */
function readXhrJson(xhr) {
  try {
    if (xhr.responseType === 'json') {
      return xhr.response === null ? undefined : xhr.response;
    }
    if (xhr.responseType !== '' && xhr.responseType !== 'text') {
      return undefined;
    }
    const text = xhr.responseText;
    if (!text) return undefined;
    return JSON.parse(text);
  } catch (_) {
    return undefined; // not JSON: ignore
  }
}

// Patches XMLHttpRequest.prototype.open/send so that JSON responses from
// TARGET_PATH are forwarded via sendToTargets. The request itself is untouched.
(function interceptXHR() {
  if (typeof window === 'undefined') return;
  const XHR = window.XMLHttpRequest;
  if (!XHR) return;
  const origOpen = XHR.prototype.open;
  const origSend = XHR.prototype.send;

  XHR.prototype.open = function (method, url, ...rest) {
    // Remember the URL for send(); open() accepts URL objects as well as strings.
    try {
      this.__dm_url = url == null ? '' : String(url);
    } catch (e) {
      this.__dm_url = '';
    }
    return origOpen.apply(this, [method, url, ...rest]);
  };

  XHR.prototype.send = function (body) {
    try {
      const targetUrl = this.__dm_url || '';
      if (targetUrl && targetUrl.includes(TARGET_PATH)) {
        this.addEventListener('readystatechange', function () {
          if (this.readyState !== 4) return;
          const json = readXhrJson(this);
          if (json === undefined) return;
          sendToTargets({ source: 'xhr', url: targetUrl, data: json, ts: Date.now(), cityIndex: currentCityIndex });
        });
      }
    } catch (e) {
      err('XHR wrapper error', e);
    }
    return origSend.apply(this, [body]);
  };
})();

/********************* Auto-scroll (one search) *********************/

/**
 * Scrolls the window down by SCROLL_BY pixels every SCROLL_INTERVAL_MS until
 * the document height has stayed the same for MAX_STABLE_COUNT consecutive
 * checks, `maxScrolls` steps have been made, or `stopFlag` is set.
 * Progress is written to `statusNode.textContent`.
 *
 * @param {{textContent: string}} statusNode - Element that shows progress.
 * @param {number} [maxScrolls=MAX_SCROLLS_PER_CITY] - Upper bound on scroll steps.
 * @returns {Promise<void>}
 */
async function autoScrollUntilStable(statusNode, maxScrolls = MAX_SCROLLS_PER_CITY) {
  const pageHeight = () => document.body.scrollHeight || document.documentElement.scrollHeight;
  let lastHeight = -1;
  let stableCount = 0;

  for (let scrolls = 1; !stopFlag; scrolls++) {
    if (scrolls > maxScrolls) {
      statusNode.textContent = `达到单次搜索最大滚动 ${maxScrolls},停止本次自动下滑。`;
      break;
    }

    try {
      window.scrollBy({ top: SCROLL_BY, left: 0, behavior: 'smooth' });
    } catch (e) {
      // Options-object form not accepted: jump straight to the bottom instead.
      window.scrollTo(0, pageHeight());
    }

    await new Promise((resolve) => setTimeout(resolve, SCROLL_INTERVAL_MS));

    // A stop requested during the pause takes effect right away and keeps the
    // status text set by whoever requested it.
    if (stopFlag) break;

    const curHeight = pageHeight() || 0;
    if (curHeight === lastHeight) {
      stableCount++;
    } else {
      stableCount = 0;
      lastHeight = curHeight;
    }

    statusNode.textContent = `滚动次数: ${scrolls}, 稳定计数: ${stableCount}/${MAX_STABLE_COUNT}`;

    if (stableCount >= MAX_STABLE_COUNT) {
      statusNode.textContent = `页面高度稳定 (${stableCount}), 本次搜索加载结束。`;
      break;
    }
  }
}

/********************* Page helpers: wait for an element to appear *********************/

/**
 * Resolves with the first element matching any of the given selectors.
 * Checks once immediately, then re-checks on every child-list mutation under
 * the document root until `timeout` ms have passed.
 *
 * @param {string|string[]} selector - One selector or a list tried in order.
 * @param {number} [timeout=10000] - Milliseconds to wait before giving up.
 * @returns {Promise<Element>} Rejects with Error('timeout waiting for ...').
 */
function waitForSelector(selector, timeout = 10000) {
  const selectors = (Array.isArray(selector) ? selector : [selector]).filter(Boolean);

  // Returns the first match, or null. An invalid selector is logged and skipped.
  const findFirst = () => {
    for (const sel of selectors) {
      try {
        const found = document.querySelector(sel);
        if (found) return found;
      } catch (e) {
        err('query selector error', sel, e);
      }
    }
    return null;
  };

  return new Promise((resolve, reject) => {
    const immediate = findFirst();
    if (immediate) {
      resolve(immediate);
      return;
    }

    let timer;
    let observer;
    const stopWatching = () => {
      try { observer && observer.disconnect(); } catch (_) { /* already disconnected */ }
      if (timer) clearTimeout(timer);
    };

    observer = new MutationObserver(() => {
      const node = findFirst();
      if (node) {
        stopWatching();
        resolve(node);
      }
    });

    const root = document.documentElement || document.body;
    if (root) {
      observer.observe(root, { childList: true, subtree: true });
    }

    timer = setTimeout(() => {
      stopWatching();
      reject(new Error('timeout waiting for ' + selectors.join(', ')));
    }, timeout);
  });
}

/**
 * Makes sure the cached search input (`inputEl`) and search button (`btnEl`)
 * point at elements that are still attached to the document, looking them up
 * again when they are not.
 *
 * The input is required: if it cannot be found the waitForSelector rejection
 * propagates. The button is optional: when it cannot be found `btnEl` is left
 * null so that the search is triggered with the Enter key instead.
 *
 * @param {{textContent: string}|null} statusNode - Optional progress element.
 * @returns {Promise<{inputEl: Element, btnEl: Element|null}>}
 */
async function ensureSearchControls(statusNode) {
  const isAttached = (node) => {
    if (!node) return false;
    try {
      return node.isConnected !== undefined ? node.isConnected : document.contains(node);
    } catch (_) {
      return false;
    }
  };
  const showStatus = (text) => {
    if (statusNode) statusNode.textContent = text;
  };

  // Drop references to elements the page has since removed.
  if (!isAttached(inputEl)) inputEl = null;
  if (!isAttached(btnEl)) btnEl = null;

  if (!inputEl) {
    showStatus('等待搜索输入框可用...');
    inputEl = await waitForSelector(SEARCH_INPUT_SELECTORS, 10000);
  }

  if (!btnEl) {
    try {
      showStatus('等待搜索按钮可用...');
      btnEl = await waitForSelector(SEARCH_BTN_SELECTORS, 8000);
      // The match may be a label inside the button: prefer the enclosing <button>.
      if (btnEl && btnEl.tagName !== 'BUTTON') {
        const enclosingButton = btnEl.closest('button');
        if (enclosingButton) btnEl = enclosingButton;
      }
    } catch (e) {
      btnEl = null;
      err('未找到搜索按钮,将使用 Enter 键进行触发。');
    }
  }

  if (!inputEl) {
    throw new Error('未定位到搜索输入框');
  }

  return { inputEl, btnEl };
}

/********************* UI controls (bottom-right corner) *********************/

/**
 * Injects the stylesheet, the toggle button and the status box into the page.
 * Clicking the button flips between "running" and "stopped": stopping sets
 * `stopFlag`, resuming clears it and calls runBatchSearchLoop again.
 *
 * @returns {{btn: HTMLElement, status: HTMLElement}} The two created elements.
 */
function createUI() {
  const css = `
#dm-batch-btn { position: fixed; right: 12px; bottom: 12px; z-index:999999; background: rgba(0,0,0,0.65); color:#fff;
padding:8px 10px; border-radius:8px; font-size:13px; cursor:pointer; user-select:none;}
#dm-batch-status { position: fixed; right:12px; bottom:56px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff;
padding:6px 8px; border-radius:6px; font-size:12px; max-width:320px; word-break:break-word;}
`;
  const styleEl = document.createElement('style');
  styleEl.textContent = css;
  if (document.head) document.head.appendChild(styleEl);

  // The button starts in the "running" state, so the first click is a stop.
  const btn = document.createElement('div');
  btn.id = 'dm-batch-btn';
  btn.textContent = 'BatchSearch:停止';
  btn.dataset.running = '1';
  document.body.appendChild(btn);

  const status = document.createElement('div');
  status.id = 'dm-batch-status';
  status.textContent = '准备中...';
  document.body.appendChild(status);

  btn.addEventListener('click', () => {
    const wasRunning = btn.dataset.running === '1';
    stopFlag = wasRunning;

    if (wasRunning) {
      btn.dataset.running = '0';
      btn.textContent = 'BatchSearch:已停止';
      status.textContent = '已手动停止';
      return;
    }

    btn.dataset.running = '1';
    btn.textContent = 'BatchSearch:停止';
    status.textContent = '已启动';
    // Resume: start the batch loop again.
    runBatchSearchLoop(status).catch((e) => err(e));
  });

  return { btn, status };
}

/********************* Main flow: fetch the city list and search each city in turn *********************/

/**
 * Extracts the search name from one area-list entry: its `city`, else its
 * `province`, trimmed. Returns '' for entries that are not objects.
 */
function cityNameOf(entry) {
  if (!entry || typeof entry !== 'object') return '';
  return String(entry.city || entry.province || '').trim();
}

/**
 * Runs the batch: loads the city list (once; it is cached in `areaList`),
 * then for every city types "<city>律师" into the search box, triggers the
 * search and auto-scrolls the results. Progress and failures are written to
 * `statusNode.textContent`; nothing is thrown to the caller.
 *
 * Only one loop runs at a time: a call made while another is still active
 * returns immediately and the active loop carries on.
 *
 * @param {{textContent: string}} statusNode - Element that shows progress.
 * @returns {Promise<void>}
 */
async function runBatchSearchLoop(statusNode) {
  // Stop-then-restart can call this while the previous loop is still waiting
  // on a timer; starting a second loop would drive the same search box twice.
  if (runBatchSearchLoop.active) {
    log('runBatchSearchLoop already active; ignoring duplicate start');
    return;
  }
  runBatchSearchLoop.active = true;

  try {
    const toggle = document.getElementById('dm-batch-btn');
    stopFlag = Boolean(toggle && toggle.dataset.running === '0');

    // Fetch the area list (only when none is cached in memory).
    if (!Array.isArray(areaList) || areaList.length === 0) {
      statusNode.textContent = '正在获取城市列表...';
      try {
        const data = await gmGetJson(AREA_API);
        // Accept either a bare array or an object carrying the array in `data`.
        const normalizedAreaList = Array.isArray(data)
          ? data
          : (data && Array.isArray(data.data) ? data.data : []);

        if (normalizedAreaList.length === 0) {
          err('area API returned not array', data);
          statusNode.textContent = '获取城市列表失败(返回格式异常)';
          return;
        }
        areaList = normalizedAreaList;
        log('获取城市列表数量:', areaList.length);
        statusNode.textContent = `获取到 ${areaList.length} 个城市,准备开始循环。`;
      } catch (e) {
        err('获取城市列表失败', e);
        // The rejection value is not guaranteed to be an Error.
        statusNode.textContent = '获取城市列表失败: ' + ((e && e.message) || String(e));
        return;
      }
    }

    // Wait until the search input (and, if present, the button) is available.
    try {
      await ensureSearchControls(statusNode);
    } catch (e) {
      err('未找到搜索输入或按钮', e);
      statusNode.textContent = '未找到搜索输入或按钮,脚本仍会监听接口,但无法自动搜索。';
      return;
    }

    // Main loop, per city: search -> scroll -> (responses are forwarded by the interceptors) -> next.
    let aborted = false; // true when a failure status must survive to the end
    for (let i = 0; i < areaList.length; i++) {
      if (stopFlag) { statusNode.textContent = '已停止'; break; }
      currentCityIndex = i;

      const city = cityNameOf(areaList[i]);
      if (!city) continue;

      const keyword = `${city}律师`;
      statusNode.textContent = `正在搜索:${keyword} (${i + 1}/${areaList.length})`;
      log(`开始城市[${i + 1}/${areaList.length}] 搜索:`, keyword);

      // The page may have re-rendered the search bar since the last city.
      try {
        await ensureSearchControls(statusNode);
      } catch (e) {
        err('刷新搜索控件失败', e);
        statusNode.textContent = '刷新搜索控件失败,终止批量搜索。';
        aborted = true;
        break;
      }

      await simulateSearchInput(keyword);

      if (!simulateSearchTrigger()) {
        statusNode.textContent = '搜索触发失败,尝试刷新控件...';
        btnEl = null;
        let retried = false;
        try {
          await ensureSearchControls(statusNode);
          retried = simulateSearchTrigger();
        } catch (e) {
          err('刷新搜索控件失败', e);
        }
        if (!retried) {
          statusNode.textContent = '搜索触发失败,终止批量搜索。';
          aborted = true;
          break;
        }
      }

      // Give the results a moment to start loading.
      await new Promise((resolve) => setTimeout(resolve, WAIT_AFTER_SEARCH_MS));

      // Scroll until the page stops growing or the cap is reached.
      await autoScrollUntilStable(statusNode, MAX_SCROLLS_PER_CITY);

      if (stopFlag) { statusNode.textContent = '已停止'; break; }

      // Short pause before the next city.
      statusNode.textContent = `完成 ${keyword} 的加载,等待 ${DELAY_BETWEEN_CITIES_MS} ms 后继续...`;
      await new Promise((resolve) => setTimeout(resolve, DELAY_BETWEEN_CITIES_MS));
    }

    // Keep a specific failure message visible instead of overwriting it.
    if (!aborted) {
      statusNode.textContent = '批量搜索完成或已停止。';
    }
    log('批量搜索循环结束');
  } catch (e) {
    err('runBatchSearchLoop error', e);
  } finally {
    runBatchSearchLoop.active = false;
  }
}

/********************* Start the script *********************/

// Builds the UI and, when the current path contains '/search/', starts the
// batch loop automatically after a short delay. On any other page the loop is
// only started from the button.
(function init() {
  const ui = createUI();
  ui.status.textContent = '就绪 - 点击右下按钮可停止/重启批量搜索';
  log('pathname:', location.pathname);

  // The messages below name /jingxuan/search/, but the check itself accepts
  // any path that contains '/search/'.
  const isAutoPage = Boolean(location.pathname) && location.pathname.includes('/search/');
  if (!isAutoPage) {
    // Not the target page: the button still works (it starts in the "running"
    // state, so the first click switches it to "stopped").
    ui.status.textContent = '非 /jingxuan/search/ 页面。导航至该页面或手动控制开始。';
    return;
  }

  ui.status.textContent = '检测到 /jingxuan/search/ 页面,准备开始批量搜索...';
  // Give the page a moment to load its own scripts and DOM.
  setTimeout(() => {
    runBatchSearchLoop(ui.status).catch((e) => err(e));
  }, 800);
})();

})();