// ==UserScript==
// @name         Douyin Batch City Search + AutoScroll + Capture
// @namespace    http://tampermonkey.net/
// @version      1.0
// @description  从 Python 服务获取地区列表,按 city + "律师" 搜索并自动下滑,拦截 /aweme/v1/web/discover/search/ 返回并转发到入库接口。
// @author       You
// @match        https://www.douyin.com/*
// @grant        GM_xmlhttpRequest
// @connect      *
// @run-at       document-idle
// ==/UserScript==

/*
 * Overview
 * --------
 * 1. Fetches a list of areas from AREA_API.
 * 2. For each area, types `${city}律师` into the site search box and triggers the search.
 * 3. Scrolls the page until its height stops growing (or a scroll cap is hit).
 * 4. Meanwhile, wrapped fetch/XHR forward every JSON response whose URL contains
 *    TARGET_PATH to each endpoint in SEND_TARGETS.
 * A small button/status widget in the bottom-right corner stops or restarts the loop.
 */
(function () {
  'use strict';

  // Nothing to drive outside of a browser page (no DOM, no window).
  if (typeof window === 'undefined' || typeof document === 'undefined') {
    return;
  }

  /********************* Configuration (edit as needed) *********************/
  // Base address of the Python service, e.g. http://nas.nepiedg.site:9002
  const API_BASE = 'http://127.0.0.1:9002';
  // Endpoint that returns the city list.
  const AREA_API = `${API_BASE}/api/layer/get_area?server=1`;
  // Endpoints that receive every captured search response.
  const SEND_TARGETS = [
    `${API_BASE}/api/layer/index?server=1&save_only=0`,
  ];

  // Candidate selectors for the search input and button (update when the page changes).
  const SEARCH_INPUT_SELECTORS = [
    'input[data-e2e="search-input"]',
    'input[data-e2e="searchbar-input"]',
    'form[data-e2e="searchbar"] input',
    'input[placeholder*="搜索"]',
  ];
  const SEARCH_BTN_SELECTORS = [
    '[data-e2e="search-button"]',
    'button[data-e2e="search-button"]',
    'span[data-e2e="search-button"]',
    'button[data-e2e="searchbar-button"]',
    'span.btn-title',
  ];

  // Auto-scroll settings applied to each city search.
  const SCROLL_INTERVAL_MS = 2000;
  const MAX_STABLE_COUNT = 6;
  const MAX_SCROLLS_PER_CITY = 120;
  const SCROLL_BY = 2200;
  const WAIT_AFTER_SEARCH_MS = 1000;
  const DELAY_BETWEEN_CITIES_MS = 1500;

  // Timeout for the area-list request so a dead backend cannot hang the loop forever.
  const REQUEST_TIMEOUT_MS = 15000;

  // Optional: forward only payloads that contain a phone number.
  const ONLY_SEND_IF_HAS_PHONE = false;
  // Deliberately NOT global: a /g regex keeps `lastIndex` between .test() calls,
  // which makes repeated tests on different strings give position-dependent results.
  const PHONE_REGEX = /(?:\+?86)?1[3-9]\d{9}/;

  /********************* Runtime state *********************/
  let areaList = [];
  let stopFlag = false; // Controlled by the UI; true means the whole task should stop.
  let loopRunning = false; // True while a runBatchSearchLoop invocation is in flight.
  let currentCityIndex = -1;
  let inputEl = null;
  let btnEl = null;

  // Throttle / de-duplication of forwarded payloads.
  let lastSentHash = null;
  let lastSentAt = 0;
  const SEND_MIN_INTERVAL_MS = 800;

  /********************* Utilities *********************/
  function log(...args) {
    console.log('[DouyinBatch] ', ...args);
  }

  function err(...args) {
    console.error('[DouyinBatch] ', ...args);
  }

  /** Returns a printable message for anything that may have been thrown or rejected. */
  function describeError(e) {
    return e instanceof Error ? e.message : String(e);
  }

  /**
   * 32-bit FNV-1a style hash over the UTF-16 code units of `str`.
   * @param {string} str
   * @returns {string} Hash as a lowercase hexadecimal string.
   */
  function hashString(str) {
    let h = 2166136261 >>> 0;
    for (let i = 0; i < str.length; i++) {
      h ^= str.charCodeAt(i);
      h = Math.imul(h, 16777619) >>> 0;
    }
    return h.toString(16);
  }

  /**
   * Normalizes the first argument of fetch() / XMLHttpRequest.open() to a string.
   * @param {*} resource A string, a Request-like object (`.url`) or a URL-like object (`.href`).
   * @returns {string} The URL, or '' when none can be derived.
   */
  function toUrlString(resource) {
    if (typeof resource === 'string') return resource;
    if (resource && typeof resource.url === 'string') return resource.url;
    if (resource && typeof resource.href === 'string') return resource.href;
    return '';
  }

  /**
   * GETs `url` through GM_xmlhttpRequest and parses the response body as JSON.
   * @param {string} url
   * @returns {Promise<*>} Resolves with the parsed JSON; rejects with an Error on
   *   network failure, timeout, non-2xx status or unparsable body.
   */
  function gmGetJson(url) {
    return new Promise((resolve, reject) => {
      GM_xmlhttpRequest({
        method: 'GET',
        url,
        timeout: REQUEST_TIMEOUT_MS,
        onload(res) {
          if (res.status < 200 || res.status >= 300) {
            reject(new Error(`HTTP ${res.status} from ${url}`));
            return;
          }
          try {
            resolve(JSON.parse(res.responseText));
          } catch (e) {
            reject(e);
          }
        },
        // The callback receives a response object, not an Error; wrap it so
        // callers can rely on `.message`.
        onerror() {
          reject(new Error(`request failed: ${url}`));
        },
        ontimeout() {
          reject(new Error(`request timed out: ${url}`));
        },
      });
    });
  }

  /**
   * Sets `el.value` through the prototype's native setter when one exists, so that
   * frameworks which override the instance property still observe the change.
   * @param {HTMLInputElement|null} el
   * @param {string} value
   */
  function setNativeValue(el, value) {
    if (!el) return;
    const prototype = el.constructor && el.constructor.prototype
      ? el.constructor.prototype
      : window.HTMLInputElement && window.HTMLInputElement.prototype;
    const descriptor = prototype
      ? Object.getOwnPropertyDescriptor(prototype, 'value')
      : null;
    if (descriptor && descriptor.set) {
      descriptor.set.call(el, value);
    } else {
      el.value = value;
    }
  }

  /**
   * Replaces the search box content with `keyword`, firing the focus / input /
   * change / blur events a real edit would produce, then waits 80 ms.
   * Does nothing when no input element is currently known.
   * @param {string} keyword
   * @returns {Promise<void>}
   */
  async function simulateSearchInput(keyword) {
    if (!inputEl) return;
    const hasInputEvent = typeof InputEvent === 'function';
    try {
      inputEl.focus();
      inputEl.dispatchEvent(new Event('focus', { bubbles: false }));

      // Clear the previous value first and announce the deletion.
      if (inputEl.value) {
        setNativeValue(inputEl, '');
        if (hasInputEvent) {
          inputEl.dispatchEvent(new InputEvent('input', {
            bubbles: true,
            inputType: 'deleteContentBackward',
            data: '',
          }));
        } else {
          inputEl.dispatchEvent(new Event('input', { bubbles: true }));
        }
      }

      setNativeValue(inputEl, keyword);
      if (hasInputEvent) {
        inputEl.dispatchEvent(new InputEvent('beforeinput', {
          bubbles: true,
          inputType: 'insertText',
          data: keyword,
        }));
        inputEl.dispatchEvent(new InputEvent('input', {
          bubbles: true,
          inputType: 'insertText',
          data: keyword,
        }));
      } else {
        inputEl.dispatchEvent(new Event('input', { bubbles: true }));
      }
      inputEl.dispatchEvent(new Event('change', { bubbles: true }));
      inputEl.dispatchEvent(new Event('blur', { bubbles: false }));
    } catch (e) {
      err('simulateSearchInput error', e);
    }
    await new Promise((r) => setTimeout(r, 80));
  }

  /**
   * Triggers the search: clicks the search button when it is still attached to the
   * document, otherwise dispatches Enter key events on the input.
   * @returns {boolean} True when one of the two strategies ran without throwing.
   */
  function simulateSearchTrigger() {
    let triggered = false;

    if (btnEl && btnEl.isConnected) {
      try {
        const mouseOpts = { bubbles: true, cancelable: true, view: window };
        btnEl.focus();
        btnEl.dispatchEvent(new MouseEvent('mousedown', mouseOpts));
        btnEl.dispatchEvent(new MouseEvent('mouseup', mouseOpts));
        btnEl.dispatchEvent(new MouseEvent('click', mouseOpts));
        triggered = true;
      } catch (e) {
        err('simulateSearchTrigger click error', e);
      }
    }

    if (!triggered && inputEl) {
      try {
        const opts = {
          bubbles: true,
          cancelable: true,
          key: 'Enter',
          code: 'Enter',
          keyCode: 13,
          which: 13,
        };
        inputEl.dispatchEvent(new KeyboardEvent('keydown', opts));
        inputEl.dispatchEvent(new KeyboardEvent('keypress', opts));
        inputEl.dispatchEvent(new KeyboardEvent('keyup', opts));
        triggered = true;
      } catch (e) {
        err('Enter 触发搜索失败', e);
      }
    }

    return triggered;
  }

  /**
   * Hash used for de-duplication. The capture timestamp (`ts`) is excluded,
   * otherwise two captures of the same response would never compare equal.
   * @param {*} data Original payload passed to sendToTargets.
   * @param {string} body Serialized payload.
   * @returns {string}
   */
  function dedupeHash(data, body) {
    if (data && typeof data === 'object' && !Array.isArray(data) && 'ts' in data) {
      const withoutTs = Object.assign({}, data);
      delete withoutTs.ts;
      return hashString(JSON.stringify(withoutTs));
    }
    return hashString(body);
  }

  /**
   * POSTs `data` as JSON to every endpoint in SEND_TARGETS.
   * Skips the send when the phone filter is enabled and does not match, or when
   * the same payload (ignoring `ts`) was sent less than SEND_MIN_INTERVAL_MS ago.
   * Never throws; failures are logged.
   * @param {object|string} data
   */
  function sendToTargets(data) {
    try {
      const body = typeof data === 'string' ? data : JSON.stringify(data);

      // Skip payloads without a phone number when the filter is enabled.
      if (ONLY_SEND_IF_HAS_PHONE && !PHONE_REGEX.test(body)) {
        return;
      }

      const hash = dedupeHash(data, body);
      const now = Date.now();
      if (hash === lastSentHash && now - lastSentAt < SEND_MIN_INTERVAL_MS) {
        return;
      }
      lastSentHash = hash;
      lastSentAt = now;

      for (const target of SEND_TARGETS) {
        GM_xmlhttpRequest({
          method: 'POST',
          url: target,
          headers: { 'Content-Type': 'application/json' },
          data: body,
          onload(res) {
            log(`sent -> ${target}, status: ${res.status}`);
          },
          onerror(e) {
            err(`send error to ${target}`, e);
          },
        });
      }
    } catch (e) {
      err('sendToTargets error', e);
    }
  }

  /********************* fetch / XHR interception (capture the target API) *********************/
  const TARGET_PATH = '/aweme/v1/web/discover/search/';

  // NOTE(review): with a non-"none" @grant some userscript managers run the script
  // in a sandbox where assigning window.fetch may not affect the page's own fetch —
  // confirm in the target manager; `unsafeWindow` may be required there.
  (function interceptFetch() {
    if (!window.fetch) return;
    const orig = window.fetch.bind(window);

    window.fetch = function (...args) {
      let url = '';
      try {
        url = toUrlString(args[0]);
      } catch (e) {
        err('fetch wrapper error', e);
      }

      const pending = orig(...args);
      if (!url || !url.includes(TARGET_PATH)) {
        return pending;
      }

      return pending.then((response) => {
        try {
          // Read from a clone so the page can still consume the original body.
          response.clone().json().then((json) => {
            if (json && typeof json === 'object') {
              sendToTargets({
                source: 'fetch',
                url,
                data: json,
                ts: Date.now(),
                cityIndex: currentCityIndex,
              });
            }
          }).catch(() => {
            // Non-JSON bodies are intentionally ignored.
          });
        } catch (e) {
          // Capturing is best effort; never break the page's own request.
        }
        return response;
      });
    };
  })();

  (function interceptXHR() {
    const XHR = window.XMLHttpRequest;
    if (!XHR) return;
    const origOpen = XHR.prototype.open;
    const origSend = XHR.prototype.send;

    XHR.prototype.open = function (method, url, ...rest) {
      try {
        // Remember the URL on the instance so send() can decide whether to listen.
        this.__dm_url = toUrlString(url);
      } catch (e) {
        // Best effort only; the real open() below must still run.
      }
      return origOpen.apply(this, [method, url, ...rest]);
    };

    XHR.prototype.send = function (...sendArgs) {
      try {
        const targetUrl = this.__dm_url || '';
        if (targetUrl && targetUrl.includes(TARGET_PATH)) {
          this.addEventListener('readystatechange', function () {
            if (this.readyState !== 4) return;
            try {
              // responseText throws for non-text responseType; treated as "nothing to capture".
              const text = this.responseText;
              if (!text) return;
              const json = JSON.parse(text);
              sendToTargets({
                source: 'xhr',
                url: targetUrl,
                data: json,
                ts: Date.now(),
                cityIndex: currentCityIndex,
              });
            } catch (parseError) {
              // Non-JSON or unreadable responses are intentionally ignored.
            }
          });
        }
      } catch (e) {
        err('XHR wrapper error', e);
      }
      return origSend.apply(this, sendArgs);
    };
  })();

  /********************* Auto-scroll (one search) *********************/
  /**
   * Scrolls down every SCROLL_INTERVAL_MS until the page height has been unchanged
   * for MAX_STABLE_COUNT consecutive checks, `maxScrolls` is exceeded, or the
   * global stop flag is set. Progress is written to `statusNode`.
   * @param {HTMLElement} statusNode
   * @param {number} [maxScrolls=MAX_SCROLLS_PER_CITY]
   * @returns {Promise<void>}
   */
  async function autoScrollUntilStable(statusNode, maxScrolls = MAX_SCROLLS_PER_CITY) {
    let lastHeight = -1;
    let stableCount = 0;
    let scrolls = 0;

    while (!stopFlag) {
      scrolls++;
      if (scrolls > maxScrolls) {
        statusNode.textContent = `达到单次搜索最大滚动 ${maxScrolls},停止本次自动下滑。`;
        break;
      }

      try {
        window.scrollBy({ top: SCROLL_BY, left: 0, behavior: 'smooth' });
      } catch (e) {
        // Fallback for environments rejecting the options form: jump to the bottom.
        window.scrollTo(0, (document.body.scrollHeight || document.documentElement.scrollHeight));
      }

      await new Promise((r) => setTimeout(r, SCROLL_INTERVAL_MS));

      const curHeight = document.body.scrollHeight || document.documentElement.scrollHeight || 0;
      if (curHeight === lastHeight) {
        stableCount++;
      } else {
        stableCount = 0;
        lastHeight = curHeight;
      }

      statusNode.textContent = `滚动次数: ${scrolls}, 稳定计数: ${stableCount}/${MAX_STABLE_COUNT}`;
      if (stableCount >= MAX_STABLE_COUNT) {
        statusNode.textContent = `页面高度稳定 (${stableCount}), 本次搜索加载结束。`;
        break;
      }
    }
  }

  /********************* DOM helpers: wait for an element *********************/
  /**
   * Resolves with the first element matching any of the given selectors, checking
   * immediately and then on every DOM mutation.
   * @param {string|string[]} selector One selector or a list tried in order.
   * @param {number} [timeout=10000] Milliseconds before rejecting.
   * @returns {Promise<Element>} Rejects with an Error on timeout.
   */
  function waitForSelector(selector, timeout = 10000) {
    const selectors = Array.isArray(selector) ? selector.filter(Boolean) : [selector];

    return new Promise((resolve, reject) => {
      let timer;
      const root = document.documentElement || document.body;

      const cleanup = (observer) => {
        try {
          observer && observer.disconnect();
        } catch (_) {
          // Disconnecting is best effort.
        }
        if (timer) clearTimeout(timer);
      };

      const pick = () => {
        for (const sel of selectors) {
          if (!sel) continue;
          try {
            const found = document.querySelector(sel);
            if (found) return found;
          } catch (e) {
            err('query selector error', sel, e);
          }
        }
        return null;
      };

      const immediate = pick();
      if (immediate) {
        resolve(immediate);
        return;
      }

      const observer = new MutationObserver(() => {
        const node = pick();
        if (node) {
          cleanup(observer);
          resolve(node);
        }
      });
      if (root) {
        observer.observe(root, { childList: true, subtree: true });
      }

      timer = setTimeout(() => {
        cleanup(observer);
        reject(new Error('timeout waiting for ' + selectors.join(', ')));
      }, timeout);
    });
  }

  /**
   * Makes sure `inputEl` (required) and `btnEl` (optional) point at elements that
   * are still attached to the document, re-querying them when necessary.
   * @param {HTMLElement} [statusNode] Receives progress text when provided.
   * @returns {Promise<{inputEl: Element, btnEl: (Element|null)}>}
   * @throws {Error} When the search input cannot be located in time.
   */
  async function ensureSearchControls(statusNode) {
    const isConnected = (node) => {
      if (!node) return false;
      try {
        if (node.isConnected !== undefined) return node.isConnected;
        return document.contains(node);
      } catch (_) {
        return false;
      }
    };

    // Drop references to elements the page has since re-rendered away.
    if (!isConnected(inputEl)) inputEl = null;
    if (!isConnected(btnEl)) btnEl = null;

    if (!inputEl) {
      statusNode && (statusNode.textContent = '等待搜索输入框可用...');
      inputEl = await waitForSelector(SEARCH_INPUT_SELECTORS, 10000);
    }

    if (!btnEl) {
      try {
        statusNode && (statusNode.textContent = '等待搜索按钮可用...');
        btnEl = await waitForSelector(SEARCH_BTN_SELECTORS, 8000);
        // Prefer the enclosing <button> when the match is an inner element.
        if (btnEl && btnEl.tagName !== 'BUTTON') {
          const maybeButton = btnEl.closest('button');
          if (maybeButton) btnEl = maybeButton;
        }
      } catch (e) {
        // A missing button is tolerated: the Enter key is used instead.
        btnEl = null;
        err('未找到搜索按钮,将使用 Enter 键进行触发。');
      }
    }

    if (!inputEl) {
      throw new Error('未定位到搜索输入框');
    }
    return { inputEl, btnEl };
  }

  /********************* UI controls (bottom-right corner) *********************/
  /**
   * Injects the toggle button and status label, and wires the button to stop or
   * (re)start the batch loop.
   * @returns {{btn: HTMLElement, status: HTMLElement}}
   */
  function createUI() {
    const css = `
      #dm-batch-btn { position: fixed; right: 12px; bottom: 12px; z-index:999999; background: rgba(0,0,0,0.65); color:#fff; padding:8px 10px; border-radius:8px; font-size:13px; cursor:pointer; user-select:none;}
      #dm-batch-status { position: fixed; right:12px; bottom:56px; z-index:999999; background: rgba(0,0,0,0.45); color:#fff; padding:6px 8px; border-radius:6px; font-size:12px; max-width:320px; word-break:break-word;}
    `;
    const s = document.createElement('style');
    s.textContent = css;
    document.head && document.head.appendChild(s);

    const btn = document.createElement('div');
    btn.id = 'dm-batch-btn';
    btn.textContent = 'BatchSearch:停止';
    btn.dataset.running = '1';
    document.body.appendChild(btn);

    const status = document.createElement('div');
    status.id = 'dm-batch-status';
    status.textContent = '准备中...';
    document.body.appendChild(status);

    btn.addEventListener('click', () => {
      const running = btn.dataset.running === '1';
      btn.dataset.running = running ? '0' : '1';
      btn.textContent = running ? 'BatchSearch:已停止' : 'BatchSearch:停止';
      status.textContent = running ? '已手动停止' : '已启动';
      // Was running and got clicked -> stop; was stopped -> clear the flag and restart.
      stopFlag = running;
      if (!stopFlag) {
        runBatchSearchLoop(status).catch((e) => err(e));
      }
    });

    return { btn, status };
  }

  /********************* Main flow: fetch cities and search each one *********************/
  /**
   * Extracts the search term for one area entry.
   * Entries are read as `{ city, province }` objects; plain strings are also
   * accepted (assumption about the backend's response shape — verify against the API).
   * @param {*} entry
   * @returns {string} Trimmed name, or '' when none is available.
   */
  function cityNameOf(entry) {
    if (typeof entry === 'string') return entry.trim();
    if (!entry || typeof entry !== 'object') return '';
    return String(entry.city || entry.province || '').trim();
  }

  /**
   * Runs the whole batch: loads the area list (once), then for each city performs
   * search -> auto-scroll -> pause. Stops early when the stop flag is set.
   * Only one invocation runs at a time; a call made while another is in flight
   * returns immediately. Never rejects; errors are logged and shown in the status.
   * @param {HTMLElement} statusNode
   * @returns {Promise<void>}
   */
  async function runBatchSearchLoop(statusNode) {
    // Guard against a second loop being started while the previous one is still
    // winding down (e.g. stop immediately followed by start).
    if (loopRunning) {
      log('batch loop already running, ignoring duplicate start');
      return;
    }
    loopRunning = true;

    try {
      const toggle = document.getElementById('dm-batch-btn');
      stopFlag = Boolean(toggle && toggle.dataset.running === '0');

      // Load the area list only when nothing is cached in memory.
      if (!Array.isArray(areaList) || areaList.length === 0) {
        statusNode.textContent = '正在获取城市列表...';
        try {
          const data = await gmGetJson(AREA_API);
          const normalizedAreaList = Array.isArray(data)
            ? data
            : (data && Array.isArray(data.data) ? data.data : []);
          if (normalizedAreaList.length > 0) {
            areaList = normalizedAreaList;
            log('获取城市列表数量:', areaList.length);
            statusNode.textContent = `获取到 ${areaList.length} 个城市,准备开始循环。`;
          } else {
            err('area API returned not array', data);
            statusNode.textContent = '获取城市列表失败(返回格式异常)';
            return;
          }
        } catch (e) {
          err('获取城市列表失败', e);
          statusNode.textContent = `获取城市列表失败: ${describeError(e)}`;
          return;
        }
      }

      // Wait for the search input (and, if present, the button).
      try {
        await ensureSearchControls(statusNode);
      } catch (e) {
        err('未找到搜索输入或按钮', e);
        statusNode.textContent = '未找到搜索输入或按钮,脚本仍会监听接口,但无法自动搜索。';
        return;
      }

      // Main loop: for each city, search -> scroll -> (responses are forwarded by
      // the interceptors) -> next city.
      for (let i = 0; i < areaList.length; i++) {
        if (stopFlag) {
          statusNode.textContent = '已停止';
          break;
        }
        currentCityIndex = i;

        const city = cityNameOf(areaList[i]);
        if (!city) continue;

        const keyword = `${city}律师`;
        statusNode.textContent = `正在搜索:${keyword} (${i + 1}/${areaList.length})`;
        log(`开始城市[${i + 1}/${areaList.length}] 搜索:`, keyword);

        // The page may have re-rendered since the last city; refresh the controls.
        try {
          await ensureSearchControls(statusNode);
        } catch (e) {
          err('刷新搜索控件失败', e);
          statusNode.textContent = '刷新搜索控件失败,终止批量搜索。';
          break;
        }

        await simulateSearchInput(keyword);
        if (!simulateSearchTrigger()) {
          statusNode.textContent = '搜索触发失败,尝试刷新控件...';
          btnEl = null;
          await ensureSearchControls(statusNode);
          if (!simulateSearchTrigger()) {
            statusNode.textContent = '搜索触发失败,终止批量搜索。';
            break;
          }
        }

        // Give the results a moment to start loading.
        await new Promise((r) => setTimeout(r, WAIT_AFTER_SEARCH_MS));

        // Scroll until the page is stable or the cap is reached.
        await autoScrollUntilStable(statusNode, MAX_SCROLLS_PER_CITY);

        if (stopFlag) {
          statusNode.textContent = '已停止';
          break;
        }

        // Short pause before moving on to the next city.
        statusNode.textContent = `完成 ${keyword} 的加载,等待 ${DELAY_BETWEEN_CITIES_MS} ms 后继续...`;
        await new Promise((r) => setTimeout(r, DELAY_BETWEEN_CITIES_MS));
      }

      statusNode.textContent = '批量搜索完成或已停止。';
      log('批量搜索循环结束');
    } catch (e) {
      err('runBatchSearchLoop error', e);
      // Surface the failure instead of leaving a stale progress message on screen.
      if (statusNode) {
        statusNode.textContent = `批量搜索异常终止: ${describeError(e)}`;
      }
    } finally {
      loopRunning = false;
    }
  }

  /********************* Bootstrap *********************/
  (function init() {
    const ui = createUI();
    ui.status.textContent = '就绪 - 点击右下按钮可停止/重启批量搜索';
    log(location.pathname);

    // Start automatically on a search page (path contains /search/); on any other
    // page the loop can still be started manually from the button.
    const isAutoPage = Boolean(location.pathname) && location.pathname.includes('/search/');
    if (isAutoPage) {
      ui.status.textContent = '检测到 /jingxuan/search/ 页面,准备开始批量搜索...';
      // Give the page a moment to load its scripts and DOM.
      setTimeout(() => {
        runBatchSearchLoop(ui.status).catch((e) => err(e));
      }, 800);
    } else {
      // Nothing is running here, so show the button in its stopped state; a single
      // click then starts the loop instead of first "stopping" an idle script.
      ui.btn.dataset.running = '0';
      ui.btn.textContent = 'BatchSearch:已停止';
      ui.status.textContent = '非 /jingxuan/search/ 页面。导航至该页面或手动控制开始。';
    }
  })();
})();