chore: 暂存本地修改

This commit is contained in:
hello-dd-code
2026-04-28 17:33:51 +08:00
parent ba04fe42fc
commit f67cb30f0d
15 changed files with 1139 additions and 97 deletions
+76 -54
View File
@@ -1,3 +1,4 @@
import copy
import json
import os
import re
@@ -26,64 +27,73 @@ DEFAULT_HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/138.0.0.0 Safari/537.36"
"Chrome/146.0.0.0 Safari/537.36"
),
"Accept": "*/*",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
"DNT": "1",
"Priority": "u=1, i",
"Sec-CH-UA": '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
"Sec-CH-UA-Mobile": "?0",
"Sec-CH-UA-Platform": '"Windows"',
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"X-Requested-With": "XMLHttpRequest",
}
DEFAULT_WEIXIN_CONFIG = {
"TOKEN": "32299576",
"FINGERPRINT": "64a1c659b8b944d6e7fe596b0794ab35",
"TOKEN": "609153506",
"FINGERPRINT": "46a7e6ac6ccf205986adc0aa99127860",
"COOKIE": {
"appmsglist_action_3876849679": "card",
"appmsglist_action_3258147150": "card",
"_qimei_uuid42": "1a302160d051008226aec905b63f99ff3989f30009",
"_qimei_i_3": "63b22b84c15204dfc595ac6452d722b1f0bdf0f6145b568ae68a7c0e70947438686637943989e2a1d792",
"_qimei_h38": "215986ce26aec905b63f99ff0200000e81a302",
"ua_id": "S7gglu0eZh9NkAzLAAAAADH8dynpnFZVN29lxm7BQo0=",
"wxuin": "73074968761097",
"mm_lang": "zh_CN",
"ts_uid": "8295434560",
"markHashId_L": "417c7f0e-5d9f-4048-b844-28f78ed2a838",
"_qimei_uuid42": "19b0d0b0c2d100de3df57d2afbc5018a9b4ae103e1",
"_qimei_i_3": "59c5508a935b04dac7c1ab340fd172b5a5eba4f7160d5683e2867a5a7094713e616364943989e2a29e9f",
"_qimei_h38": "b885c955f8e9995f103aac140200000421811e",
"RK": "ZGEMOpzbOS",
"ptcz": "90084a2b43c84a92d1b9082da98fd0e92369fcde4f2edbbc85661539c7917055",
"pac_uid": "0_HXj3iphPm0Y4a",
"_qimei_fingerprint": "bd1870aaecd7a9bb84aa53b9ad9a2c55",
"wxuin": "70085167371972",
"omgid": "0_HXj3iphPm0Y4a",
"rewardsn": "",
"wxtokenkey": "777",
"sig_login": "h017c22e8921e6bf5a1f8659d9f34ee0db2be31cdcf03786b9ab4b787a9821ad84d3046473d9076181a",
"_qpsvr_localtk": "0.9079082151544442",
"appletToken": "880792228",
"mmad_session": "ae5215dd3c930e6256d8f0656bd8497e719817e0df77a677766e128e2135218486f674b88b349db0d47039f54cb99c8753beb8d4b921ae452b66773db51ad3006ab1f0d19253ae83e2cb9ba53ff5b5b4f45f2fe160db66fd300a1fb4e04a92bd11de1c56c245721266e7088080fefde3",
"qq_domain_video_guid_verify": "6cce52525a146907",
"eas_sid": "91X7I7K4K5k364U2z3k2I980F5",
"_qimei_q36": "",
"pgv_info": "ssid=s4741843528",
"pgv_pvid": "9337874960",
"_qimei_i_2": "47e96bdff700",
"_qimei_i_1": "40bb51d09d525588c892fb6653d17ae9feebf2f0125852d3e78e2c582493206c616333973981e3dd838fd0da",
"_qimei_q32": "",
"mp_token": "1555009133",
"ua_id": "390pNywJFJA6BsgOAAAAADO0TqlmW7NBB1GD0Y7OVwk=",
"__wx_phantom_mark__": "UTRZE71JZ7",
"_clck": "3841887471|1|g4a|0",
"uuid": "6ae7cb97104627c5d3b9d1d9ab2eef60",
"rand_info": "CAESIGjvJyiJ58Ii0enQVKBwl6d4IyCrWeN7kzhIAVTgM2lc",
"slave_bizuin": "3876849679",
"data_bizuin": "3876849679",
"bizuin": "3876849679",
"data_ticket": "8wg11/LIrTLHAbJdbAH2HWdqlW/K2jijwP27oPSrH2myYNpuSR1NedfmSbzeq5go",
"slave_sid": "TjBzVV83WThEaThRdUhlcFpqRFhQejFSUzRfOWdGa0l3S0dPSW41QWdkSk9qSkQ2ZTljbWRHa0poQ1lNTXlub25WMUJORVluVU5HaFBGRXVJS19yeG53SUNWWU14YjNQeWpxTUczalBHV1dTY0V3TDZ6aE14bFNaS2ExeGNhb3J0WlRWMlM4NnNmNGFST0ZD",
"slave_user": "gh_6c1283858808",
"xid": "116378d10877a35558158970698ca0c3",
"_clsk": "3okzsf|1773282377657|6|1|mp.weixin.qq.com/weheat-agent/payload/record"
},
"COUNT": 20,
"_qimei_fingerprint": "d895c46d5fda98cab67d9daec00068ed",
"_clck": "501quy|1|g4t|0",
"uuid": "210d1c199a63afd4c774eccd9a06a27f",
"rand_info": "CAESIE4WqrFFVVjqrrNflbCUM7wPD5NXjuGbjfHolAEsMmEm",
"slave_bizuin": "3258147150",
"data_bizuin": "3258147150",
"bizuin": "3258147150",
"data_ticket": "tpcLjRB7B7AlUY3rFe/ILEjtCKs7dEEGsn8kXnHVzdTb9dgIpSPN1aP8FlE6FDhj",
"slave_sid": "U3hfU1Z0UV91N0U5d0lkRDhyTzh3d3hmbnBHMjBnbmFNdzVJeGlJeTJ6OTVxRjJQVVE2VkNhejYzTkxETVVSZkF3eWRORmtRS01XWFBjdnFZZWFLNjR2ZGtwdUJ2MzByclg0NjF4SHlDeVJneEhsczdSYUJVNE45VEhNRWVTQXg1dlpGdWQ0bU5VM3pnRzJN",
"slave_user": "gh_fe76760560d0",
"xid": "ef503a6864cceaef225c615a45606e4a",
"_clsk": "12arnf1|1774975723874|4|1|mp.weixin.qq.com/weheat-agent/payload/record",
"_qimei_i_1": "2ddc6a80945f59d3c7c4ab325dd526b3feeea1a31458558bbdd97e582493206c6163629d39d8e1dcd49fddc7"
},
"COUNT": 21,
"REFERER": "https://mp.weixin.qq.com/",
"HEADERS": {},
"REQUEST_PARAMS": {
"action": "search",
"scene": "1",
"lang": "zh_CN",
"f": "json",
"ajax": "1",
},
"REQUESTS_PER_SECOND": 5,
"PAGE_DELAY": 5,
"CITY_DELAY": 2,
}
def _deep_merge_dict(base: Dict, incoming: Dict) -> Dict:
merged = copy.deepcopy(base)
for key, value in incoming.items():
if isinstance(value, dict) and isinstance(merged.get(key), dict):
merged[key] = _deep_merge_dict(merged[key], value)
else:
merged[key] = value
return merged
def _parse_cookie_value(cookie_value) -> Dict[str, str]:
if isinstance(cookie_value, dict):
return {str(key): str(value) for key, value in cookie_value.items()}
@@ -110,15 +120,16 @@ def _parse_cookie_value(cookie_value) -> Dict[str, str]:
def _load_weixin_config() -> Dict:
config = DEFAULT_WEIXIN_CONFIG.copy()
config = copy.deepcopy(DEFAULT_WEIXIN_CONFIG)
module_config = getattr(project_config, "WEIXIN_CONFIG", None)
if isinstance(module_config, dict):
config.update(module_config)
config = _deep_merge_dict(config, module_config)
env_mapping = {
"TOKEN": os.getenv("WEIXIN_TOKEN"),
"FINGERPRINT": os.getenv("WEIXIN_FINGERPRINT"),
"COOKIE": os.getenv("WEIXIN_COOKIE"),
"REFERER": os.getenv("WEIXIN_REFERER"),
"COUNT": os.getenv("WEIXIN_COUNT"),
"REQUESTS_PER_SECOND": os.getenv("WEIXIN_REQUESTS_PER_SECOND"),
"PAGE_DELAY": os.getenv("WEIXIN_PAGE_DELAY"),
@@ -161,17 +172,32 @@ class WeixinSpider:
self.fingerprint = str(self.config.get("FINGERPRINT", "")).strip()
self.cookies = self.config.get("COOKIE", {})
self.count = str(self.config.get("COUNT", DEFAULT_WEIXIN_CONFIG["COUNT"]))
self.referer = str(self.config.get("REFERER", DEFAULT_WEIXIN_CONFIG["REFERER"])).strip()
self.request_params = {
str(key): str(value)
for key, value in (self.config.get("REQUEST_PARAMS", {}) or {}).items()
if value is not None
}
self.page_delay = max(0.0, float(self.config.get("PAGE_DELAY", DEFAULT_WEIXIN_CONFIG["PAGE_DELAY"])))
self.city_delay = max(0.0, float(self.config.get("CITY_DELAY", DEFAULT_WEIXIN_CONFIG["CITY_DELAY"])))
max_rps = self.config.get("REQUESTS_PER_SECOND")
if max_rps:
global_rate_limiter.max_requests = int(max_rps)
headers = getattr(project_config, "HEADERS", DEFAULT_HEADERS).copy()
headers["Referer"] = "https://mp.weixin.qq.com/"
headers = DEFAULT_HEADERS.copy()
project_headers = getattr(project_config, "HEADERS", None)
if isinstance(project_headers, dict):
headers.update(project_headers)
config_headers = self.config.get("HEADERS", {})
if isinstance(config_headers, dict):
headers.update({str(key): str(value) for key, value in config_headers.items()})
if self.referer:
headers["Referer"] = self.referer
self.session = requests.Session()
self.session.trust_env = False
self.session.headers.update(headers)
if self.cookies:
self.session.cookies.update(self.cookies)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def _validate_runtime_config(self) -> bool:
@@ -214,18 +240,14 @@ class WeixinSpider:
return []
def _build_query_url(self, query: str, buffer: str) -> str:
params = {
"action": "search",
"scene": "1",
params = self.request_params.copy()
params.update({
"query": query,
"count": self.count,
"buffer": buffer,
"fingerprint": self.fingerprint,
"token": self.token,
"lang": "zh_CN",
"f": "json",
"ajax": "1",
}
})
return f"{API_ENDPOINT}?{urlencode(params)}"
def _extract_phone(self, text: str) -> Optional[str]: