chore: 暂存本地修改
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -26,64 +27,73 @@ DEFAULT_HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/138.0.0.0 Safari/537.36"
|
||||
"Chrome/146.0.0.0 Safari/537.36"
|
||||
),
|
||||
"Accept": "*/*",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
|
||||
"DNT": "1",
|
||||
"Priority": "u=1, i",
|
||||
"Sec-CH-UA": '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
|
||||
"Sec-CH-UA-Mobile": "?0",
|
||||
"Sec-CH-UA-Platform": '"Windows"',
|
||||
"Sec-Fetch-Dest": "empty",
|
||||
"Sec-Fetch-Mode": "cors",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
DEFAULT_WEIXIN_CONFIG = {
|
||||
"TOKEN": "32299576",
|
||||
"FINGERPRINT": "64a1c659b8b944d6e7fe596b0794ab35",
|
||||
"TOKEN": "609153506",
|
||||
"FINGERPRINT": "46a7e6ac6ccf205986adc0aa99127860",
|
||||
"COOKIE": {
|
||||
"appmsglist_action_3876849679": "card",
|
||||
"appmsglist_action_3258147150": "card",
|
||||
"_qimei_uuid42": "1a302160d051008226aec905b63f99ff3989f30009",
|
||||
"_qimei_i_3": "63b22b84c15204dfc595ac6452d722b1f0bdf0f6145b568ae68a7c0e70947438686637943989e2a1d792",
|
||||
"_qimei_h38": "215986ce26aec905b63f99ff0200000e81a302",
|
||||
"ua_id": "S7gglu0eZh9NkAzLAAAAADH8dynpnFZVN29lxm7BQo0=",
|
||||
"wxuin": "73074968761097",
|
||||
"mm_lang": "zh_CN",
|
||||
"ts_uid": "8295434560",
|
||||
"markHashId_L": "417c7f0e-5d9f-4048-b844-28f78ed2a838",
|
||||
"_qimei_uuid42": "19b0d0b0c2d100de3df57d2afbc5018a9b4ae103e1",
|
||||
"_qimei_i_3": "59c5508a935b04dac7c1ab340fd172b5a5eba4f7160d5683e2867a5a7094713e616364943989e2a29e9f",
|
||||
"_qimei_h38": "b885c955f8e9995f103aac140200000421811e",
|
||||
"RK": "ZGEMOpzbOS",
|
||||
"ptcz": "90084a2b43c84a92d1b9082da98fd0e92369fcde4f2edbbc85661539c7917055",
|
||||
"pac_uid": "0_HXj3iphPm0Y4a",
|
||||
"_qimei_fingerprint": "bd1870aaecd7a9bb84aa53b9ad9a2c55",
|
||||
"wxuin": "70085167371972",
|
||||
"omgid": "0_HXj3iphPm0Y4a",
|
||||
"rewardsn": "",
|
||||
"wxtokenkey": "777",
|
||||
"sig_login": "h017c22e8921e6bf5a1f8659d9f34ee0db2be31cdcf03786b9ab4b787a9821ad84d3046473d9076181a",
|
||||
"_qpsvr_localtk": "0.9079082151544442",
|
||||
"appletToken": "880792228",
|
||||
"mmad_session": "ae5215dd3c930e6256d8f0656bd8497e719817e0df77a677766e128e2135218486f674b88b349db0d47039f54cb99c8753beb8d4b921ae452b66773db51ad3006ab1f0d19253ae83e2cb9ba53ff5b5b4f45f2fe160db66fd300a1fb4e04a92bd11de1c56c245721266e7088080fefde3",
|
||||
"qq_domain_video_guid_verify": "6cce52525a146907",
|
||||
"eas_sid": "91X7I7K4K5k364U2z3k2I980F5",
|
||||
"_qimei_q36": "",
|
||||
"pgv_info": "ssid=s4741843528",
|
||||
"pgv_pvid": "9337874960",
|
||||
"_qimei_i_2": "47e96bdff700",
|
||||
"_qimei_i_1": "40bb51d09d525588c892fb6653d17ae9feebf2f0125852d3e78e2c582493206c616333973981e3dd838fd0da",
|
||||
"_qimei_q32": "",
|
||||
"mp_token": "1555009133",
|
||||
"ua_id": "390pNywJFJA6BsgOAAAAADO0TqlmW7NBB1GD0Y7OVwk=",
|
||||
"__wx_phantom_mark__": "UTRZE71JZ7",
|
||||
"_clck": "3841887471|1|g4a|0",
|
||||
"uuid": "6ae7cb97104627c5d3b9d1d9ab2eef60",
|
||||
"rand_info": "CAESIGjvJyiJ58Ii0enQVKBwl6d4IyCrWeN7kzhIAVTgM2lc",
|
||||
"slave_bizuin": "3876849679",
|
||||
"data_bizuin": "3876849679",
|
||||
"bizuin": "3876849679",
|
||||
"data_ticket": "8wg11/LIrTLHAbJdbAH2HWdqlW/K2jijwP27oPSrH2myYNpuSR1NedfmSbzeq5go",
|
||||
"slave_sid": "TjBzVV83WThEaThRdUhlcFpqRFhQejFSUzRfOWdGa0l3S0dPSW41QWdkSk9qSkQ2ZTljbWRHa0poQ1lNTXlub25WMUJORVluVU5HaFBGRXVJS19yeG53SUNWWU14YjNQeWpxTUczalBHV1dTY0V3TDZ6aE14bFNaS2ExeGNhb3J0WlRWMlM4NnNmNGFST0ZD",
|
||||
"slave_user": "gh_6c1283858808",
|
||||
"xid": "116378d10877a35558158970698ca0c3",
|
||||
"_clsk": "3okzsf|1773282377657|6|1|mp.weixin.qq.com/weheat-agent/payload/record"
|
||||
},
|
||||
"COUNT": 20,
|
||||
"_qimei_fingerprint": "d895c46d5fda98cab67d9daec00068ed",
|
||||
"_clck": "501quy|1|g4t|0",
|
||||
"uuid": "210d1c199a63afd4c774eccd9a06a27f",
|
||||
"rand_info": "CAESIE4WqrFFVVjqrrNflbCUM7wPD5NXjuGbjfHolAEsMmEm",
|
||||
"slave_bizuin": "3258147150",
|
||||
"data_bizuin": "3258147150",
|
||||
"bizuin": "3258147150",
|
||||
"data_ticket": "tpcLjRB7B7AlUY3rFe/ILEjtCKs7dEEGsn8kXnHVzdTb9dgIpSPN1aP8FlE6FDhj",
|
||||
"slave_sid": "U3hfU1Z0UV91N0U5d0lkRDhyTzh3d3hmbnBHMjBnbmFNdzVJeGlJeTJ6OTVxRjJQVVE2VkNhejYzTkxETVVSZkF3eWRORmtRS01XWFBjdnFZZWFLNjR2ZGtwdUJ2MzByclg0NjF4SHlDeVJneEhsczdSYUJVNE45VEhNRWVTQXg1dlpGdWQ0bU5VM3pnRzJN",
|
||||
"slave_user": "gh_fe76760560d0",
|
||||
"xid": "ef503a6864cceaef225c615a45606e4a",
|
||||
"_clsk": "12arnf1|1774975723874|4|1|mp.weixin.qq.com/weheat-agent/payload/record",
|
||||
"_qimei_i_1": "2ddc6a80945f59d3c7c4ab325dd526b3feeea1a31458558bbdd97e582493206c6163629d39d8e1dcd49fddc7"
|
||||
},
|
||||
"COUNT": 21,
|
||||
"REFERER": "https://mp.weixin.qq.com/",
|
||||
"HEADERS": {},
|
||||
"REQUEST_PARAMS": {
|
||||
"action": "search",
|
||||
"scene": "1",
|
||||
"lang": "zh_CN",
|
||||
"f": "json",
|
||||
"ajax": "1",
|
||||
},
|
||||
"REQUESTS_PER_SECOND": 5,
|
||||
"PAGE_DELAY": 5,
|
||||
"CITY_DELAY": 2,
|
||||
}
|
||||
|
||||
|
||||
def _deep_merge_dict(base: Dict, incoming: Dict) -> Dict:
|
||||
merged = copy.deepcopy(base)
|
||||
for key, value in incoming.items():
|
||||
if isinstance(value, dict) and isinstance(merged.get(key), dict):
|
||||
merged[key] = _deep_merge_dict(merged[key], value)
|
||||
else:
|
||||
merged[key] = value
|
||||
return merged
|
||||
|
||||
|
||||
def _parse_cookie_value(cookie_value) -> Dict[str, str]:
|
||||
if isinstance(cookie_value, dict):
|
||||
return {str(key): str(value) for key, value in cookie_value.items()}
|
||||
@@ -110,15 +120,16 @@ def _parse_cookie_value(cookie_value) -> Dict[str, str]:
|
||||
|
||||
|
||||
def _load_weixin_config() -> Dict:
|
||||
config = DEFAULT_WEIXIN_CONFIG.copy()
|
||||
config = copy.deepcopy(DEFAULT_WEIXIN_CONFIG)
|
||||
module_config = getattr(project_config, "WEIXIN_CONFIG", None)
|
||||
if isinstance(module_config, dict):
|
||||
config.update(module_config)
|
||||
config = _deep_merge_dict(config, module_config)
|
||||
|
||||
env_mapping = {
|
||||
"TOKEN": os.getenv("WEIXIN_TOKEN"),
|
||||
"FINGERPRINT": os.getenv("WEIXIN_FINGERPRINT"),
|
||||
"COOKIE": os.getenv("WEIXIN_COOKIE"),
|
||||
"REFERER": os.getenv("WEIXIN_REFERER"),
|
||||
"COUNT": os.getenv("WEIXIN_COUNT"),
|
||||
"REQUESTS_PER_SECOND": os.getenv("WEIXIN_REQUESTS_PER_SECOND"),
|
||||
"PAGE_DELAY": os.getenv("WEIXIN_PAGE_DELAY"),
|
||||
@@ -161,17 +172,32 @@ class WeixinSpider:
|
||||
self.fingerprint = str(self.config.get("FINGERPRINT", "")).strip()
|
||||
self.cookies = self.config.get("COOKIE", {})
|
||||
self.count = str(self.config.get("COUNT", DEFAULT_WEIXIN_CONFIG["COUNT"]))
|
||||
self.referer = str(self.config.get("REFERER", DEFAULT_WEIXIN_CONFIG["REFERER"])).strip()
|
||||
self.request_params = {
|
||||
str(key): str(value)
|
||||
for key, value in (self.config.get("REQUEST_PARAMS", {}) or {}).items()
|
||||
if value is not None
|
||||
}
|
||||
self.page_delay = max(0.0, float(self.config.get("PAGE_DELAY", DEFAULT_WEIXIN_CONFIG["PAGE_DELAY"])))
|
||||
self.city_delay = max(0.0, float(self.config.get("CITY_DELAY", DEFAULT_WEIXIN_CONFIG["CITY_DELAY"])))
|
||||
max_rps = self.config.get("REQUESTS_PER_SECOND")
|
||||
if max_rps:
|
||||
global_rate_limiter.max_requests = int(max_rps)
|
||||
|
||||
headers = getattr(project_config, "HEADERS", DEFAULT_HEADERS).copy()
|
||||
headers["Referer"] = "https://mp.weixin.qq.com/"
|
||||
headers = DEFAULT_HEADERS.copy()
|
||||
project_headers = getattr(project_config, "HEADERS", None)
|
||||
if isinstance(project_headers, dict):
|
||||
headers.update(project_headers)
|
||||
config_headers = self.config.get("HEADERS", {})
|
||||
if isinstance(config_headers, dict):
|
||||
headers.update({str(key): str(value) for key, value in config_headers.items()})
|
||||
if self.referer:
|
||||
headers["Referer"] = self.referer
|
||||
self.session = requests.Session()
|
||||
self.session.trust_env = False
|
||||
self.session.headers.update(headers)
|
||||
if self.cookies:
|
||||
self.session.cookies.update(self.cookies)
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
def _validate_runtime_config(self) -> bool:
|
||||
@@ -214,18 +240,14 @@ class WeixinSpider:
|
||||
return []
|
||||
|
||||
def _build_query_url(self, query: str, buffer: str) -> str:
    """Build the full request URL for one search call.

    The configured ``self.request_params`` form the base of the query
    string; the per-request fields below are layered on top and win on any
    key collision.

    Args:
        query: Search text sent as the ``query`` parameter.
        buffer: Value sent as the ``buffer`` parameter.

    Returns:
        ``API_ENDPOINT`` followed by ``?`` and the URL-encoded parameters.
    """
    per_request = {
        "query": query,
        "count": self.count,
        "buffer": buffer,
        "fingerprint": self.fingerprint,
        "token": self.token,
    }
    # Unpacking order matters: later entries override the configured ones.
    query_params = {**self.request_params, **per_request}
    return f"{API_ENDPOINT}?{urlencode(query_params)}"
|
||||
|
||||
def _extract_phone(self, text: str) -> Optional[str]:
|
||||
|
||||
Reference in New Issue
Block a user