# 数据库连接配置 DB_CONFIG = { "host": "8.134.219.222", # 数据库地址 "user": "lawyer", # 数据库用户名 "password": "CTxr8yGwsSX3NdfJ", # 数据库密码 "database": "lawyer", # 数据库名称 "charset": "utf8mb4", } # 微信爬虫特定的配置 WEIXIN_CONFIG = { "TOKEN": "756858506", # 您的Token "FINGERPRINT": "1caa5fc52ac489e20a175e153dd3ef21", "COOKIE": { "appmsglist_action_3258147150": "card", "mm_lang": "zh_CN", "ts_uid": "8295434560", "markHashId_L": "417c7f0e-5d9f-4048-b844-28f78ed2a838", "_qimei_uuid42": "19b0d0b0c2d100de3df57d2afbc5018a9b4ae103e1", "_qimei_i_3": "59c5508a935b04dac7c1ab340fd172b5a5eba4f7160d5683e2867a5a7094713e616364943989e2a29e9f", "_qimei_h38": "b885c955f8e9995f103aac140200000421811e", "_qimei_i_1": "4ddd76d09d525588c892fb6653d17ae9feebf2f0125852d3e78e2c582493206c616333973981e3dd83abc2e0", "_qpsvr_localtk": "0.2780749298744084", "RK": "ZGEMOpzbOS", "ptcz": "90084a2b43c84a92d1b9082da98fd0e92369fcde4f2edbbc85661539c7917055", "pac_uid": "0_HXj3iphPm0Y4a", "_qimei_fingerprint": "bd1870aaecd7a9bb84aa53b9ad9a2c55", "rewardsn": "", "wxtokenkey": "777", "omgid": "0_HXj3iphPm0Y4a", "sig_login": "h01218fdccf5b63c15a6c5edb19ce20d0481c52723ee44ab56b9fc1415ff39c9ff0dd2000e12f1de8ae", "ua_id": "QXSOTQUjDFjoH63yAAAAAPILc15EwzRTwdqntEiCGSE=", "mp_token": "1331492699", "appletToken": "2105598806", "__wx_phantom_mark__": "breQbE92JS", "mmad_session": "2bd2e1824d701b521c16fa35de0378e55273ce93a68ac0cc9ca30e8ad5b2e9f6fc419dd5fed1cd17f0a57fc3c327e03ccf325c1e1e97dde41374a9d8067d9aa700c8b87a29b0d3caf7f949761d8f4eeb56a1e3ddbc5a5d3a573e5b83971cd92e11de1c56c245721266e7088080fefde3", "pgv_info": "ssid=s5739471549", "pgv_pvid": "2616937300", "_gcl_au": "1.1.954868153.1769494261", "wxuin": "69676812527831", "_clck": "3258147150|1|g35|0", "uuid": "e07aa2889db56b1901e1fb6b1286d9a7", "rand_info": "CAESIBnfIxLJoUVe5wP4SI/ADWnrnPUBlJDb4yyA7Et1+ZfF", "slave_bizuin": "3258147150", "data_bizuin": "3258147150", "bizuin": "3258147150", "data_ticket": "kv+SnLJADgPlcKQPIbYnfbEAxogpIMfAo/n0/HjtChnfDmQSogWvkO82/mUtzpcc", "slave_sid": "eFNMcEZ3bnhvRkppZVNkTDE4dFFnM0ZzdFM1REhpemZORHRnVnlnRHhKU29vY1ZBY0dJZkFHcXB5Nko4aV9pbVlnRTBRVDE0NzdIUDF4T3NTSDVzdXBJS2d3WFFuR3hiMWVVbG5ZTURfYmh3YTFTallIb2JXOWpyTWxXS25jbVFRVmtXWHVaWGdCN2lqZzVm", "slave_user": "gh_fe76760560d0", "xid": "34f577adf2c28e5b9f04de93c614c5c4", "_clsk": "639w4k|1769742296130|3|1|mp.weixin.qq.com/weheat-agent/payload/record" }, "COUNT": 20, # 单页条数 "REQUESTS_PER_SECOND": 8, # 每秒最大请求数(调高更快,但有风控风险) "PAGE_DELAY": 0.8, # 每页采集后的等待秒数 "CITY_DELAY": 0.3, # 每城市采集后的等待秒数 } # 通用请求头 HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', 'Accept': '*/*', 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7', 'X-Requested-With': 'XMLHttpRequest', } # 法律快车爬虫配置 LAWTIME_CONFIG = { "HEADERS": { "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1" } } # Redis配置 - 用于采集索引和断点恢复 REDIS_CONFIG = { "host": "127.0.0.1", "port": 6379, "password": "", "db": 0, # 使用数据库0 "decode_responses": True, # 自动解码响应 "socket_timeout": 5, # 连接超时时间 "socket_connect_timeout": 5, # 连接建立超时时间 "health_check_interval": 30, # 健康检查间隔 "retry_on_timeout": True, # 超时重试 "max_connections": 20, # 最大连接数 } # Redis键名配置 REDIS_KEYS = { "spider_progress": "lawyer:spider:progress:{spider_name}", # 爬虫进度 "url_processed": "lawyer:url:processed:{spider_name}", # 已处理URL集合 "url_failed": "lawyer:url:failed:{spider_name}", # 失败URL集合 "spider_stats": "lawyer:stats:{spider_name}", # 爬虫统计信息 "global_stats": "lawyer:global:stats", # 全局统计 "session_info": "lawyer:session:{session_id}", # 会话信息 "url_queue": "lawyer:queue:{spider_name}", # URL队列 "duplicate_filter": "lawyer:duplicate:{spider_name}", # 去重过滤器 } # MongoDB配置 - 用于日志存储 MONGO_CONFIG = { "uri": "mongodb://127.0.0.1:27017/", "database": "lawyer", "collections": { "logs": "logs", # 通用日志 "spider_logs": "spider_logs", # 爬虫专用日志 "error_logs": "error_logs", # 错误日志 "system_logs": "system_logs", # 系统日志 "performance_logs": "performance_logs" # 性能日志 }, "options": { "maxPoolSize": 10, # 连接池最大连接数 "minPoolSize": 1, # 连接池最小连接数 "maxIdleTimeMS": 30000, # 最大空闲时间 "serverSelectionTimeoutMS": 5000, # 服务器选择超时 "connectTimeoutMS": 10000, # 连接超时 "socketTimeoutMS": 30000, # Socket超时 } }