# Database connection settings.
#
# SECURITY: the literal credentials in this section are committed to source
# control. They are kept only as fallbacks so existing deployments keep
# working; supply the LAWYER_* environment variables instead and rotate the
# committed values.
import os

DB_CONFIG = {
    "host": "8.134.219.222",  # database host
    "user": "lawyer",  # database user name
    # database password; LAWYER_DB_PASSWORD overrides the committed fallback
    "password": os.environ.get("LAWYER_DB_PASSWORD", "CTxr8yGwsSX3NdfJ"),
    "database": "lawyer",  # database name
    "charset": "utf8mb4",
}

# Gaode (AMap) map API settings.
GAODE_CONFIG = {
    # LAWYER_GAODE_API_KEY overrides the committed fallback
    "API_KEY": os.environ.get(
        "LAWYER_GAODE_API_KEY", "f261575fb28003761c433f6c9379e89d"
    ),
}

# Settings specific to the WeChat (Weixin) crawler.
WEIXIN_CONFIG = {
    # your token; LAWYER_WEIXIN_TOKEN overrides the committed fallback
    "TOKEN": os.environ.get("LAWYER_WEIXIN_TOKEN", "553117235"),
    "FINGERPRINT": "3c02c35093184e9a9a668ac3c81e53f9",
    # NOTE(review): this looks like a captured browser session cookie
    # (data_ticket, slave_sid, ...) and should be treated as a secret; it is
    # left inline because no per-field override exists yet.
    "COOKIE": {
        "appmsglist_action_3258147150": "card",
        "_qimei_uuid42": "1a302160d051008226aec905b63f99ff3989f30009",
        "_qimei_i_3": "63b22b84c15204dfc595ac6452d722b1f0bdf0f6145b568ae68a7c0e70947438686637943989e2a1d792",
        "_qimei_h38": "215986ce26aec905b63f99ff0200000e81a302",
        "ua_id": "S7gglu0eZh9NkAzLAAAAADH8dynpnFZVN29lxm7BQo0=",
        "wxuin": "73074968761097",
        "mm_lang": "zh_CN",
        "eas_sid": "91X7I7K4K5k364U2z3k2I980F5",
        "_qimei_q36": "",
        "_qimei_fingerprint": "d895c46d5fda98cab67d9daec00068ed",
        "_qimei_i_1": "4dc76680945f59d3c7c4ab325dd526b3feeea1a31458558bbdd97e582493206c6163629d39d8e1dcd4b2c28f",
        "pgv_pvid": "6923507145",
        "ts_uid": "9585717820",
        "_t_qbtool_uid": "aaaa2vn5byd280l00iglw701zci788cb",
        "_ga": "GA1.1.1323926288.1775838938",
        "_ga_TPFW0KPXC1": "GS2.1.s1775841484$o2$g1$t1775841485$j59$l0$h0",
        "uuid": "20d1cfb540221c6e7b6d665ab1d4a8f7",
        "rand_info": "CAESIA8LYV6dvWh5dYrgQLPhZb8TXwUJoWdcdDzN0TTdztSj",
        "slave_bizuin": "3258147150",
        "data_bizuin": "3258147150",
        "bizuin": "3258147150",
        "data_ticket": "dgLFmSrI8f1q6JnYOd2Y/sKJIWjh6YlLSau1n1+Mv5iOTR5hgsm1qjNLypWflGd6",
        "slave_sid": "VGVnNmM5NmFpV19ESElmVlZOTGZfVVJfWE5HanlHNjN0WEswZVkxVk9vc2FTenQzVGRsWUxDT0xGQVBJRVZzU0JNVV9RckRJVE9jSVUwbjl4Z2VHaEZKSzE5WVc3THRCRW96T0Z1V1VwbnBLSnkxSWdKaHdaN1dYdzI1SmdpZ0IyOFJtUE45OTR2Q2NvM1FB",
        "slave_user": "gh_fe76760560d0",
        "xid": "4893c62dc8518b6a1628fd34bc9aa276",
        "_clck": "3258147150|1|g5g|0",
        "_clsk": "1p4oo3h|1776957001796|5|1|mp.weixin.qq.com/weheat-agent/payload/record",
    },
    "COUNT": 20,  # items per page
    # max requests per second (higher is faster but risks triggering risk control)
    "REQUESTS_PER_SECOND": 8,
    "PAGE_DELAY": 0.8,  # seconds to wait after each page is collected
    "CITY_DELAY": 0.3,  # seconds to wait after each city is collected
}

# Common request headers
HEADERS = {
    # Desktop Chrome user agent plus the XMLHttpRequest marker header.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
    "X-Requested-With": "XMLHttpRequest",
}

# Lawtime crawler settings: it carries its own header set with a mobile
# Safari (iPhone) user agent.
LAWTIME_CONFIG = {
    "HEADERS": {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"
    }
}

# Redis settings - used for the crawl index and for resuming after an
# interruption.
REDIS_CONFIG = {
    "host": "127.0.0.1",
    "port": 6379,
    "password": "",
    "db": 0,  # use database 0
    "decode_responses": True,  # decode responses automatically
    "socket_timeout": 5,  # connection timeout
    "socket_connect_timeout": 5,  # connection-establishment timeout
    "health_check_interval": 30,  # health check interval
    "retry_on_timeout": True,  # retry when a timeout occurs
    "max_connections": 20,  # maximum number of connections
}

# Redis key name templates; the {spider_name} / {session_id} placeholders are
# left unformatted here.
REDIS_KEYS = {
    # crawler progress
    "spider_progress": "lawyer:spider:progress:{spider_name}",
    # set of already-processed URLs
    "url_processed": "lawyer:url:processed:{spider_name}",
    # set of failed URLs
    "url_failed": "lawyer:url:failed:{spider_name}",
    # per-crawler statistics
    "spider_stats": "lawyer:stats:{spider_name}",
    # global statistics
    "global_stats": "lawyer:global:stats",
    # session information
    "session_info": "lawyer:session:{session_id}",
    # URL queue
    "url_queue": "lawyer:queue:{spider_name}",
    # duplicate filter
    "duplicate_filter": "lawyer:duplicate:{spider_name}",
}

# MongoDB settings - used for log storage.
MONGO_CONFIG = {
    "uri": "mongodb://127.0.0.1:27017/",
    "database": "lawyer",
    "collections": {
        "logs": "logs",  # general logs
        "spider_logs": "spider_logs",  # crawler-specific logs
        "error_logs": "error_logs",  # error logs
        "system_logs": "system_logs",  # system logs
        "performance_logs": "performance_logs",  # performance logs
    },
    "options": {
        "maxPoolSize": 10,  # maximum connections in the pool
        "minPoolSize": 1,  # minimum connections in the pool
        "maxIdleTimeMS": 30000,  # maximum idle time
        "serverSelectionTimeoutMS": 5000,  # server selection timeout
        "connectTimeoutMS": 10000,  # connect timeout
        "socketTimeoutMS": 30000,  # socket timeout
    },
}