Files
lawyers/config.py
T
2026-04-28 17:33:51 +08:00

115 lines
5.0 KiB
Python

# Database connection settings.
#
# Every credential can be overridden through an environment variable so a
# deployment does not have to edit this file. The literals are only the
# fallback defaults used when the corresponding variable is unset, which keeps
# the module's values identical to the previous hard-coded ones by default.
# NOTE(review): the fallback password is committed in plain text -- consider
# rotating it and supplying it exclusively via LAWYER_DB_PASSWORD.
import os

DB_CONFIG = {
    # Database server address.
    "host": os.environ.get("LAWYER_DB_HOST", "8.134.219.222"),
    # Database user name.
    "user": os.environ.get("LAWYER_DB_USER", "lawyer"),
    # Database password.
    "password": os.environ.get("LAWYER_DB_PASSWORD", "CTxr8yGwsSX3NdfJ"),
    # Database name.
    "database": os.environ.get("LAWYER_DB_NAME", "lawyer"),
    "charset": "utf8mb4",
}
# Gaode (AMap) map API settings.
#
# The key can be supplied through the GAODE_API_KEY environment variable; the
# literal is only the fallback used when that variable is unset, so the default
# value is the same as before.
# NOTE(review): the fallback key is committed in plain text -- consider
# rotating it and providing it only through the environment.
import os

GAODE_CONFIG = {
    "API_KEY": os.environ.get(
        "GAODE_API_KEY", "f261575fb28003761c433f6c9379e89d"
    ),
}
# Settings specific to the WeChat (Weixin) crawler.
#
# NOTE(review): TOKEN, FINGERPRINT and the cookie jar look like values captured
# from a logged-in browser session; presumably they expire and must be
# refreshed by hand -- confirm with the crawler's owner.

# Cookie name -> value pairs, kept in their original order.
_WEIXIN_COOKIE = {
    "appmsglist_action_3258147150": "card",
    "_qimei_uuid42": "1a302160d051008226aec905b63f99ff3989f30009",
    "_qimei_i_3": "63b22b84c15204dfc595ac6452d722b1f0bdf0f6145b568ae68a7c0e70947438686637943989e2a1d792",
    "_qimei_h38": "215986ce26aec905b63f99ff0200000e81a302",
    "ua_id": "S7gglu0eZh9NkAzLAAAAADH8dynpnFZVN29lxm7BQo0=",
    "wxuin": "73074968761097",
    "mm_lang": "zh_CN",
    "eas_sid": "91X7I7K4K5k364U2z3k2I980F5",
    "_qimei_q36": "",
    "_qimei_fingerprint": "d895c46d5fda98cab67d9daec00068ed",
    "_qimei_i_1": "4dc76680945f59d3c7c4ab325dd526b3feeea1a31458558bbdd97e582493206c6163629d39d8e1dcd4b2c28f",
    "pgv_pvid": "6923507145",
    "ts_uid": "9585717820",
    "_t_qbtool_uid": "aaaa2vn5byd280l00iglw701zci788cb",
    "_ga": "GA1.1.1323926288.1775838938",
    "_ga_TPFW0KPXC1": "GS2.1.s1775841484$o2$g1$t1775841485$j59$l0$h0",
    "uuid": "20d1cfb540221c6e7b6d665ab1d4a8f7",
    "rand_info": "CAESIA8LYV6dvWh5dYrgQLPhZb8TXwUJoWdcdDzN0TTdztSj",
    "slave_bizuin": "3258147150",
    "data_bizuin": "3258147150",
    "bizuin": "3258147150",
    "data_ticket": "dgLFmSrI8f1q6JnYOd2Y/sKJIWjh6YlLSau1n1+Mv5iOTR5hgsm1qjNLypWflGd6",
    "slave_sid": "VGVnNmM5NmFpV19ESElmVlZOTGZfVVJfWE5HanlHNjN0WEswZVkxVk9vc2FTenQzVGRsWUxDT0xGQVBJRVZzU0JNVV9RckRJVE9jSVUwbjl4Z2VHaEZKSzE5WVc3THRCRW96T0Z1V1VwbnBLSnkxSWdKaHdaN1dYdzI1SmdpZ0IyOFJtUE45OTR2Q2NvM1FB",
    "slave_user": "gh_fe76760560d0",
    "xid": "4893c62dc8518b6a1628fd34bc9aa276",
    "_clck": "3258147150|1|g5g|0",
    "_clsk": "1p4oo3h|1776957001796|5|1|mp.weixin.qq.com/weheat-agent/payload/record",
}

WEIXIN_CONFIG = {
    # Your token.
    "TOKEN": "553117235",
    "FINGERPRINT": "3c02c35093184e9a9a668ac3c81e53f9",
    "COOKIE": _WEIXIN_COOKIE,
    # Number of items per page.
    "COUNT": 20,
    # Maximum requests per second (higher is faster, but increases the risk
    # of triggering the site's risk control).
    "REQUESTS_PER_SECOND": 8,
    # Seconds to wait after each page has been collected.
    "PAGE_DELAY": 0.8,
    # Seconds to wait after each city has been collected.
    "CITY_DELAY": 0.3,
}
# Common HTTP request headers.
HEADERS = {
    # Desktop Chrome on Windows; split across lines only for readability --
    # adjacent literals are concatenated into a single string.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/138.0.0.0 Safari/537.36"
    ),
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
    "X-Requested-With": "XMLHttpRequest",
}
# Settings for the Lawtime (法律快车) crawler.
LAWTIME_CONFIG = {
    "HEADERS": {
        # Mobile Safari on iPhone; adjacent literals are concatenated into
        # one string.
        "User-Agent": (
            "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) "
            "AppleWebKit/605.1.15 (KHTML, like Gecko) "
            "Version/13.0.3 Mobile/15E148 Safari/604.1"
        ),
    },
}
# Redis settings -- used for crawl indexing and resuming interrupted runs.
REDIS_CONFIG = {
    "host": "127.0.0.1",
    "port": 6379,
    "password": "",
    # Use database 0.
    "db": 0,
    # Decode responses automatically.
    "decode_responses": True,
    # Timeout (the original comment called this the "connection timeout";
    # presumably it is the socket read/write timeout -- confirm against the
    # Redis client in use).
    "socket_timeout": 5,
    # Timeout for establishing a connection.
    "socket_connect_timeout": 5,
    # Health-check interval.
    "health_check_interval": 30,
    # Retry when an operation times out.
    "retry_on_timeout": True,
    # Maximum number of connections.
    "max_connections": 20,
}
# Redis key-name templates. Entries containing a ``{...}`` placeholder are
# meant to be filled in with ``str.format``.
REDIS_KEYS = {
    # Crawl progress of a spider.
    "spider_progress": "lawyer:spider:progress:{spider_name}",
    # Set of URLs already processed.
    "url_processed": "lawyer:url:processed:{spider_name}",
    # Set of URLs that failed.
    "url_failed": "lawyer:url:failed:{spider_name}",
    # Per-spider statistics.
    "spider_stats": "lawyer:stats:{spider_name}",
    # Global statistics.
    "global_stats": "lawyer:global:stats",
    # Session information.
    "session_info": "lawyer:session:{session_id}",
    # URL queue.
    "url_queue": "lawyer:queue:{spider_name}",
    # De-duplication filter.
    "duplicate_filter": "lawyer:duplicate:{spider_name}",
}
# MongoDB settings -- used for log storage.
MONGO_CONFIG = {
    "uri": "mongodb://127.0.0.1:27017/",
    "database": "lawyer",
    # Logical log category -> collection name.
    "collections": {
        # General-purpose logs.
        "logs": "logs",
        # Spider-specific logs.
        "spider_logs": "spider_logs",
        # Error logs.
        "error_logs": "error_logs",
        # System logs.
        "system_logs": "system_logs",
        # Performance logs.
        "performance_logs": "performance_logs",
    },
    # Connection options; the ``MS`` suffix in a key denotes milliseconds.
    "options": {
        # Maximum number of pooled connections.
        "maxPoolSize": 10,
        # Minimum number of pooled connections.
        "minPoolSize": 1,
        # Maximum idle time.
        "maxIdleTimeMS": 30000,
        # Server-selection timeout.
        "serverSelectionTimeoutMS": 5000,
        # Connection timeout.
        "connectTimeoutMS": 10000,
        # Socket timeout.
        "socketTimeoutMS": 30000,
    },
}