Add maxlaw PC spider and shared proxy limiter

This commit is contained in:
hello-dd-code
2026-04-03 16:06:28 +08:00
parent ff5e04d986
commit ba04fe42fc
9 changed files with 651 additions and 78 deletions
+8 -9
View File
@@ -24,7 +24,7 @@ from request.proxy_config import get_proxies, report_proxy_status
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from Db import Db
from utils.rate_limiter import wait_for_request
from utils.rate_limiter import request_slot
DOMAIN = "大律师"
LIST_TEMPLATE = "https://m.maxlaw.cn/law/{pinyin}?page={page}"
@@ -108,17 +108,16 @@ class DlsSpider:
def _get(self, url: str, max_retries: int = 3, headers: Optional[Dict[str, str]] = None) -> Optional[str]:
"""发送 GET 请求,带重试机制"""
wait_for_request()
for attempt in range(max_retries):
try:
# 使用更长的超时时间,分别设置连接和读取超时
resp = self.session.get(
url,
timeout=(10, 30), # (connect_timeout, read_timeout)
verify=False,
headers=headers,
)
with request_slot():
resp = self.session.get(
url,
timeout=(10, 30), # (connect_timeout, read_timeout)
verify=False,
headers=headers,
)
status_code = resp.status_code
content = resp.text
resp.close()