Add maxlaw PC spider and shared proxy limiter
This commit is contained in:
+8
-9
@@ -24,7 +24,7 @@ from request.proxy_config import get_proxies, report_proxy_status
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
from Db import Db
|
||||
-from utils.rate_limiter import wait_for_request
|
||||
+from utils.rate_limiter import request_slot
|
||||
|
||||
DOMAIN = "大律师"
|
||||
LIST_TEMPLATE = "https://m.maxlaw.cn/law/{pinyin}?page={page}"
|
||||
@@ -108,17 +108,16 @@ class DlsSpider:
|
||||
|
||||
def _get(self, url: str, max_retries: int = 3, headers: Optional[Dict[str, str]] = None) -> Optional[str]:
|
||||
"""发送 GET 请求,带重试机制"""
|
||||
-wait_for_request()
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
# 使用更长的超时时间,分别设置连接和读取超时
|
||||
-resp = self.session.get(
|
||||
-url,
|
||||
-timeout=(10, 30), # (connect_timeout, read_timeout)
|
||||
-verify=False,
|
||||
-headers=headers,
|
||||
-)
|
||||
+with request_slot():
|
||||
+resp = self.session.get(
|
||||
+url,
|
||||
+timeout=(10, 30), # (connect_timeout, read_timeout)
|
||||
+verify=False,
|
||||
+headers=headers,
|
||||
+)
|
||||
status_code = resp.status_code
|
||||
content = resp.text
|
||||
resp.close()
|
||||
|
||||
Reference in New Issue
Block a user