Add maxlaw PC spider and shared proxy limiter
This commit is contained in:
@@ -20,6 +20,7 @@ from request.proxy_config import get_proxies, report_proxy_status
 from Db import Db
 from config import HEADERS
+from utils.rate_limiter import request_slot
 
 LIST_URL = "https://m.66law.cn/findlawyer/rpc/loadlawyerlist/"
 
 DOMAIN = "华律"
@@ -100,7 +101,8 @@ class HualvSpider:
     def _post(self, data: Dict[str, str], max_retries: int = 3) -> Optional[Dict]:
         for attempt in range(max_retries):
             try:
-                resp = self.session.post(LIST_URL, data=data, timeout=20, verify=False)
+                with request_slot():
+                    resp = self.session.post(LIST_URL, data=data, timeout=20, verify=False)
                 status_code = resp.status_code
                 text = resp.text
                 resp.close()
||||
@@ -272,7 +274,8 @@ class HualvSpider:
     def _get_detail(self, url: str, max_retries: int = 3) -> Optional[str]:
         for attempt in range(max_retries):
             try:
-                resp = self.session.get(url, timeout=15, verify=False)
+                with request_slot():
+                    resp = self.session.get(url, timeout=15, verify=False)
                 status_code = resp.status_code
                 text = resp.text
                 resp.close()
|
||||
Reference in New Issue
Block a user