Add maxlaw PC spider and shared proxy limiter
This commit is contained in:
@@ -22,7 +22,7 @@ if project_root not in sys.path:
     sys.path.append(project_root)
 
 from request.requests_client import RequestClientError, RequestsClient
-from utils.rate_limiter import wait_for_request
+from utils.rate_limiter import request_slot
 from Db import Db
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -107,9 +107,9 @@ class DlsFreshCrawler:
     def _get_text(self, url: str, timeout: int = 20, max_retries: int = 3) -> str:
         last_error: Optional[Exception] = None
         for attempt in range(max_retries):
-            wait_for_request()
             try:
-                resp = self.client.get_text(url, timeout=timeout, verify=False)
+                with request_slot():
+                    resp = self.client.get_text(url, timeout=timeout, verify=False)
                 code = resp.status_code
                 if code == 403:
                     if attempt < max_retries - 1:
Reference in New Issue
Block a user