feat: enhance project configuration and improve data export functionality

- Updated `.gitignore` to streamline the ignore rules and exclude log files for the common sites.
- Expanded `config.py` with new configuration blocks for Weixin and Redis, and improved the database connection settings (a sketch of the new blocks follows this list).
- Refined `README.md` to clarify project structure and usage instructions.
- Enhanced `requirements.txt` with additional dependencies for MongoDB and Redis support.
- Refactored multiple spider scripts to use a session-based approach for HTTP requests, improving error handling and proxy management (see the session sketch after this list).
- Updated `export_lawyers_excel.py` to apply a default timestamp to data exports (see the export sketch after this list).
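
The Weixin and Redis additions to `config.py` are in another file of this commit, so the hunk below does not show them; a minimal sketch of what the new blocks could look like, with all key names and values assumed:

```python
# config.py -- hypothetical shape of the new sections; key names and values are assumptions.
WEIXIN_CONFIG = {
    "APP_ID": "",        # filled in per deployment
    "APP_SECRET": "",
}

REDIS_CONFIG = {
    "host": "127.0.0.1",
    "port": 6379,
    "db": 0,
    "password": "",
}
```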
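
A minimal sketch of the session-based request pattern the refactor introduces, built around the `request.proxy_config` and `config` helpers that appear in the diff below; the retry and backoff values here are illustrative rather than the project's exact policy:

```python
import random
import time
from typing import Optional

import requests

from config import LAWTIME_CONFIG                      # per-site headers
from request.proxy_config import get_proxies, report_proxy_status


def build_session() -> requests.Session:
    """Create a Session with the project's headers and optional proxies."""
    report_proxy_status()
    session = requests.Session()
    session.trust_env = False                          # ignore system proxy env vars
    proxies = get_proxies()
    if proxies:
        session.proxies.update(proxies)
    session.headers.update(LAWTIME_CONFIG.get("HEADERS", {}))
    session.headers.setdefault("Connection", "close")
    return session


def fetch(session: requests.Session, url: str, max_retries: int = 3) -> Optional[str]:
    """GET a page, rebuilding the session and backing off when the site returns 403."""
    for attempt in range(max_retries):
        try:
            resp = session.get(url, timeout=15, verify=False)
            if resp.status_code == 403 and attempt < max_retries - 1:
                session = build_session()              # rotate proxy / reset cookies
                time.sleep(2 ** attempt + random.uniform(0.3, 1.0))
                continue
            resp.raise_for_status()
            return resp.text
        except requests.RequestException as exc:
            print(f"request failed {url}: {exc}")
            return None
    return None
```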
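
The `export_lawyers_excel.py` change is also outside the hunk below; a sketch of how a default timestamp might be applied to the export path, with the function and argument names assumed:

```python
from datetime import datetime
from typing import Optional


def resolve_export_path(output_path: Optional[str] = None) -> str:
    """Hypothetical helper: fall back to a timestamped filename when none is given."""
    if output_path:
        return output_path
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"lawyers_{stamp}.xlsx"
```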
This commit is contained in:
hello-dd-code
2026-03-18 10:02:25 +08:00
parent c2b77975c1
commit 38e7c284e8
14 changed files with 1665 additions and 3004 deletions
+236 -586
@@ -1,16 +1,13 @@
import argparse
import hashlib
import json
import os
import random
import re
import sys
import time
from dataclasses import dataclass, field
from typing import Dict, Iterable, List, Optional, Set, Tuple
import urllib3
from bs4 import BeautifulSoup
import random
from typing import Dict, Optional, List, Set
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
@@ -20,628 +17,281 @@ if request_dir not in sys.path:
if project_root not in sys.path:
sys.path.append(project_root)
from Db import Db
from request.requests_client import RequestClientError, RequestsClient
from utils.rate_limiter import wait_for_request
import requests
import urllib3
from bs4 import BeautifulSoup
from request.proxy_config import get_proxies, report_proxy_status
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
SITE_NAME = "lawtime"
LEGACY_DOMAIN = "法律快车"
SITE_BASE = "https://www.lawtime.cn"
PROVINCE_API = "https://www.lawtime.cn/public/stationIndex/getAreaList?areacode=0"
CITY_API_TEMPLATE = "https://www.lawtime.cn/public/stationIndex/getAreaList?areacode={province_id}"
LIST_URL_TEMPLATE = SITE_BASE + "/{city_py}/lawyer"
from Db import Db
from config import LAWTIME_CONFIG
PHONE_RE = re.compile(r"1[3-9]\d{9}")
YEAR_RE = re.compile(r"执业\s*(\d+)\s*年")
LIST_BASE = "https://m.lawtime.cn/{pinyin}/lawyer/?page={page}"
DETAIL_BASE = "https://m.lawtime.cn"
DOMAIN = "法律快车"
@dataclass
class CityTarget:
province_id: str
province_name: str
province_py: str
city_id: str
city_name: str
city_py: str
@dataclass
class ListCard:
detail_url: str
name: str
phone: str
address: str = ""
specialties: List[str] = field(default_factory=list)
metric_text: str = ""
def normalize_phone(text: str) -> str:
compact = re.sub(r"\D", "", text or "")
match = PHONE_RE.search(compact)
return match.group(0) if match else ""
class LawtimeCrawler:
def __init__(
self,
max_pages: int = 9999,
sleep_seconds: float = 0.1,
use_proxy: bool = True,
db_connection=None,
):
self.max_pages = max_pages
self.sleep_seconds = max(0.0, sleep_seconds)
class LawtimeSpider:
def __init__(self, db_connection):
self.db = db_connection
self.client = RequestsClient(
headers={
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/122.0.0.0 Safari/537.36"
),
"Accept": "text/html,application/json,*/*;q=0.8",
"Connection": "close",
},
use_proxy=use_proxy,
retry_total=2,
retry_backoff_factor=1,
retry_status_forcelist=(429, 500, 502, 503, 504),
retry_allowed_methods=("GET",),
)
self.session = self._build_session()
self.max_workers = int(os.getenv("SPIDER_WORKERS", "8"))
self._tls = threading.local()
def _get_text(
self,
url: str,
*,
timeout: int = 20,
max_retries: int = 3,
referer: str = SITE_BASE,
) -> str:
headers = {"Referer": referer}
last_error: Optional[Exception] = None
def _build_session(self) -> requests.Session:
report_proxy_status()
session = requests.Session()
session.trust_env = False
proxies = get_proxies()
if proxies:
session.proxies.update(proxies)
else:
session.proxies.clear()
headers = LAWTIME_CONFIG.get("HEADERS", {})
if headers:
session.headers.update(headers)
session.headers.setdefault("Connection", "close")
return session
for attempt in range(max_retries):
wait_for_request()
try:
resp = self.client.get_text(
url,
timeout=timeout,
verify=False,
headers=headers,
)
code = resp.status_code
if code == 403:
if attempt < max_retries - 1:
self.client.refresh()
time.sleep((2 ** attempt) + random.uniform(0.2, 0.8))
continue
raise RequestClientError(f"{code} Error: {url}")
if code >= 500 and attempt < max_retries - 1:
time.sleep((2 ** attempt) + random.uniform(0.2, 0.8))
continue
if code >= 400:
raise RequestClientError(f"{code} Error: {url}")
return resp.text
except Exception as exc:
last_error = exc
if attempt < max_retries - 1:
time.sleep((2 ** attempt) + random.uniform(0.2, 0.8))
continue
raise
if last_error is not None:
raise last_error
raise RequestClientError(f"Unknown request error: {url}")
def _get_json(self, url: str, *, referer: str) -> List[Dict]:
text = self._get_text(url, referer=referer)
cleaned = (text or "").strip().lstrip("\ufeff")
if not cleaned or cleaned.startswith("<"):
return []
def _refresh_session(self) -> None:
try:
data = json.loads(cleaned)
except ValueError:
return []
return data if isinstance(data, list) else []
self.session.close()
except Exception:
pass
self.session = self._build_session()
def discover_cities(self) -> List[CityTarget]:
provinces = self._get_json(PROVINCE_API, referer=SITE_BASE)
if not provinces:
print("[discover] 地区接口未返回有效数据")
return []
def _get_thread_session(self) -> requests.Session:
s = getattr(self._tls, "session", None)
if s is not None:
return s
s = self._build_session()
s.headers.update(dict(self.session.headers))
self._tls.session = s
return s
results: List[CityTarget] = []
seen_py: Set[str] = set()
for province in provinces:
province_id = str(province.get("id") or "").strip()
province_name = str(province.get("province") or province.get("city") or "").strip()
province_py = str(province.get("pinyin") or "").strip()
if not province_id or not province_name:
continue
city_api = CITY_API_TEMPLATE.format(province_id=province_id)
def _refresh_thread_session(self) -> None:
s = getattr(self._tls, "session", None)
if s is not None:
try:
cities = self._get_json(city_api, referer=LIST_URL_TEMPLATE.format(city_py=province_py or ""))
except Exception as exc:
print(f"[city] 获取失败 province={province_id}: {exc}")
continue
if not cities:
cities = [
{
"id": province_id,
"province": province_name,
"city": province_name,
"pinyin": province_py,
}
]
for city in cities:
city_id = str(city.get("id") or "").strip()
city_name = str(city.get("city") or city.get("province") or "").strip()
city_py = str(city.get("pinyin") or "").strip()
if not city_id or not city_name or not city_py:
continue
if city_py in seen_py:
continue
seen_py.add(city_py)
results.append(
CityTarget(
province_id=province_id,
province_name=province_name,
province_py=province_py,
city_id=city_id,
city_name=city_name,
city_py=city_py,
)
)
return results
def _build_list_url(self, city_py: str, page: int) -> str:
base = LIST_URL_TEMPLATE.format(city_py=city_py)
if page <= 1:
return base
return f"{base}?page={page}"
def fetch_list_page(self, target: CityTarget, page: int) -> Tuple[List[ListCard], bool, str]:
list_url = self._build_list_url(target.city_py, page)
html = self._get_text(list_url, referer=SITE_BASE + "/")
cards = self.parse_list_cards(html)
soup = BeautifulSoup(html, "html.parser")
next_link = soup.select_one(f"div.page a[href*='page={page + 1}']")
has_next = next_link is not None
return cards, has_next, list_url
def parse_list_cards(self, html: str) -> List[ListCard]:
soup = BeautifulSoup(html, "html.parser")
cards: List[ListCard] = []
seen: Set[str] = set()
for item in soup.select("li.lawyer-item-card"):
link_tag = item.select_one("a.name[href]") or item.select_one("a.lawyer-img-box[href]")
if not link_tag:
continue
detail_url = (link_tag.get("href") or "").strip()
if not detail_url.startswith("http"):
continue
if detail_url in seen:
continue
seen.add(detail_url)
name = link_tag.get_text(strip=True)
phone = ""
phone_tag = item.select_one("div.phone")
if phone_tag:
phone = normalize_phone(phone_tag.get_text(" ", strip=True))
address = ""
addr_tag = item.select_one("div.location .txt")
if addr_tag:
address = addr_tag.get_text(" ", strip=True)
specialties: List[str] = []
prof_tag = item.select_one("div.prof .txt")
if prof_tag:
specialties = [
x.strip() for x in re.split(r"[、,]", prof_tag.get_text(" ", strip=True)) if x.strip()
]
metric_text = ""
metric_tag = item.select_one("div.num-msg")
if metric_tag:
metric_text = metric_tag.get_text(" ", strip=True)
cards.append(
ListCard(
detail_url=detail_url,
name=name,
phone=phone,
address=address,
specialties=specialties,
metric_text=metric_text,
)
)
return cards
def parse_detail(self, detail_url: str) -> Dict:
html = self._get_text(detail_url, referer=SITE_BASE)
if "网站防火墙" in html or "访问被拒绝" in html or "/ipfilter/verify" in html:
raise RequestClientError(f"firewall blocked: {detail_url}")
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text(" ", strip=True)
name = ""
law_firm = ""
phone = ""
address = ""
practice_years: Optional[int] = None
specialties: List[str] = []
if soup.title:
title = soup.title.get_text(" ", strip=True)
match = re.search(r"([^\s_,。]+?)律师", title)
if match:
name = match.group(1).strip()
phone_candidates = [
soup.select_one(".data-w .tel-b b").get_text(" ", strip=True)
if soup.select_one(".data-w .tel-b b")
else "",
soup.select_one(".law-info-b .item .two-r.b").get_text(" ", strip=True)
if soup.select_one(".law-info-b .item .two-r.b")
else "",
text,
]
for candidate in phone_candidates:
phone = normalize_phone(candidate)
if phone:
break
law_firm_tag = soup.select_one(".law-info-b .item .two-nowrap")
if law_firm_tag:
law_firm = law_firm_tag.get_text(" ", strip=True)
for li in soup.select(".law-info-b .item"):
li_text = li.get_text(" ", strip=True)
if ("事务所" in li_text or "法律服务所" in li_text) and not law_firm:
law_firm = li_text
addr_tag = soup.select_one(".law-info-b .item .two-r[title]")
if addr_tag:
addr_value = (addr_tag.get("title") or "").strip()
if len(addr_value) > 8:
address = addr_value
if not address:
addr_tag = soup.select_one(".law-info-b .item .two-r")
if addr_tag:
addr_value = addr_tag.get_text(" ", strip=True)
if len(addr_value) > 8 and "律师" not in addr_value:
address = addr_value
year_match = YEAR_RE.search(text)
if year_match:
try:
practice_years = int(year_match.group(1))
s.close()
except Exception:
practice_years = None
pass
self._tls.session = None
specialties = [x.get_text(strip=True) for x in soup.select(".profession-b .item") if x.get_text(strip=True)]
return {
"name": name,
"law_firm": law_firm,
"phone": phone,
"address": address,
"practice_years": practice_years,
"specialties": specialties,
"detail_url": detail_url,
}
def crawl_city(self, target: CityTarget) -> Iterable[Dict]:
seen_details: Set[str] = set()
for page in range(1, self.max_pages + 1):
try:
cards, has_next, list_url = self.fetch_list_page(target, page)
except Exception as exc:
print(f"[list] 失败 {target.city_py} p{page}: {exc}")
break
if not cards:
break
for card in cards:
if card.detail_url in seen_details:
continue
seen_details.add(card.detail_url)
detail: Dict = {}
try:
detail = self.parse_detail(card.detail_url)
except Exception as exc:
print(f"[detail] 失败 {card.detail_url}: {exc}")
phone = normalize_phone(detail.get("phone") or card.phone)
profile_name = (detail.get("name") or card.name).replace("律师", "").strip()
now = int(time.time())
record_id = hashlib.md5(card.detail_url.encode("utf-8")).hexdigest()
yield {
"record_id": record_id,
"collected_at": now,
"source": {
"site": SITE_NAME,
"province_id": target.province_id,
"province": target.province_name,
"province_py": target.province_py,
"city_id": target.city_id,
"city": target.city_name,
"city_py": target.city_py,
"page": page,
"list_url": list_url,
"detail_url": card.detail_url,
},
"list_snapshot": {
"name": card.name,
"phone": card.phone,
"address": card.address,
"specialties": card.specialties,
"metric_text": card.metric_text,
},
"profile": {
"name": profile_name,
"law_firm": (detail.get("law_firm") or "").strip(),
"phone": phone,
"address": (detail.get("address") or card.address or "").strip(),
"practice_years": detail.get("practice_years"),
"specialties": detail.get("specialties") or card.specialties,
},
}
if self.sleep_seconds:
time.sleep(self.sleep_seconds)
if not has_next:
break
def _to_legacy_lawyer_row(self, record: Dict) -> Optional[Dict[str, str]]:
source = record.get("source", {}) or {}
profile = record.get("profile", {}) or {}
phone = normalize_phone(profile.get("phone", ""))
if not phone:
return None
province = (source.get("province") or "").strip()
city = (source.get("city") or province).strip()
return {
"name": (profile.get("name") or "").strip(),
"law_firm": (profile.get("law_firm") or "").strip(),
"province": province,
"city": city,
"phone": phone,
"url": (source.get("detail_url") or source.get("list_url") or "").strip(),
"domain": LEGACY_DOMAIN,
"create_time": int(record.get("collected_at") or time.time()),
"params": json.dumps(record, ensure_ascii=False),
}
def _existing_phones_in_db(self, phones: List[str]) -> Set[str]:
if not self.db or not phones:
def _existing_phones(self, phones: List[str]) -> Set[str]:
if not phones:
return set()
deduped = sorted({p for p in phones if p})
if not deduped:
return set()
existing: Set[str] = set()
cur = self.db.db.cursor()
try:
chunk_size = 500
for i in range(0, len(deduped), chunk_size):
chunk = deduped[i:i + chunk_size]
for i in range(0, len(phones), chunk_size):
chunk = phones[i:i + chunk_size]
placeholders = ",".join(["%s"] * len(chunk))
sql = f"SELECT phone FROM lawyer WHERE domain=%s AND phone IN ({placeholders})"
cur.execute(sql, [LEGACY_DOMAIN, *chunk])
cur.execute(sql, [DOMAIN, *chunk])
for row in cur.fetchall():
existing.add(row[0])
finally:
cur.close()
return existing
def _write_records_to_db(self, records: List[Dict]) -> Tuple[int, int]:
if not self.db:
return 0, 0
def _load_areas(self):
condition = "level = 2 and domain='法律快车'"
tables = ("area_new", "area", "area2")
last_error = None
for table in tables:
try:
rows = self.db.select_data(table, "pinyin, province, city", condition) or []
except Exception as exc:
last_error = exc
continue
if rows:
missing_pinyin = sum(1 for r in rows if not (r.get("pinyin") or "").strip())
print(f"[法律快车] 城市来源表: {table}, 城市数: {len(rows)}, 缺少pinyin: {missing_pinyin}")
return rows
rows: List[Dict[str, str]] = []
for record in records:
row = self._to_legacy_lawyer_row(record)
if row:
rows.append(row)
if not rows:
return 0, 0
if last_error:
print(f"[法律快车] 加载地区数据失败: {last_error}")
print("[法律快车] 无城市数据(已尝试 area_new/area/area2")
return []
existing = self._existing_phones_in_db([row["phone"] for row in rows])
inserted = 0
skipped = 0
def _get(self, url: str, max_retries: int = 3) -> Optional[str]:
return self._get_with_session(self.session, url, max_retries=max_retries, is_thread=False)
for row in rows:
phone = row.get("phone", "")
if not phone or phone in existing:
skipped += 1
def _get_with_session(self, session: requests.Session, url: str, max_retries: int = 3, is_thread: bool = False) -> Optional[str]:
for attempt in range(max_retries):
try:
resp = session.get(url, timeout=15, verify=False)
status_code = resp.status_code
text = resp.text
resp.close()
if status_code == 403:
if attempt < max_retries - 1:
wait_time = 2 ** attempt + random.uniform(0.3, 1.0)
print(f"请求失败 {url}: 403{wait_time}秒后重试 ({attempt + 1}/{max_retries})")
if is_thread:
self._refresh_thread_session()
session = self._get_thread_session()
else:
self._refresh_session()
session = self.session
time.sleep(wait_time)
continue
print(f"请求失败 {url}: 403 Forbidden")
return None
if status_code >= 400:
raise requests.exceptions.HTTPError(f"{status_code} Error: {url}")
return text
except requests.exceptions.RequestException as exc:
print(f"请求失败 {url}: {exc}")
return None
return None
def _parse_list(self, html: str, province: str, city: str) -> int:
soup = BeautifulSoup(html, "html.parser")
links = [a.get("href", "") for a in soup.select("a.hide_link")]
links = [link.replace("lll", "int") for link in links if link]
if not links:
return 0
detail_urls = [urljoin(DETAIL_BASE, link) for link in links]
results: List[Dict[str, str]] = []
with ThreadPoolExecutor(max_workers=self.max_workers) as ex:
futs = [ex.submit(self._parse_detail, u, province, city) for u in detail_urls]
for fut in as_completed(futs):
try:
data = fut.result()
except Exception as exc:
print(f" 详情解析异常: {exc}")
continue
if data and data.get("phone"):
results.append(data)
if not results:
return len(detail_urls)
phones = [d["phone"] for d in results if d.get("phone")]
existing = self._existing_phones(phones)
for data in results:
phone = data.get("phone")
if not phone:
continue
if phone in existing:
print(f" -- 已存在: {data['name']} ({phone})")
continue
try:
self.db.insert_data("lawyer", row)
existing.add(phone)
inserted += 1
self.db.insert_data("lawyer", data)
print(f" -> 新增: {data['name']} ({phone})")
except Exception as exc:
skipped += 1
print(f"[db] 插入失败 phone={phone} url={row.get('url', '')}: {exc}")
print(f" 插入失败 {data.get('url')}: {exc}")
return inserted, skipped
return len(detail_urls)
def crawl(
self,
output_path: str,
max_cities: int = 0,
city_filter: Optional[str] = None,
) -> None:
cities = self.discover_cities()
print(f"[discover] 共发现城市 {len(cities)}")
def _parse_detail(self, url: str, province: str, city: str) -> Optional[Dict[str, str]]:
html = None
sess = self._get_thread_session()
html = self._get_with_session(sess, url, max_retries=3, is_thread=True)
if not html:
return None
if city_filter:
key = city_filter.strip().lower()
cities = [
c for c in cities
if key in c.city_py.lower() or key in c.city_name.lower()
]
print(f"[discover] 过滤后城市 {len(cities)} 个, filter={city_filter}")
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text(" ")
if max_cities > 0:
cities = cities[:max_cities]
print(f"[discover] 截断城市数 {len(cities)}")
name = ""
title_tag = soup.find("title")
if title_tag:
match = re.search(r"(\S+)律师", title_tag.get_text())
if match:
name = match.group(1)
if not name:
intl_div = soup.find("div", class_="intl")
if intl_div:
match = re.search(r"(\S+)律师", intl_div.get_text())
if match:
name = match.group(1)
os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
phone = ""
phone_pattern = r"1[3-9]\d{9}"
for item in soup.select("div.item.flex"):
label = item.find("div", class_="label")
desc = item.find("div", class_="desc")
if not label or not desc:
continue
label_text = label.get_text()
desc_text = desc.get_text().replace("-", "")
if "联系电话" in label_text or "电话" in label_text:
matches = re.findall(phone_pattern, desc_text)
if matches:
phone = matches[0]
break
if not phone:
matches = re.findall(phone_pattern, text.replace("-", ""))
if matches:
phone = matches[0]
if not phone:
print(f" 无手机号: {url}")
return None
seen_ids: Set[str] = set()
if os.path.exists(output_path):
with open(output_path, "r", encoding="utf-8") as old_file:
for line in old_file:
line = line.strip()
if not line:
continue
try:
item = json.loads(line)
except Exception:
continue
rid = item.get("record_id")
if rid:
seen_ids.add(rid)
print(f"[resume] 已有记录 {len(seen_ids)}")
law_firm = ""
for item in soup.select("div.item.flex"):
label = item.find("div", class_="label")
desc = item.find("div", class_="desc")
if not label or not desc:
continue
if "执业律所" in label.get_text() or "律所" in label.get_text():
law_firm = desc.get_text(strip=True).replace("已认证", "")
break
total_new_json = 0
total_new_db = 0
total_skip_db = 0
params = {
"list_url": url,
"province": province,
"city": city,
}
with open(output_path, "a", encoding="utf-8") as out:
for idx, target in enumerate(cities, start=1):
print(
f"[city {idx}/{len(cities)}] {target.province_name}-{target.city_name} "
f"({target.city_py})"
)
city_records = list(self.crawl_city(target))
return {
"name": name or "",
"law_firm": law_firm,
"province": province,
"city": city,
"phone": phone,
"url": url,
"domain": DOMAIN,
"create_time": int(time.time()),
"params": json.dumps(params, ensure_ascii=False)
}
city_new_json = 0
for record in city_records:
rid = record["record_id"]
if rid in seen_ids:
continue
out.write(json.dumps(record, ensure_ascii=False) + "\n")
seen_ids.add(rid)
city_new_json += 1
total_new_json += 1
def run(self):
print("启动法律快车采集...")
areas = self._load_areas()
if not areas:
print("无地区数据")
return
city_new_db, city_skip_db = self._write_records_to_db(city_records)
total_new_db += city_new_db
total_skip_db += city_skip_db
print(
f"[city] 采集{len(city_records)}条, JSON新增{city_new_json}条, "
f"DB新增{city_new_db}条, DB跳过{city_skip_db}"
)
print(
f"[done] JSON新增{total_new_json}条, DB新增{total_new_db}条, "
f"DB跳过{total_skip_db}条, 输出: {output_path}"
)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="法律快车全新采集脚本(站点数据直采)")
parser.add_argument(
"--output",
default="/www/wwwroot/lawyers/data/lawtime_records_all.jsonl",
help="输出 jsonl 文件路径",
)
parser.add_argument(
"--max-cities",
type=int,
default=0,
help="最多采集多少个城市,0 表示不限",
)
parser.add_argument(
"--max-pages",
type=int,
default=9999,
help="每个城市最多采集多少页",
)
parser.add_argument(
"--city-filter",
default="",
help="按城市拼音或城市名过滤,如 beijing",
)
parser.add_argument(
"--sleep",
type=float,
default=0.1,
help="详情页请求间隔秒数",
)
parser.add_argument(
"--direct",
action="store_true",
help="直连模式,不使用 proxy_settings.json 代理",
)
parser.add_argument(
"--no-db",
action="store_true",
help="只输出 JSONL,不写入数据库",
)
return parser.parse_args()
def main():
args = parse_args()
if args.no_db:
crawler = LawtimeCrawler(
max_pages=args.max_pages,
sleep_seconds=args.sleep,
use_proxy=not args.direct,
db_connection=None,
)
crawler.crawl(
output_path=args.output,
max_cities=args.max_cities,
city_filter=args.city_filter or None,
)
return
with Db() as db:
crawler = LawtimeCrawler(
max_pages=args.max_pages,
sleep_seconds=args.sleep,
use_proxy=not args.direct,
db_connection=db,
)
crawler.crawl(
output_path=args.output,
max_cities=args.max_cities,
city_filter=args.city_filter or None,
)
for area in areas:
pinyin = area.get("pinyin")
province = area.get("province", "")
city = area.get("city", "")
if not pinyin:
continue
page = 1
while True:
list_url = LIST_BASE.format(pinyin=pinyin, page=page)
print(f"采集 {province}-{city}{page} 页: {list_url}")
html = self._get(list_url)
if not html:
break
link_count = self._parse_list(html, province, city)
if link_count == 0:
break
page += 1
print("法律快车采集完成")
if __name__ == "__main__":
main()
with Db() as db:
spider = LawtimeSpider(db)
spider.run()