#!/usr/bin/env bash
#
# Launch every crawler script that lives next to this file.
#
# Each crawler's stdout and stderr go to <project root>/logs/<name>.log
# (the log file is truncated on every launch).
#
# Environment variables:
#   RUN_MODE         parallel (default) | sequential
#   DLS_OUTPUT       dls_fresh.py --output   (default: <root>/data/dls_records.jsonl)
#   DLS_MAX_CITIES   dls_fresh.py --max-cities, omitted when "0" (default: 0)
#   DLS_MAX_PAGES    dls_fresh.py --max-pages, omitted when "0"  (default: 0)
#   DLS_SLEEP        dls_fresh.py --sleep    (default: 0.2)
#   DLS_CITY_FILTER  dls_fresh.py --city-filter, omitted when empty
#   DLS_DIRECT       "1" adds --direct to dls_fresh.py
#   DLS_NO_DB        "1" adds --no-db to dls_fresh.py
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
LOG_DIR="${PROJECT_ROOT}/logs"
DATA_DIR="${PROJECT_ROOT}/data"

mkdir -p "${LOG_DIR}" "${DATA_DIR}"

# Prefer the project's virtualenv interpreter when it exists and is executable;
# otherwise rely on whatever python3 is on PATH.
if [[ -x "${PROJECT_ROOT}/.venv/bin/python" ]]; then
  PYTHON_BIN="${PROJECT_ROOT}/.venv/bin/python"
else
  PYTHON_BIN="python3"
fi

RUN_MODE="${RUN_MODE:-parallel}" # parallel | sequential

echo "[start] project=${PROJECT_ROOT}"
echo "[start] python=${PYTHON_BIN}"
echo "[start] mode=${RUN_MODE}"
echo "[start] proxy=request/proxy_settings.json"

# The dls crawler (new-structure scraping + database write) can be tuned
# through environment variables.
DLS_OUTPUT="${DLS_OUTPUT:-${DATA_DIR}/dls_records.jsonl}"
DLS_MAX_CITIES="${DLS_MAX_CITIES:-0}"
DLS_MAX_PAGES="${DLS_MAX_PAGES:-0}"
DLS_SLEEP="${DLS_SLEEP:-0.2}"
DLS_CITY_FILTER="${DLS_CITY_FILTER:-}"

# Build the dls_fresh.py argument list as an array so values containing
# spaces survive intact. "0" / empty means "do not pass the flag".
DLS_EXTRA_ARGS=()
if [[ "${DLS_MAX_CITIES}" != "0" ]]; then
  DLS_EXTRA_ARGS+=(--max-cities "${DLS_MAX_CITIES}")
fi
if [[ "${DLS_MAX_PAGES}" != "0" ]]; then
  DLS_EXTRA_ARGS+=(--max-pages "${DLS_MAX_PAGES}")
fi
if [[ -n "${DLS_CITY_FILTER}" ]]; then
  DLS_EXTRA_ARGS+=(--city-filter "${DLS_CITY_FILTER}")
fi
DLS_EXTRA_ARGS+=(--sleep "${DLS_SLEEP}" --output "${DLS_OUTPUT}")
if [[ "${DLS_DIRECT:-0}" == "1" ]]; then
  DLS_EXTRA_ARGS+=(--direct)
fi
if [[ "${DLS_NO_DB:-0}" == "1" ]]; then
  DLS_EXTRA_ARGS+=(--no-db)
fi

#######################################
# Start a command in the background under nohup, logging to a file.
# Globals:   LOG_DIR (read)
# Arguments: $1   - job name; the log file is ${LOG_DIR}/<name>.log
#            $2.. - command and its arguments
# Outputs:   one "[start]" line with the background PID and log path
# Returns:   0 once the job has been spawned; the job's own exit status
#            is not waited for or checked.
#######################################
run_bg() {
  local name="$1"
  shift
  local logfile="${LOG_DIR}/${name}.log"
  nohup env PYTHONUNBUFFERED=1 "$@" > "${logfile}" 2>&1 &
  echo "[start] ${name} pid=$! log=${logfile}"
}

#######################################
# Run a command in the foreground, logging to a file.
# Globals:   LOG_DIR (read)
# Arguments: $1   - job name; the log file is ${LOG_DIR}/<name>.log
#            $2.. - command and its arguments
# Outputs:   one "[start]" line with the log path
# Returns:   the command's exit status. Because the script runs under
#            `set -e`, a non-zero status aborts the whole script.
#######################################
run_fg() {
  local name="$1"
  shift
  local logfile="${LOG_DIR}/${name}.log"
  echo "[start] ${name} fg log=${logfile}"
  env PYTHONUNBUFFERED=1 "$@" > "${logfile}" 2>&1
}

# Pick the launcher once so the crawler list below is written a single time.
case "${RUN_MODE}" in
  sequential)
    LAUNCH=run_fg
    DONE_MSG="[done] sequential completed"
    ;;
  parallel)
    LAUNCH=run_bg
    DONE_MSG="[done] all crawlers started in background"
    ;;
  *)
    # Historically every value other than "sequential" ran in parallel; keep
    # that behaviour but make a mistyped mode visible.
    echo "[warn] unknown RUN_MODE='${RUN_MODE}', falling back to parallel" >&2
    LAUNCH=run_bg
    DONE_MSG="[done] all crawlers started in background"
    ;;
esac

# dls_fresh is the only crawler that takes extra arguments.
"${LAUNCH}" "dls_fresh" "${PYTHON_BIN}" "${SCRIPT_DIR}/dls_fresh.py" "${DLS_EXTRA_ARGS[@]}"
for crawler in findlaw lawtime six4365 hualv; do
  "${LAUNCH}" "${crawler}" "${PYTHON_BIN}" "${SCRIPT_DIR}/${crawler}.py"
done

echo "${DONE_MSG}"