chore: 暂存本地修改
This commit is contained in:
Executable
+53
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env bash
#
# Proxy-mode launcher for the Python scripts under ../common_sites.
#
# Environment (a value preset by the caller wins; the default applies when the
# variable is unset or empty):
#   PROXY_MAX_REQUESTS_PER_SECOND   default 5
#   PROXY_MAX_CONCURRENT_REQUESTS   default 5
#   PROXY_TEST                      default 0
# PROXY_ENABLED is always forced to 1.
set -euo pipefail

# Switch to the directory containing this script so that relative paths
# resolve correctly no matter where it is invoked from. BASH_SOURCE[0] still
# points at this file when it is sourced (falling back to $0 if it is unset),
# and `--` keeps a path beginning with '-' from being parsed as an option.
cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")"

# Force the proxy on: used for sites that are prone to rate limiting/blocking.
export PROXY_ENABLED=1

# Use more conservative defaults in proxy mode, to avoid overwhelming the
# proxy and triggering the sites' anti-abuse controls.
export PROXY_MAX_REQUESTS_PER_SECOND="${PROXY_MAX_REQUESTS_PER_SECOND:-5}"
export PROXY_MAX_CONCURRENT_REQUESTS="${PROXY_MAX_CONCURRENT_REQUESTS:-5}"

# Optional: enable proxy connectivity test output (some scripts print test
# information / proxy status).
export PROXY_TEST="${PROXY_TEST:-0}"
|
||||
|
||||
#######################################
# List running processes whose command line mentions the given script.
# The name must appear as a whole token: preceded by start-of-line,
# whitespace or '/', and followed by whitespace or end-of-line.
# Arguments:
#   $1 - script file name to look for (e.g. "dls.py"); matched literally
# Outputs:
#   Whatever `pgrep -af` prints for the matches on stdout; nothing when there
#   is no match.
# Returns:
#   Always 0. pgrep's non-zero status (e.g. "no match") is deliberately
#   ignored so callers running under `set -e` are not aborted.
#######################################
is_job_running() {
  local script="$1"
  # ERE metacharacters that need a backslash to be matched literally.
  # ']' and '}' are left out on purpose: they are ordinary characters once
  # their opening counterpart has been escaped.
  local -r ere_meta='\.^$*+?()[{|'
  local script_regex=""
  local ch
  local i

  # Escape every metacharacter (not only '.') so the script name can never be
  # interpreted as regex syntax.
  for (( i = 0; i < ${#script}; i++ )); do
    ch="${script:i:1}"
    if [[ "${ere_meta}" == *"${ch}"* ]]; then
      script_regex+="\\${ch}"
    else
      script_regex+="${ch}"
    fi
  done

  pgrep -af "(^|[[:space:]/])${script_regex}([[:space:]]|$)" || true
}
|
||||
|
||||
#######################################
# Start one script from ../common_sites in the background with nohup, unless
# is_job_running reports that it is already running.
# Arguments:
#   $1 - script file name, resolved as ../common_sites/$1
#   $2 - log file that receives the job's stdout and stderr (truncated on start)
#   $3 - human-readable label used in the messages
# Outputs:
#   stdout: a "skipped" message plus the first matching process line, or a
#           "started" message
#   stderr: a warning when the script file is missing or the job exits with a
#           non-zero status within the one-second grace period
# Returns:
#   Always 0, so that one job that cannot be started does not abort the
#   remaining launches of a caller running under `set -e`.
#######################################
start_job() {
  local script="$1"
  local log_file="$2"
  local label="$3"
  local script_path="../common_sites/${script}"
  local existing
  local pid
  local status=0

  existing="$(is_job_running "${script}")"
  if [[ -n "${existing}" ]]; then
    echo "跳过 ${label}: ${script} 已在运行"
    # Show only the first matching process; parameter expansion avoids a
    # pipeline (and SIGPIPE under pipefail) just to take one line.
    printf '%s\n' "${existing%%$'\n'*}"
    return 0
  fi

  # Without this check a missing script would only surface inside the log
  # file while the "started" message is still printed.
  if [[ ! -e "${script_path}" ]]; then
    echo "警告 ${label}: 未找到 ${script_path},未启动" >&2
    return 0
  fi

  nohup python "${script_path}" > "${log_file}" 2>&1 &
  pid=$!
  echo "启动 ${label}: ${script} -> ${log_file}"
  sleep 1

  # If the job is already gone after the grace period, collect its exit
  # status and warn when it failed (e.g. python not installed, import error).
  if ! kill -0 "${pid}" 2>/dev/null; then
    wait "${pid}" || status=$?
    if (( status != 0 )); then
      echo "警告 ${label}: ${script} 启动后立即退出 (exit ${status}),详见 ${log_file}" >&2
    fi
  fi
}
|
||||
|
||||
# Banner describing what this run does.
echo "代理模式(PROXY_ENABLED=1),每周一次建议用 cron 调度"
echo "代理配置读取自 request/proxy_settings.json"
echo "每周一次代理任务 = 全量采集所有站点"

# The weekly proxy run performs a full collection:
# - sites with strict anti-abuse controls / more sensitive sites:
#   findlaw.py and lawtime.py
# - the remaining sites are run as well, so that every week gets at least one
#   full refresh of the latest data
# Arguments to start_job: script name, log file, label shown in messages.
start_job "dls.py" "proxy_dls.log" "大律师(代理全量)"
start_job "dls_pc.py" "proxy_dls_pc.log" "大律师PC站(代理全量)"
start_job "findlaw.py" "proxy_findlaw.log" "找法网(代理)"
start_job "lawtime.py" "proxy_lawtime.log" "法律快车(代理)"
start_job "hualv.py" "proxy_hualv.log" "华律(代理全量)"
start_job "six4365.py" "proxy_six4365.log" "律图(代理全量)"
|
||||
Reference in New Issue
Block a user