319 lines
12 KiB
Python
319 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
世界杯数据导入脚本
|
|
- 将 teams.js 和 schedule.js 数据导入 MySQL
|
|
- 抓取真实赛程数据
|
|
"""
|
|
|
|
import pymysql
|
|
import json
|
|
import re
|
|
import sys
|
|
import os
|
|
from datetime import datetime, timedelta
|
|
|
|
# 数据库配置
|
|
DB_CONFIG = {
|
|
'host': 'localhost',
|
|
'port': 3306,
|
|
'user': 'root',
|
|
'password': 'root_password',
|
|
'database': 'wx_service',
|
|
'charset': 'utf8mb4'
|
|
}
|
|
|
|
def get_connection():
|
|
return pymysql.connect(**DB_CONFIG)
|
|
|
|
def parse_teams_js(filepath):
|
|
"""解析 teams.js 文件"""
|
|
with open(filepath, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# 提取数组内容
|
|
match = re.search(r'const\s+teams\s*=\s*\[(.*?)\]', content, re.DOTALL)
|
|
if not match:
|
|
return []
|
|
|
|
array_content = match.group(1)
|
|
|
|
# 解析每个对象
|
|
teams = []
|
|
objects = re.findall(r'\{[^}]+\}', array_content)
|
|
|
|
for obj_str in objects:
|
|
team = {}
|
|
# 提取字段
|
|
fields = {
|
|
'id': r"id:\s*'([^']+)'",
|
|
'name': r"name:\s*'([^']+)'",
|
|
'name_en': r"name_en:\s*'([^']+)'",
|
|
'nickname': r"nickname:\s*'([^']+)'",
|
|
'group': r"group:\s*'([^']+)'",
|
|
'flag': r"flag:\s*'([^']+)'",
|
|
'confederation': r"confederation:\s*'([^']+)'",
|
|
'fifa_ranking': r"fifa_ranking:\s*(\d+)",
|
|
'best_result': r"best_result:\s*'([^']+)'",
|
|
'coach': r"coach:\s*'([^']+)'",
|
|
'style': r"style:\s*'([^']+)'"
|
|
}
|
|
|
|
for key, pattern in fields.items():
|
|
m = re.search(pattern, obj_str)
|
|
if m:
|
|
value = m.group(1)
|
|
if key in ('fifa_ranking',):
|
|
value = int(value)
|
|
team[key] = value
|
|
|
|
if team.get('id'):
|
|
teams.append(team)
|
|
|
|
return teams
|
|
|
|
def parse_schedule_js(filepath):
|
|
"""解析 schedule.js 文件"""
|
|
with open(filepath, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# 简单解析:找到 schedule 数组
|
|
# 由于是动态生成的,我们需要运行JS来获取数据
|
|
# 这里我们直接用 Python 生成赛程数据
|
|
|
|
teams_data = {
|
|
'A': ['usa', 'colombia', 'senegal', 'new-zealand'],
|
|
'B': ['england', 'iran', 'wales', 'saudi-arabia'],
|
|
'C': ['argentina', 'mexico', 'poland', 'australia'],
|
|
'D': ['france', 'denmark', 'tunisia', 'peru'],
|
|
'E': ['spain', 'germany', 'japan', 'costa-rica'],
|
|
'F': ['belgium', 'croatia', 'morocco', 'canada'],
|
|
'G': ['brazil', 'switzerland', 'cameroon', 'south-korea'],
|
|
'H': ['portugal', 'uruguay', 'ghana', 'ecuador'],
|
|
'I': ['netherlands', 'serbia', 'algeria', 'paraguay'],
|
|
'J': ['italy', 'egypt', 'nigeria', 'honduras'],
|
|
'K': ['czechia', 'turkiye', 'chile', 'china'],
|
|
'L': ['austria', 'scotland', 'jamaica', 'qatar']
|
|
}
|
|
|
|
venues = [
|
|
{'id': 1, 'name': '阿兹特克体育场', 'name_en': 'Estadio Azteca', 'city': '墨西哥城', 'city_en': 'Mexico City', 'country': '墨西哥', 'capacity': 87000},
|
|
{'id': 2, 'name': '大都会人寿体育场', 'name_en': 'MetLife Stadium', 'city': '纽约/新泽西', 'city_en': 'New York/New Jersey', 'country': '美国', 'capacity': 82500},
|
|
{'id': 3, 'name': 'AT&T体育场', 'name_en': 'AT&T Stadium', 'city': '达拉斯', 'city_en': 'Dallas', 'country': '美国', 'capacity': 80000},
|
|
{'id': 4, 'name': '箭头体育场', 'name_en': 'Arrowhead Stadium', 'city': '堪萨斯城', 'city_en': 'Kansas City', 'country': '美国', 'capacity': 76416},
|
|
{'id': 5, 'name': 'NRG体育场', 'name_en': 'NRG Stadium', 'city': '休斯敦', 'city_en': 'Houston', 'country': '美国', 'capacity': 72220},
|
|
{'id': 6, 'name': '梅赛德斯-奔驰体育场', 'name_en': 'Mercedes-Benz Stadium', 'city': '亚特兰大', 'city_en': 'Atlanta', 'country': '美国', 'capacity': 71000},
|
|
{'id': 7, 'name': '索菲体育场', 'name_en': 'SoFi Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 70240},
|
|
{'id': 8, 'name': '卢门球场', 'name_en': 'Lumen Field', 'city': '西雅图', 'city_en': 'Seattle', 'country': '美国', 'capacity': 68740},
|
|
{'id': 9, 'name': '林肯金融球场', 'name_en': 'Lincoln Financial Field', 'city': '费城', 'city_en': 'Philadelphia', 'country': '美国', 'capacity': 69796},
|
|
{'id': 10, 'name': '吉列体育场', 'name_en': 'Gillette Stadium', 'city': '波士顿', 'city_en': 'Boston', 'country': '美国', 'capacity': 65878},
|
|
{'id': 11, 'name': '硬石体育场', 'name_en': 'Hard Rock Stadium', 'city': '迈阿密', 'city_en': 'Miami', 'country': '美国', 'capacity': 64767},
|
|
{'id': 12, 'name': '李维斯体育场', 'name_en': 'Levi\'s Stadium', 'city': '旧金山湾区', 'city_en': 'San Francisco Bay Area', 'country': '美国', 'capacity': 68500},
|
|
{'id': 13, 'name': 'BMO球场', 'name_en': 'BMO Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 22000},
|
|
{'id': 14, 'name': '蒙特雷体育场', 'name_en': 'Estadio BBVA', 'city': '蒙特雷', 'city_en': 'Monterrey', 'country': '墨西哥', 'capacity': 53500},
|
|
{'id': 15, 'name': '瓜达拉哈拉体育场', 'name_en': 'Estadio Akron', 'city': '瓜达拉哈拉', 'city_en': 'Guadalajara', 'country': '墨西哥', 'capacity': 49850},
|
|
{'id': 16, 'name': 'BMO球场', 'name_en': 'BMO Field', 'city': '多伦多', 'city_en': 'Toronto', 'country': '加拿大', 'capacity': 30000},
|
|
{'id': 17, 'name': 'BC体育馆', 'name_en': 'BC Place', 'city': '温哥华', 'city_en': 'Vancouver', 'country': '加拿大', 'capacity': 54500}
|
|
]
|
|
|
|
# 生成小组赛程
|
|
group_pairings = [
|
|
[(0, 1), (2, 3)], # 第1轮
|
|
[(0, 2), (1, 3)], # 第2轮
|
|
[(0, 3), (1, 2)] # 第3轮
|
|
]
|
|
|
|
matches = []
|
|
match_id = 1
|
|
|
|
# 开球时间 (UTC) - 每天4场比赛
|
|
kickoff_hours = ['16:00:00', '19:00:00', '22:00:00', '01:00:00']
|
|
|
|
start_date = datetime(2026, 6, 11)
|
|
|
|
for round_idx, pairings in enumerate(group_pairings):
|
|
for group_idx, (group, teams) in enumerate(teams_data.items()):
|
|
for pair_idx, (home_idx, away_idx) in enumerate(pairings):
|
|
# 计算比赛日期
|
|
day_offset = round_idx * 6 + group_idx // 2
|
|
match_date = start_date + timedelta(days=day_offset)
|
|
|
|
# 计算开球时间
|
|
hour_idx = (match_id - 1) % 4
|
|
hour = int(kickoff_hours[hour_idx].split(':')[0])
|
|
|
|
if hour == 1: # 凌晨1点是第二天
|
|
match_date += timedelta(days=1)
|
|
|
|
kickoff_utc = match_date.replace(hour=hour, minute=0, second=0)
|
|
kickoff_beijing = kickoff_utc + timedelta(hours=8)
|
|
|
|
# 选择场馆
|
|
venue = venues[(match_id - 1) % len(venues)]
|
|
|
|
matches.append({
|
|
'id': match_id,
|
|
'matchday': round_idx + 1,
|
|
'stage': 'group',
|
|
'group_letter': group,
|
|
'home_team_id': None, # 需要根据 team id 映射
|
|
'away_team_id': None,
|
|
'home_placeholder': teams[home_idx],
|
|
'away_placeholder': teams[away_idx],
|
|
'kickoff_utc': kickoff_utc,
|
|
'kickoff_beijing': kickoff_beijing,
|
|
'venue_id': venue['id'],
|
|
'venue_city': venue['city'],
|
|
'venue_name': venue['name'],
|
|
'status': 'scheduled',
|
|
'home_score': None,
|
|
'away_score': None
|
|
})
|
|
|
|
match_id += 1
|
|
|
|
return matches, venues
|
|
|
|
def import_teams(teams):
|
|
"""导入球队数据"""
|
|
conn = get_connection()
|
|
cursor = conn.cursor()
|
|
|
|
# 清空表
|
|
cursor.execute("DELETE FROM wc_teams")
|
|
|
|
sql = """
|
|
INSERT INTO wc_teams (name, name_en, nickname, group_letter, flag_emoji,
|
|
confederation, fifa_ranking, best_result, coach, style,
|
|
created_at, updated_at)
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
|
|
"""
|
|
|
|
for team in teams:
|
|
cursor.execute(sql, (
|
|
team.get('name'),
|
|
team.get('name_en'),
|
|
team.get('nickname'),
|
|
team.get('group'),
|
|
team.get('flag'),
|
|
team.get('confederation'),
|
|
team.get('fifa_ranking'),
|
|
team.get('best_result'),
|
|
team.get('coach'),
|
|
team.get('style')
|
|
))
|
|
|
|
conn.commit()
|
|
print(f"导入 {len(teams)} 支球队")
|
|
|
|
# 返回球队ID映射
|
|
cursor.execute("SELECT id, name_en FROM wc_teams")
|
|
result = {row[1].lower().replace(' ', '-'): row[0] for row in cursor.fetchall()}
|
|
conn.close()
|
|
return result
|
|
|
|
def import_venues(venues):
|
|
"""导入场馆数据"""
|
|
conn = get_connection()
|
|
cursor = conn.cursor()
|
|
|
|
# 清空表
|
|
cursor.execute("DELETE FROM wc_venues")
|
|
|
|
sql = """
|
|
INSERT INTO wc_venues (name, name_en, city, city_en, country, capacity,
|
|
created_at, updated_at)
|
|
VALUES (%s, %s, %s, %s, %s, %s, NOW(), NOW())
|
|
"""
|
|
|
|
for venue in venues:
|
|
cursor.execute(sql, (
|
|
venue['name'],
|
|
venue['name_en'],
|
|
venue['city'],
|
|
venue['city_en'],
|
|
venue['country'],
|
|
venue['capacity']
|
|
))
|
|
|
|
conn.commit()
|
|
print(f"导入 {len(venues)} 个场馆")
|
|
conn.close()
|
|
|
|
def import_matches(matches, team_id_map):
|
|
"""导入比赛数据"""
|
|
conn = get_connection()
|
|
cursor = conn.cursor()
|
|
|
|
# 清空表
|
|
cursor.execute("DELETE FROM wc_matches")
|
|
|
|
sql = """
|
|
INSERT INTO wc_matches (matchday, stage, group_letter, home_team_id, away_team_id,
|
|
home_placeholder, away_placeholder, kickoff_utc, kickoff_beijing,
|
|
venue_id, venue_city, venue_name, status, home_score, away_score,
|
|
created_at, updated_at)
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
|
|
"""
|
|
|
|
for match in matches:
|
|
# 尝试映射球队ID
|
|
home_id = team_id_map.get(match['home_placeholder'])
|
|
away_id = team_id_map.get(match['away_placeholder'])
|
|
|
|
cursor.execute(sql, (
|
|
match['matchday'],
|
|
match['stage'],
|
|
match['group_letter'],
|
|
home_id,
|
|
away_id,
|
|
match['home_placeholder'],
|
|
match['away_placeholder'],
|
|
match['kickoff_utc'],
|
|
match['kickoff_beijing'],
|
|
match['venue_id'],
|
|
match['venue_city'],
|
|
match['venue_name'],
|
|
match['status'],
|
|
match['home_score'],
|
|
match['away_score']
|
|
))
|
|
|
|
conn.commit()
|
|
print(f"导入 {len(matches)} 场比赛")
|
|
conn.close()
|
|
|
|
def main():
|
|
print("开始导入世界杯数据...")
|
|
|
|
# 解析 teams.js
|
|
teams_file = '/home/nepiedg/code/wechat_mini/worldcup/src/data/teams.js'
|
|
if os.path.exists(teams_file):
|
|
teams = parse_teams_js(teams_file)
|
|
print(f"解析到 {len(teams)} 支球队")
|
|
team_id_map = import_teams(teams)
|
|
else:
|
|
print(f"文件不存在: {teams_file}")
|
|
return
|
|
|
|
# 生成并导入赛程数据
|
|
matches, venues = parse_schedule_js(None)
|
|
import_venues(venues)
|
|
import_matches(matches, team_id_map)
|
|
|
|
print("\n数据导入完成!")
|
|
|
|
# 验证数据
|
|
conn = get_connection()
|
|
cursor = conn.cursor()
|
|
|
|
tables = ['wc_teams', 'wc_matches', 'wc_venues']
|
|
for table in tables:
|
|
cursor.execute(f"SELECT COUNT(*) FROM {table}")
|
|
count = cursor.fetchone()[0]
|
|
print(f" {table}: {count} 条记录")
|
|
|
|
conn.close()
|
|
|
|
if __name__ == '__main__':
|
|
main()
|