Files

319 lines
12 KiB
Python

#!/usr/bin/env python3
"""
世界杯数据导入脚本
- 将 teams.js 和 schedule.js 数据导入 MySQL
- 抓取真实赛程数据
"""
import pymysql
import json
import re
import sys
import os
from datetime import datetime, timedelta
# 数据库配置
DB_CONFIG = {
'host': 'localhost',
'port': 3306,
'user': 'root',
'password': 'root_password',
'database': 'wx_service',
'charset': 'utf8mb4'
}
def get_connection():
return pymysql.connect(**DB_CONFIG)
def parse_teams_js(filepath):
"""解析 teams.js 文件"""
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
# 提取数组内容
match = re.search(r'const\s+teams\s*=\s*\[(.*?)\]', content, re.DOTALL)
if not match:
return []
array_content = match.group(1)
# 解析每个对象
teams = []
objects = re.findall(r'\{[^}]+\}', array_content)
for obj_str in objects:
team = {}
# 提取字段
fields = {
'id': r"id:\s*'([^']+)'",
'name': r"name:\s*'([^']+)'",
'name_en': r"name_en:\s*'([^']+)'",
'nickname': r"nickname:\s*'([^']+)'",
'group': r"group:\s*'([^']+)'",
'flag': r"flag:\s*'([^']+)'",
'confederation': r"confederation:\s*'([^']+)'",
'fifa_ranking': r"fifa_ranking:\s*(\d+)",
'best_result': r"best_result:\s*'([^']+)'",
'coach': r"coach:\s*'([^']+)'",
'style': r"style:\s*'([^']+)'"
}
for key, pattern in fields.items():
m = re.search(pattern, obj_str)
if m:
value = m.group(1)
if key in ('fifa_ranking',):
value = int(value)
team[key] = value
if team.get('id'):
teams.append(team)
return teams
def parse_schedule_js(filepath):
"""解析 schedule.js 文件"""
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
# 简单解析:找到 schedule 数组
# 由于是动态生成的,我们需要运行JS来获取数据
# 这里我们直接用 Python 生成赛程数据
teams_data = {
'A': ['usa', 'colombia', 'senegal', 'new-zealand'],
'B': ['england', 'iran', 'wales', 'saudi-arabia'],
'C': ['argentina', 'mexico', 'poland', 'australia'],
'D': ['france', 'denmark', 'tunisia', 'peru'],
'E': ['spain', 'germany', 'japan', 'costa-rica'],
'F': ['belgium', 'croatia', 'morocco', 'canada'],
'G': ['brazil', 'switzerland', 'cameroon', 'south-korea'],
'H': ['portugal', 'uruguay', 'ghana', 'ecuador'],
'I': ['netherlands', 'serbia', 'algeria', 'paraguay'],
'J': ['italy', 'egypt', 'nigeria', 'honduras'],
'K': ['czechia', 'turkiye', 'chile', 'china'],
'L': ['austria', 'scotland', 'jamaica', 'qatar']
}
venues = [
{'id': 1, 'name': '阿兹特克体育场', 'name_en': 'Estadio Azteca', 'city': '墨西哥城', 'city_en': 'Mexico City', 'country': '墨西哥', 'capacity': 87000},
{'id': 2, 'name': '大都会人寿体育场', 'name_en': 'MetLife Stadium', 'city': '纽约/新泽西', 'city_en': 'New York/New Jersey', 'country': '美国', 'capacity': 82500},
{'id': 3, 'name': 'AT&T体育场', 'name_en': 'AT&T Stadium', 'city': '达拉斯', 'city_en': 'Dallas', 'country': '美国', 'capacity': 80000},
{'id': 4, 'name': '箭头体育场', 'name_en': 'Arrowhead Stadium', 'city': '堪萨斯城', 'city_en': 'Kansas City', 'country': '美国', 'capacity': 76416},
{'id': 5, 'name': 'NRG体育场', 'name_en': 'NRG Stadium', 'city': '休斯敦', 'city_en': 'Houston', 'country': '美国', 'capacity': 72220},
{'id': 6, 'name': '梅赛德斯-奔驰体育场', 'name_en': 'Mercedes-Benz Stadium', 'city': '亚特兰大', 'city_en': 'Atlanta', 'country': '美国', 'capacity': 71000},
{'id': 7, 'name': '索菲体育场', 'name_en': 'SoFi Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 70240},
{'id': 8, 'name': '卢门球场', 'name_en': 'Lumen Field', 'city': '西雅图', 'city_en': 'Seattle', 'country': '美国', 'capacity': 68740},
{'id': 9, 'name': '林肯金融球场', 'name_en': 'Lincoln Financial Field', 'city': '费城', 'city_en': 'Philadelphia', 'country': '美国', 'capacity': 69796},
{'id': 10, 'name': '吉列体育场', 'name_en': 'Gillette Stadium', 'city': '波士顿', 'city_en': 'Boston', 'country': '美国', 'capacity': 65878},
{'id': 11, 'name': '硬石体育场', 'name_en': 'Hard Rock Stadium', 'city': '迈阿密', 'city_en': 'Miami', 'country': '美国', 'capacity': 64767},
{'id': 12, 'name': '李维斯体育场', 'name_en': 'Levi\'s Stadium', 'city': '旧金山湾区', 'city_en': 'San Francisco Bay Area', 'country': '美国', 'capacity': 68500},
{'id': 13, 'name': 'BMO球场', 'name_en': 'BMO Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 22000},
{'id': 14, 'name': '蒙特雷体育场', 'name_en': 'Estadio BBVA', 'city': '蒙特雷', 'city_en': 'Monterrey', 'country': '墨西哥', 'capacity': 53500},
{'id': 15, 'name': '瓜达拉哈拉体育场', 'name_en': 'Estadio Akron', 'city': '瓜达拉哈拉', 'city_en': 'Guadalajara', 'country': '墨西哥', 'capacity': 49850},
{'id': 16, 'name': 'BMO球场', 'name_en': 'BMO Field', 'city': '多伦多', 'city_en': 'Toronto', 'country': '加拿大', 'capacity': 30000},
{'id': 17, 'name': 'BC体育馆', 'name_en': 'BC Place', 'city': '温哥华', 'city_en': 'Vancouver', 'country': '加拿大', 'capacity': 54500}
]
# 生成小组赛程
group_pairings = [
[(0, 1), (2, 3)], # 第1轮
[(0, 2), (1, 3)], # 第2轮
[(0, 3), (1, 2)] # 第3轮
]
matches = []
match_id = 1
# 开球时间 (UTC) - 每天4场比赛
kickoff_hours = ['16:00:00', '19:00:00', '22:00:00', '01:00:00']
start_date = datetime(2026, 6, 11)
for round_idx, pairings in enumerate(group_pairings):
for group_idx, (group, teams) in enumerate(teams_data.items()):
for pair_idx, (home_idx, away_idx) in enumerate(pairings):
# 计算比赛日期
day_offset = round_idx * 6 + group_idx // 2
match_date = start_date + timedelta(days=day_offset)
# 计算开球时间
hour_idx = (match_id - 1) % 4
hour = int(kickoff_hours[hour_idx].split(':')[0])
if hour == 1: # 凌晨1点是第二天
match_date += timedelta(days=1)
kickoff_utc = match_date.replace(hour=hour, minute=0, second=0)
kickoff_beijing = kickoff_utc + timedelta(hours=8)
# 选择场馆
venue = venues[(match_id - 1) % len(venues)]
matches.append({
'id': match_id,
'matchday': round_idx + 1,
'stage': 'group',
'group_letter': group,
'home_team_id': None, # 需要根据 team id 映射
'away_team_id': None,
'home_placeholder': teams[home_idx],
'away_placeholder': teams[away_idx],
'kickoff_utc': kickoff_utc,
'kickoff_beijing': kickoff_beijing,
'venue_id': venue['id'],
'venue_city': venue['city'],
'venue_name': venue['name'],
'status': 'scheduled',
'home_score': None,
'away_score': None
})
match_id += 1
return matches, venues
def import_teams(teams):
"""导入球队数据"""
conn = get_connection()
cursor = conn.cursor()
# 清空表
cursor.execute("DELETE FROM wc_teams")
sql = """
INSERT INTO wc_teams (name, name_en, nickname, group_letter, flag_emoji,
confederation, fifa_ranking, best_result, coach, style,
created_at, updated_at)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
"""
for team in teams:
cursor.execute(sql, (
team.get('name'),
team.get('name_en'),
team.get('nickname'),
team.get('group'),
team.get('flag'),
team.get('confederation'),
team.get('fifa_ranking'),
team.get('best_result'),
team.get('coach'),
team.get('style')
))
conn.commit()
print(f"导入 {len(teams)} 支球队")
# 返回球队ID映射
cursor.execute("SELECT id, name_en FROM wc_teams")
result = {row[1].lower().replace(' ', '-'): row[0] for row in cursor.fetchall()}
conn.close()
return result
def import_venues(venues):
"""导入场馆数据"""
conn = get_connection()
cursor = conn.cursor()
# 清空表
cursor.execute("DELETE FROM wc_venues")
sql = """
INSERT INTO wc_venues (name, name_en, city, city_en, country, capacity,
created_at, updated_at)
VALUES (%s, %s, %s, %s, %s, %s, NOW(), NOW())
"""
for venue in venues:
cursor.execute(sql, (
venue['name'],
venue['name_en'],
venue['city'],
venue['city_en'],
venue['country'],
venue['capacity']
))
conn.commit()
print(f"导入 {len(venues)} 个场馆")
conn.close()
def import_matches(matches, team_id_map):
"""导入比赛数据"""
conn = get_connection()
cursor = conn.cursor()
# 清空表
cursor.execute("DELETE FROM wc_matches")
sql = """
INSERT INTO wc_matches (matchday, stage, group_letter, home_team_id, away_team_id,
home_placeholder, away_placeholder, kickoff_utc, kickoff_beijing,
venue_id, venue_city, venue_name, status, home_score, away_score,
created_at, updated_at)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
"""
for match in matches:
# 尝试映射球队ID
home_id = team_id_map.get(match['home_placeholder'])
away_id = team_id_map.get(match['away_placeholder'])
cursor.execute(sql, (
match['matchday'],
match['stage'],
match['group_letter'],
home_id,
away_id,
match['home_placeholder'],
match['away_placeholder'],
match['kickoff_utc'],
match['kickoff_beijing'],
match['venue_id'],
match['venue_city'],
match['venue_name'],
match['status'],
match['home_score'],
match['away_score']
))
conn.commit()
print(f"导入 {len(matches)} 场比赛")
conn.close()
def main():
print("开始导入世界杯数据...")
# 解析 teams.js
teams_file = '/home/nepiedg/code/wechat_mini/worldcup/src/data/teams.js'
if os.path.exists(teams_file):
teams = parse_teams_js(teams_file)
print(f"解析到 {len(teams)} 支球队")
team_id_map = import_teams(teams)
else:
print(f"文件不存在: {teams_file}")
return
# 生成并导入赛程数据
matches, venues = parse_schedule_js(None)
import_venues(venues)
import_matches(matches, team_id_map)
print("\n数据导入完成!")
# 验证数据
conn = get_connection()
cursor = conn.cursor()
tables = ['wc_teams', 'wc_matches', 'wc_venues']
for table in tables:
cursor.execute(f"SELECT COUNT(*) FROM {table}")
count = cursor.fetchone()[0]
print(f" {table}: {count} 条记录")
conn.close()
if __name__ == '__main__':
main()