#!/usr/bin/env python3 """ 世界杯数据导入脚本 - 将 teams.js 和 schedule.js 数据导入 MySQL - 抓取真实赛程数据 """ import pymysql import json import re import sys import os from datetime import datetime, timedelta # 数据库配置 DB_CONFIG = { 'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'root_password', 'database': 'wx_service', 'charset': 'utf8mb4' } def get_connection(): return pymysql.connect(**DB_CONFIG) def parse_teams_js(filepath): """解析 teams.js 文件""" with open(filepath, 'r', encoding='utf-8') as f: content = f.read() # 提取数组内容 match = re.search(r'const\s+teams\s*=\s*\[(.*?)\]', content, re.DOTALL) if not match: return [] array_content = match.group(1) # 解析每个对象 teams = [] objects = re.findall(r'\{[^}]+\}', array_content) for obj_str in objects: team = {} # 提取字段 fields = { 'id': r"id:\s*'([^']+)'", 'name': r"name:\s*'([^']+)'", 'name_en': r"name_en:\s*'([^']+)'", 'nickname': r"nickname:\s*'([^']+)'", 'group': r"group:\s*'([^']+)'", 'flag': r"flag:\s*'([^']+)'", 'confederation': r"confederation:\s*'([^']+)'", 'fifa_ranking': r"fifa_ranking:\s*(\d+)", 'best_result': r"best_result:\s*'([^']+)'", 'coach': r"coach:\s*'([^']+)'", 'style': r"style:\s*'([^']+)'" } for key, pattern in fields.items(): m = re.search(pattern, obj_str) if m: value = m.group(1) if key in ('fifa_ranking',): value = int(value) team[key] = value if team.get('id'): teams.append(team) return teams def parse_schedule_js(filepath): """解析 schedule.js 文件""" with open(filepath, 'r', encoding='utf-8') as f: content = f.read() # 简单解析:找到 schedule 数组 # 由于是动态生成的,我们需要运行JS来获取数据 # 这里我们直接用 Python 生成赛程数据 teams_data = { 'A': ['usa', 'colombia', 'senegal', 'new-zealand'], 'B': ['england', 'iran', 'wales', 'saudi-arabia'], 'C': ['argentina', 'mexico', 'poland', 'australia'], 'D': ['france', 'denmark', 'tunisia', 'peru'], 'E': ['spain', 'germany', 'japan', 'costa-rica'], 'F': ['belgium', 'croatia', 'morocco', 'canada'], 'G': ['brazil', 'switzerland', 'cameroon', 'south-korea'], 'H': ['portugal', 'uruguay', 'ghana', 'ecuador'], 'I': ['netherlands', 'serbia', 'algeria', 'paraguay'], 'J': ['italy', 'egypt', 'nigeria', 'honduras'], 'K': ['czechia', 'turkiye', 'chile', 'china'], 'L': ['austria', 'scotland', 'jamaica', 'qatar'] } venues = [ {'id': 1, 'name': '阿兹特克体育场', 'name_en': 'Estadio Azteca', 'city': '墨西哥城', 'city_en': 'Mexico City', 'country': '墨西哥', 'capacity': 87000}, {'id': 2, 'name': '大都会人寿体育场', 'name_en': 'MetLife Stadium', 'city': '纽约/新泽西', 'city_en': 'New York/New Jersey', 'country': '美国', 'capacity': 82500}, {'id': 3, 'name': 'AT&T体育场', 'name_en': 'AT&T Stadium', 'city': '达拉斯', 'city_en': 'Dallas', 'country': '美国', 'capacity': 80000}, {'id': 4, 'name': '箭头体育场', 'name_en': 'Arrowhead Stadium', 'city': '堪萨斯城', 'city_en': 'Kansas City', 'country': '美国', 'capacity': 76416}, {'id': 5, 'name': 'NRG体育场', 'name_en': 'NRG Stadium', 'city': '休斯敦', 'city_en': 'Houston', 'country': '美国', 'capacity': 72220}, {'id': 6, 'name': '梅赛德斯-奔驰体育场', 'name_en': 'Mercedes-Benz Stadium', 'city': '亚特兰大', 'city_en': 'Atlanta', 'country': '美国', 'capacity': 71000}, {'id': 7, 'name': '索菲体育场', 'name_en': 'SoFi Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 70240}, {'id': 8, 'name': '卢门球场', 'name_en': 'Lumen Field', 'city': '西雅图', 'city_en': 'Seattle', 'country': '美国', 'capacity': 68740}, {'id': 9, 'name': '林肯金融球场', 'name_en': 'Lincoln Financial Field', 'city': '费城', 'city_en': 'Philadelphia', 'country': '美国', 'capacity': 69796}, {'id': 10, 'name': '吉列体育场', 'name_en': 'Gillette Stadium', 'city': '波士顿', 'city_en': 'Boston', 'country': '美国', 'capacity': 65878}, {'id': 11, 'name': '硬石体育场', 'name_en': 'Hard Rock Stadium', 'city': '迈阿密', 'city_en': 'Miami', 'country': '美国', 'capacity': 64767}, {'id': 12, 'name': '李维斯体育场', 'name_en': 'Levi\'s Stadium', 'city': '旧金山湾区', 'city_en': 'San Francisco Bay Area', 'country': '美国', 'capacity': 68500}, {'id': 13, 'name': 'BMO球场', 'name_en': 'BMO Stadium', 'city': '洛杉矶', 'city_en': 'Los Angeles', 'country': '美国', 'capacity': 22000}, {'id': 14, 'name': '蒙特雷体育场', 'name_en': 'Estadio BBVA', 'city': '蒙特雷', 'city_en': 'Monterrey', 'country': '墨西哥', 'capacity': 53500}, {'id': 15, 'name': '瓜达拉哈拉体育场', 'name_en': 'Estadio Akron', 'city': '瓜达拉哈拉', 'city_en': 'Guadalajara', 'country': '墨西哥', 'capacity': 49850}, {'id': 16, 'name': 'BMO球场', 'name_en': 'BMO Field', 'city': '多伦多', 'city_en': 'Toronto', 'country': '加拿大', 'capacity': 30000}, {'id': 17, 'name': 'BC体育馆', 'name_en': 'BC Place', 'city': '温哥华', 'city_en': 'Vancouver', 'country': '加拿大', 'capacity': 54500} ] # 生成小组赛程 group_pairings = [ [(0, 1), (2, 3)], # 第1轮 [(0, 2), (1, 3)], # 第2轮 [(0, 3), (1, 2)] # 第3轮 ] matches = [] match_id = 1 # 开球时间 (UTC) - 每天4场比赛 kickoff_hours = ['16:00:00', '19:00:00', '22:00:00', '01:00:00'] start_date = datetime(2026, 6, 11) for round_idx, pairings in enumerate(group_pairings): for group_idx, (group, teams) in enumerate(teams_data.items()): for pair_idx, (home_idx, away_idx) in enumerate(pairings): # 计算比赛日期 day_offset = round_idx * 6 + group_idx // 2 match_date = start_date + timedelta(days=day_offset) # 计算开球时间 hour_idx = (match_id - 1) % 4 hour = int(kickoff_hours[hour_idx].split(':')[0]) if hour == 1: # 凌晨1点是第二天 match_date += timedelta(days=1) kickoff_utc = match_date.replace(hour=hour, minute=0, second=0) kickoff_beijing = kickoff_utc + timedelta(hours=8) # 选择场馆 venue = venues[(match_id - 1) % len(venues)] matches.append({ 'id': match_id, 'matchday': round_idx + 1, 'stage': 'group', 'group_letter': group, 'home_team_id': None, # 需要根据 team id 映射 'away_team_id': None, 'home_placeholder': teams[home_idx], 'away_placeholder': teams[away_idx], 'kickoff_utc': kickoff_utc, 'kickoff_beijing': kickoff_beijing, 'venue_id': venue['id'], 'venue_city': venue['city'], 'venue_name': venue['name'], 'status': 'scheduled', 'home_score': None, 'away_score': None }) match_id += 1 return matches, venues def import_teams(teams): """导入球队数据""" conn = get_connection() cursor = conn.cursor() # 清空表 cursor.execute("DELETE FROM wc_teams") sql = """ INSERT INTO wc_teams (name, name_en, nickname, group_letter, flag_emoji, confederation, fifa_ranking, best_result, coach, style, created_at, updated_at) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW()) """ for team in teams: cursor.execute(sql, ( team.get('name'), team.get('name_en'), team.get('nickname'), team.get('group'), team.get('flag'), team.get('confederation'), team.get('fifa_ranking'), team.get('best_result'), team.get('coach'), team.get('style') )) conn.commit() print(f"导入 {len(teams)} 支球队") # 返回球队ID映射 cursor.execute("SELECT id, name_en FROM wc_teams") result = {row[1].lower().replace(' ', '-'): row[0] for row in cursor.fetchall()} conn.close() return result def import_venues(venues): """导入场馆数据""" conn = get_connection() cursor = conn.cursor() # 清空表 cursor.execute("DELETE FROM wc_venues") sql = """ INSERT INTO wc_venues (name, name_en, city, city_en, country, capacity, created_at, updated_at) VALUES (%s, %s, %s, %s, %s, %s, NOW(), NOW()) """ for venue in venues: cursor.execute(sql, ( venue['name'], venue['name_en'], venue['city'], venue['city_en'], venue['country'], venue['capacity'] )) conn.commit() print(f"导入 {len(venues)} 个场馆") conn.close() def import_matches(matches, team_id_map): """导入比赛数据""" conn = get_connection() cursor = conn.cursor() # 清空表 cursor.execute("DELETE FROM wc_matches") sql = """ INSERT INTO wc_matches (matchday, stage, group_letter, home_team_id, away_team_id, home_placeholder, away_placeholder, kickoff_utc, kickoff_beijing, venue_id, venue_city, venue_name, status, home_score, away_score, created_at, updated_at) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW()) """ for match in matches: # 尝试映射球队ID home_id = team_id_map.get(match['home_placeholder']) away_id = team_id_map.get(match['away_placeholder']) cursor.execute(sql, ( match['matchday'], match['stage'], match['group_letter'], home_id, away_id, match['home_placeholder'], match['away_placeholder'], match['kickoff_utc'], match['kickoff_beijing'], match['venue_id'], match['venue_city'], match['venue_name'], match['status'], match['home_score'], match['away_score'] )) conn.commit() print(f"导入 {len(matches)} 场比赛") conn.close() def main(): print("开始导入世界杯数据...") # 解析 teams.js teams_file = '/home/nepiedg/code/wechat_mini/worldcup/src/data/teams.js' if os.path.exists(teams_file): teams = parse_teams_js(teams_file) print(f"解析到 {len(teams)} 支球队") team_id_map = import_teams(teams) else: print(f"文件不存在: {teams_file}") return # 生成并导入赛程数据 matches, venues = parse_schedule_js(None) import_venues(venues) import_matches(matches, team_id_map) print("\n数据导入完成!") # 验证数据 conn = get_connection() cursor = conn.cursor() tables = ['wc_teams', 'wc_matches', 'wc_venues'] for table in tables: cursor.execute(f"SELECT COUNT(*) FROM {table}") count = cursor.fetchone()[0] print(f" {table}: {count} 条记录") conn.close() if __name__ == '__main__': main()