data_factory/test_data/field_rules/province_rules.py

116 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2025/3/15 19:52
# @Author : AngesZhu
# @File : province_rules.py
# @Desc : 省份相关规则
import random
import re
from typing import Dict, List, Optional, Tuple
from functools import lru_cache
# Immutable base data. Each record is a 5-tuple:
# (name, abbreviation, capital, area code, region).
# The abbreviation must be non-empty and unique so that every record is
# reachable through the abbreviation lookup table built from this data.
PROVINCE_DATA = (
    ("北京", "京", "北京", "010", "华北"),
    ("天津", "津", "天津", "022", "华北"),
    ("河北", "冀", "石家庄", "0311", "华北"),
    # ... remaining province records go here
    ("台湾", "台", "台北", "00886", "华东"),
    ("香港", "港", "香港", "00852", "华南"),
    ("澳门", "澳", "澳门", "00853", "华南"),
)
# Type aliases.
# A province record: (name, abbreviation, capital, area code, region).
Province = Tuple[str, str, str, str, str]
# Dictionary form of a province record (field name -> field value).
ProvinceDict = Dict[str, str]


# Pure function: data transformation.
def create_mappings(data: Tuple[Province, ...]) -> Tuple[Dict[str, Province], Dict[str, Province]]:
    """Build lookup tables keyed by full name and by abbreviation.

    A record whose name (index 0) or abbreviation (index 1) is empty is left
    out of the corresponding table; otherwise all such records would collide
    on the meaningless key ``""``. When several records share a non-empty
    key, the last one in ``data`` wins.

    Args:
        data: Province records to index.

    Returns:
        A ``(name_map, abbr_map)`` pair mapping each non-empty name and each
        non-empty abbreviation to its full record.
    """
    name_map = {record[0]: record for record in data if record[0]}
    abbr_map = {record[1]: record for record in data if record[1]}
    return name_map, abbr_map
# Build the lookup tables once, at import time.
NAME_MAP, ABBR_MAP = create_mappings(PROVINCE_DATA)
# Every key accepted by an exact lookup: the keys of both tables combined.
ALL_VALID_NAMES = frozenset(NAME_MAP).union(ABBR_MAP)
# Pure function: input cleaning.
# Translation table mapping the full-width parentheses U+FF08 / U+FF09 to
# ASCII "(" / ")". The characters are written as escapes so they cannot be
# silently dropped or flagged as ambiguous Unicode; both maketrans arguments
# must have the same length.
_PAREN_TABLE = str.maketrans("\uff08\uff09", "()")


def clean_input(input_str: str) -> str:
    """Normalise a raw province string for lookup.

    Full-width parentheses are converted to ASCII parentheses and all
    whitespace (leading, trailing and interior) is removed.

    Args:
        input_str: Raw user input. Anything that is not a ``str``, as well
            as the empty string, is treated as "no input".

    Returns:
        The normalised string, or ``""`` when there is no usable input.
    """
    # Check the type first so truthiness is never evaluated on foreign objects.
    if not isinstance(input_str, str) or not input_str:
        return ""
    normalized = input_str.translate(_PAREN_TABLE)
    return re.sub(r"\s+", "", normalized)
# Cached function: province validation.
# Administrative suffixes that may follow a province name in user input.
_PROVINCE_SUFFIXES = ("省", "市", "自治区", "特别行政区")


@lru_cache(maxsize=128)
def validate_province(input_str: str) -> Optional[Province]:
    """Resolve user input to a province record.

    The input is normalised with ``clean_input`` and then matched, in order:

    1. as a full name (``NAME_MAP``),
    2. as an abbreviation (``ABBR_MAP``),
    3. as a full name followed by one of ``_PROVINCE_SUFFIXES``
       (for example ``"台湾省"`` is looked up as ``"台湾"``).

    Results are memoised by ``lru_cache``, so ``input_str`` must be hashable.

    Args:
        input_str: Raw province name or abbreviation.

    Returns:
        The matching province record, or ``None`` when nothing matches.
    """
    cleaned = clean_input(input_str)
    if not cleaned:
        return None

    exact = NAME_MAP.get(cleaned) or ABBR_MAP.get(cleaned)
    if exact:
        return exact

    for suffix in _PROVINCE_SUFFIXES:
        # The length guard keeps a bare suffix (e.g. "省") from being reduced
        # to an empty base name.
        if cleaned.endswith(suffix) and len(cleaned) > len(suffix):
            found = NAME_MAP.get(cleaned[:-len(suffix)])
            if found:
                return found
    return None
# Random selection.
def get_random_province(region: Optional[str] = None) -> Province:
    """Pick one province record at random.

    Args:
        region: When truthy, only records whose region field (index 4)
            equals this value are eligible; otherwise every record is.

    Returns:
        A randomly chosen record from ``PROVINCE_DATA``.

    Raises:
        ValueError: If no record is eligible.
    """
    if region:
        candidates = [entry for entry in PROVINCE_DATA if entry[4] == region]
    else:
        candidates = list(PROVINCE_DATA)

    if candidates:
        return random.choice(candidates)
    raise ValueError(f"无效的地区:{region}")
# Pure function: record conversion.
def province_to_dict(province: Province) -> ProvinceDict:
    """Convert a province tuple into a dictionary.

    Args:
        province: A province record.

    Returns:
        A dict pairing the field names ``name``, ``abbr``, ``capital``,
        ``area_code`` and ``region`` with the record's values, in order.
    """
    field_names = ("name", "abbr", "capital", "area_code", "region")
    return {field: value for field, value in zip(field_names, province)}
# Composite function: full province information.
def get_province_info(input_str: str) -> ProvinceDict:
    """Look up a province and return its record as a dictionary.

    Args:
        input_str: Raw province input, resolved via ``validate_province``.

    Returns:
        The matched record converted with ``province_to_dict``.

    Raises:
        ValueError: If ``validate_province`` finds no match.
    """
    matched = validate_province(input_str)
    if matched:
        return province_to_dict(matched)
    raise ValueError(f"无效的省份输入:{input_str}")
# Usage example.
if __name__ == "__main__":
    # Pick a random province.
    random_prov = get_random_province()
    print(f"随机省份:{random_prov}")

    # Validate a handful of sample inputs.
    test_cases = ["北京", "上海市", "", "", "台湾省", "invalid"]
    for case in test_cases:
        result = validate_province(case)
        verdict = "有效" if result else "无效"
        print(f"验证 '{case}': {verdict}")

    # Fetch the full record for one province; an unknown name is reported
    # by printing the ValueError instead of aborting the demo.
    try:
        info = get_province_info("广东")
        print("\n广东省信息:")
        print(info)
    except ValueError as e:
        print(e)