第三方库 - parse-解析规范字符串 - 《Python 基础教程》

解析没有定义字段名字的情况
解析类似字典的实例的情况
支持pattern复用
类型转换
去除空格解析
设置是否大小写敏感
设置匹配字符数
重要属性
自定义类型转换

https://github.com/r1chardj0n3s/parse

使用范例

from parse import parse
# Sample input line made of comma-separated key=value pairs.
before_data = "cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480"
print("before_data:", before_data)
# before_data: cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480

# Every {name} placeholder captures the text found at that position as a named field.
parse_result = parse(
    "cookie={cookie}, duration={duration}, table={table}, n_packets={n_packets}",
    before_data,
)
print("parse_result:", parse_result)
# parse_result: <Result () {'cookie': '0x9816da8e872d717d', 'duration': '298506.364s', 'table': '0', 'n_packets': '480'}>
print(f"cookie: {parse_result.named['cookie']}")  # cookie: 0x9816da8e872d717d

解析没有定义字段名字的情况

from parse import parse
# 解析没有定义字段名字的情况
# Anonymous {} placeholders are collected positionally into the result.
before_data_2 = "I am zaygee, 26 years old"
parse_2_result = parse(
    "i am {}, {} years old",
    before_data_2,
)
print("parse_2_result:", parse_2_result)
# parse_2_result: <Result ('zaygee', '26') {}>

解析类似字典的实例的情况

from parse import parse
# 解析类似字典的实例的情况
# Named placeholders ({name}, {age}) are exposed through a dict-like lookup on the result.
parse_3_result = parse(
    "i am {name}, {age} years old",
    before_data_2,
)
print("parse_3_result:", parse_3_result)
# parse_3_result: <Result () {'name': 'zaygee', 'age': '26'}>
print(parse_3_result.named["name"])  # zaygee

支持pattern复用

# 支持pattern复用
from parse import compile
# Build the pattern object once so it can be applied to any number of strings.
# NOTE: `compile` here is the function imported from `parse`, not the builtin.
pattern = compile(
    "i am {name}, {age} years old"
)
pattern_result = pattern.parse(
    "I am hoan, 30 years old"
)
print("pattern_result:", pattern_result)
# pattern_result: <Result () {'name': 'hoan', 'age': '30'}>
"""实战小题： 
user_infos = [
    "username=john, mail=john@baidu.com",
    "username=hoan, mail=hoan@baidu.com",
    "username=zaygee, mail=zaygee@baidu.com",
    "username=poppongj, mail=poppongj@baidu.com",
]
现在有一份测试数据 user_infos，需要将以上数据处理为字典输出
预期结果：
    handle_results = [
        {"username" : "john", "mail": "john@baidu.com"},
        {"username" : "hoan", "mail": "hoan@baidu.com"},
        ...
    ]
"""
from parse import compile
import json
# Raw records to convert; each follows the shape "username=<name>, mail=<address>".
user_infos = [
    "username=john, mail=john@baidu.com",
    "username=hoan, mail=hoan@baidu.com",
    "username=zaygee, mail=zaygee@baidu.com",
    "username=poppongj, mail=poppongj@baidu.com",
]
# Shared matching rule for every entry of user_infos, compiled once and reused.
# NOTE: `compile` here is the function imported from `parse`, not the builtin.
user_info_pattern = compile("username={username}, mail={mail}")
# Match each record and collect the extracted fields in the expected dict format.
handle_results = []
for item in user_infos:
    pattern_result = user_info_pattern.parse(item)
    print(f"user_info_pattern_result: {pattern_result}")
    if pattern_result is None:
        # A record that does not fit the pattern yields None; fail with a
        # message naming the record instead of an opaque TypeError raised by
        # subscripting None below.
        raise ValueError(
            f"user info does not match 'username=..., mail=...': {item!r}"
        )
    handle_results.append(
        {
            "username": pattern_result["username"],
            "mail": pattern_result["mail"],
        }
    )
# Print the collected records as pretty-printed JSON.
print(f"handle_results:\n {json.dumps(handle_results, indent=4)}")
# handle_results:
#  [
#     {
#         "username": "john",
#         "mail": "john@baidu.com"
#     },
#     {
#         "username": "hoan",
#         "mail": "hoan@baidu.com"
#     },
#     {
#         "username": "zaygee",
#         "mail": "zaygee@baidu.com"
#     },
#     {
#         "username": "poppongj",
#         "mail": "poppongj@baidu.com"
#     }
# ]

类型转换

更多类型转换见官方文档：https://github.com/r1chardj0n3s/parse

"""
解析时的类型转换：
    {age:d} --> 将age的值转换为int类型
    {date:tg} --> 将date的值转换为datetime e.g. 20/1/1972 10:21:36 AM +1:00
    {numbers:f} --> 将numbers的值转换为float类型
"""
from parse import parse
# Type codes in the placeholders convert the captured text while parsing:
# ":d" -> int, ":tg" -> datetime (the sample output below shows 6/10/2022 read
# as 6 October), ":f" -> float.
str_data = "i am zaygee, 20 years old, now is  6/10/2022 11:00 PM, number is 1.444"
str_data_result = parse(
    "i am {name}, {age:d} years old, now is  {date:tg}, number is {num:f}",
    str_data,
)
print("str_data_result:", str_data_result)
# str_data_result: <Result () {'name': 'zaygee', 'age': 20, 'date': datetime.datetime(2022, 10, 6, 23, 0), 'num': 1.444}>
print(str_data_result.named["date"])  # 2022-10-06 23:00:00

去除空格解析

去除两边空格：{name:^}；去除左边空格：{name:>}；去除右边空格：{name:<}


"""提取时去除空格"""
# Input with runs of spaces around each word.
space_data = "hello     world    , hello    python    "

# Without an alignment flag the surrounding spaces stay in the captured text.
space_parse_data = parse(
    "hello {world}, hello {python}",
    space_data,
)
print("space_parse_data:", space_parse_data)
# space_parse_data: <Result () {'world': '    world    ', 'python': '   python    '}>

# {name:^} strips the spaces on both sides of the captured text.
space_parse_data_2 = parse(
    "hello {world:^}, hello {python:^}",
    space_data,
)
print("space_parse_data_2:", space_parse_data_2)
# space_parse_data_2: <Result () {'world': 'world', 'python': 'python'}>

# {name:>} strips the spaces on the left side only.
left_space_parse_data = parse(
    "hello {world:>}, hello {python:>}",
    space_data,
)
print("left_space_parse_data:", left_space_parse_data)
# left_space_parse_data: <Result () {'world': 'world    ', 'python': 'python    '}>

# {name:<} strips the spaces on the right side only.
right_space_parse_data = parse(
    "hello {world:<}, hello {python:<}",
    space_data,
)
print("right_space_parse_data:", right_space_parse_data)
# right_space_parse_data: <Result () {'world': '    world', 'python': '   python'}>

设置是否大小写敏感

"""
设置大小写敏感
"""
# The input starts with a capital "Z" while the format uses a lower-case "z".
case_sensitive_data = "Zaygee, zaygee"

# With the default settings the differing case still matches.
case_parse_results = parse(
    "zaygee, {}",
    case_sensitive_data,
)
print("case_parse_results:", case_parse_results)
# case_parse_results: <Result ('zaygee',) {}>

# With case_sensitive=True the same format no longer matches, so None comes back.
case_parse_results_ = parse(
    "zaygee, {}",
    case_sensitive_data,
    case_sensitive=True,
)
print("case_parse_results_:", case_parse_results_)
# case_parse_results_: None

设置匹配字符数

精准匹配（precision，最多匹配两个字符）：{name:.2}/{:.2}
模糊匹配（width，最少匹配两个字符）：{name:2}/{:2}

"""匹配字符数"""
# Precision ({name:.2} / {:.2}) sets the maximum number of characters captured.
# Two fields capped at 2 characters each cannot cover the 5 characters of "hello".
accurate_match = parse(
    "{name:.2}{age:.2}",
    "hello",
)
print("accurate_match:", accurate_match)
# accurate_match: None
# Raising the first cap to 3 lets the two fields cover the whole string.
accurate_match = parse(
    "{name:.3}{age:.2}",
    "hello",
)
print("accurate_match:", accurate_match)
# accurate_match: <Result () {'name': 'hel', 'age': 'lo'}>

# Width ({name:2} / {:2}) sets the minimum number of characters captured.
blurry_match = parse(
    "{name:3}{age:2}",
    "hello",
)
print("blurry_match:", blurry_match)
# blurry_match: <Result () {'name': 'hel', 'age': 'lo'}>
# name needs at least 4 characters and age at least 2, more than "hello" provides.
blurry_match = parse(
    "{name:4}{age:2}",
    "hello",
)
print("blurry_match:", blurry_match)
# blurry_match: None

# Width/precision can be combined with a type conversion:
# name needs at least 1 character, age at least 2 characters and is converted with "d".
mix_change_type_match = parse(
    "{name:1}, {age:2d}",
    "zaygee, 30",
)
print("mix_change_type_match:", mix_change_type_match)
# mix_change_type_match: <Result () {'name': 'zaygee', 'age': 30}>

重要属性

"""
三个重要属性：fixed、named、spans
fixed: 利用位置提取匿名字段的元组
named: 存放有命名的字段的字典
spans: 存放匹配到的字段的位置
"""
# Mix one anonymous field with one named field to show all three result attributes.
profile = parse(
    "i am {}, {age} years old",
    "i am zaygee, 27 years old",
)
print("profile:", profile)
# profile: <Result ('zaygee',) {'age': '27'}>

# fixed: tuple of the anonymous fields, in positional order.
print("profile fixed:", profile.fixed)
# profile fixed: ('zaygee',)

# named: dict of the named fields.
print("profile named :", profile.named)
# profile named : {'age': '27'}

# spans: where each matched field sits in the input string.
print("profile spans:", profile.spans)
# profile spans: {'age': (13, 15), 0: (5, 11)}

自定义类型转换

"""
自定义类型的转换
"""
def change_dict(item):
    """Wrap the matched text in a single-entry dict keyed by itself.

    Args:
        item: The matched string, e.g. ``"zaygee"``.

    Returns:
        A dict mapping ``item`` to ``item``, e.g. ``{"zaygee": "zaygee"}``.
    """
    wrapped = {}
    wrapped[item] = item
    return wrapped
# Register change_dict under the type name "change_d" so that every
# {:change_d} field is passed through it before being stored in the result.
customize_data = parse(
    "i am {:change_d}, {:change_d} years old",
    "i am zaygee, 20 years old",
    extra_types={"change_d": change_dict},
)
print("customize_data:", customize_data)
# customize_data: <Result ({'zaygee': 'zaygee'}, {'20': '20'}) {}>