使用范例
from parse import parse

# Sample input: a comma-separated list of "key=value" pairs.
before_data = 'cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480'
print(f"before_data: {before_data}")
# before_data: cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480

# Each {name} placeholder captures the text found at that position as a string.
parse_result = parse(
    format='cookie={cookie}, duration={duration}, table={table}, n_packets={n_packets}',
    string=before_data,
)
print(f"parse_result: {parse_result}")
# parse_result: <Result () {'cookie': '0x9816da8e872d717d', 'duration': '298506.364s', 'table': '0', 'n_packets': '480'}>

# Named captures are read from the result by key.
print(f'cookie: {parse_result["cookie"]}')  # cookie: 0x9816da8e872d717d
解析没有定义字段名字的情况
from parse import parse

# Anonymous "{}" placeholders are collected positionally in the result.
# The lowercase "i am" in the format still matches the capitalised "I am"
# because matching is case-insensitive unless case_sensitive=True is passed.
before_data_2 = 'I am zaygee, 26 years old'
parse_2_result = parse(format="i am {}, {} years old", string=before_data_2)
print(f"parse_2_result: {parse_2_result}")
# parse_2_result: <Result ('zaygee', '26') {}>
解析定义了字段名字的情况(结果可以像字典一样按字段名取值)
from parse import parse

# Named placeholders: the captured values are looked up by field name,
# the same way a dict is indexed.
parse_3_result = parse(
    format="i am {name}, {age} years old",
    string=before_data_2,
)
print(f"parse_3_result: {parse_3_result}")
# parse_3_result: <Result () {'name': 'zaygee', 'age': '26'}>
print(parse_3_result["name"])  # zaygee
支持pattern复用
# A format that is applied more than once can be compiled once and reused.
# NOTE: this import shadows the built-in compile() for the rest of the module.
from parse import compile

pattern = compile(format="i am {name}, {age} years old")
pattern_result = pattern.parse(string="I am hoan, 30 years old")
print(f"pattern_result: {pattern_result}")
# pattern_result: <Result () {'name': 'hoan', 'age': '30'}>
"""实战小题:
user_infos = [
"username=john, mail=john@baidu.com",
"username=hoan, mail=hoan@baidu.com",
"username=zaygee, mail=zaygee@baidu.com",
"username=poppongj, mail=poppongj@baidu.com",
]
现在有一份测试数据 user_infos,需要将以上数据处理为字典输出
预期结果:
handle_results = [
{"username" : "john", "mail": "john@baidu.com"},
{"username" : "hoan", "mail": "hoan@baidu.com"},
...
]
"""
from parse import compile
import json

user_infos = [
    "username=john, mail=john@baidu.com",
    "username=hoan, mail=hoan@baidu.com",
    "username=zaygee, mail=zaygee@baidu.com",
    "username=poppongj, mail=poppongj@baidu.com",
]

# One compiled pattern describes every entry in user_infos.
user_info_pattern = compile("username={username}, mail={mail}")

# Parse each entry and collect the captured fields as plain dicts.
handle_results = []
for item in user_infos:
    pattern_result = user_info_pattern.parse(item)
    print(f"user_info_pattern_result: {pattern_result}")
    handle_results.append(
        {
            "username": pattern_result["username"],
            "mail": pattern_result["mail"]
        }
    )

# Print the collected results as indented JSON.
print(f"handle_results:\n {json.dumps(handle_results, indent=4)}")
# handle_results:
#  [
#     {
#         "username": "john",
#         "mail": "john@baidu.com"
#     },
#     {
#         "username": "hoan",
#         "mail": "hoan@baidu.com"
#     },
#     {
#         "username": "zaygee",
#         "mail": "zaygee@baidu.com"
#     },
#     {
#         "username": "poppongj",
#         "mail": "poppongj@baidu.com"
#     }
# ]
类型转换
更多类型转换见官方:https://github.com/r1chardj0n3s/parse
"""
解析时的类型转换:
{age:d} --> 将age的值转换为int类型
{date:tg} --> 将date的值转换为datetime e.g. 20/1/1972 10:21:36 AM +1:00
{numbers:f} --> 将numbers的值转换为float类型
"""
from parse import parse

# Format specs convert the captured text while parsing:
#   d  -> int
#   tg -> datetime, read in day/month order
#   f  -> float
str_data = "i am zaygee, 20 years old, now is 6/10/2022 11:00 PM, number is 1.444"
str_data_result = parse(
    format="i am {name}, {age:d} years old, now is {date:tg}, number is {num:f}",
    string=str_data,
)
print(f"str_data_result: {str_data_result}")
# str_data_result: <Result () {'name': 'zaygee', 'age': 20, 'date': datetime.datetime(2022, 10, 6, 23, 0), 'num': 1.444}>
print(str_data_result["date"])  # 2022-10-06 23:00:00
去除空格解析
去除两边空格:{name:^} 去除左边空格:{name:>} 去除右边空格:{name:<}
"""提取时去除空格"""
# Sample input with padding around each captured word: two spaces before and
# one space after both "world" and "python". The format's literal "hello "
# consumes one of the leading spaces, so one leading space remains to capture.
space_data = "hello  world , hello  python "

# Without an alignment spec the surrounding spaces stay in the captured values.
space_parse_data = parse("hello {world}, hello {python}", space_data)
print(f"space_parse_data: {space_parse_data}")
# space_parse_data: <Result () {'world': ' world ', 'python': ' python '}>

# {name:^} strips spaces on both sides of the captured value.
space_parse_data_2 = parse("hello {world:^}, hello {python:^}", space_data)
print(f"space_parse_data_2: {space_parse_data_2}")
# space_parse_data_2: <Result () {'world': 'world', 'python': 'python'}>

# {name:>} strips spaces on the left only.
left_space_parse_data = parse("hello {world:>}, hello {python:>}", space_data)
print(f"left_space_parse_data: {left_space_parse_data}")
# left_space_parse_data: <Result () {'world': 'world ', 'python': 'python '}>

# {name:<} strips spaces on the right only.
right_space_parse_data = parse("hello {world:<}, hello {python:<}", space_data)
print(f"right_space_parse_data: {right_space_parse_data}")
# right_space_parse_data: <Result () {'world': ' world', 'python': ' python'}>
设置是否大小写敏感
"""
设置大小写敏感
"""
# By default the literal text of the format is matched case-insensitively,
# so "zaygee" in the format matches the leading "Zaygee" of the input.
case_sensitive_data = "Zaygee, zaygee"
case_parse_results = parse(format="zaygee, {}", string=case_sensitive_data)
print(f"case_parse_results: {case_parse_results}")
# case_parse_results: <Result ('zaygee',) {}>

# With case_sensitive=True the same format no longer matches; parse() returns None.
case_parse_results_ = parse(
    format="zaygee, {}",
    string=case_sensitive_data,
    case_sensitive=True,
)
print(f"case_parse_results_: {case_parse_results_}")
# case_parse_results_: None
设置匹配字符数
精准匹配(最大匹配两个字符):{name:.2}/{:.2}
模糊匹配(最少匹配两个字符):{name:2}/{:2}
"""匹配字符数"""
# Precision ({name:.N} / {:.N}) caps a field at N characters.
# Two fields of at most 2 characters cannot cover the 5-character input.
accurate_match = parse(format="{name:.2}{age:.2}", string="hello")
print(f'accurate_match: {accurate_match}')
# accurate_match: None

# At most 3 + at most 2 characters covers "hello" exactly.
accurate_match = parse(format="{name:.3}{age:.2}", string="hello")
print(f'accurate_match: {accurate_match}')
# accurate_match: <Result () {'name': 'hel', 'age': 'lo'}>

# Width ({name:N} / {:N}) requires a field to be at least N characters.
blurry_match = parse(format="{name:3}{age:2}", string="hello")
print(f"blurry_match: {blurry_match}")
# blurry_match: <Result () {'name': 'hel', 'age': 'lo'}>

# At least 4 + at least 2 characters needs more than the 5 available.
blurry_match = parse(format="{name:4}{age:2}", string="hello")
print(f"blurry_match: {blurry_match}")
# blurry_match: None

# Width can be combined with a type conversion:
# name needs at least 1 character, age at least 2 digits converted to int.
mix_change_type_match = parse(format="{name:1}, {age:2d}", string="zaygee, 30")
print(f"mix_change_type_match: {mix_change_type_match}")
# mix_change_type_match: <Result () {'name': 'zaygee', 'age': 30}>
重要属性
"""
三个重要属性:fixed、named、spans
fixed: 利用位置提取匿名字段的元组
named: 存放有命名的字段的字典
spans: 存放匹配到的字段的位置
"""
# A Result exposes three attributes:
#   fixed - tuple of the anonymous "{}" captures, in order
#   named - dict of the named captures
#   spans - (start, end) offsets in the input for every capture
profile = parse(
    format="i am {}, {age} years old",
    string="i am zaygee, 27 years old",
)
print(f"profile: {profile}")
# profile: <Result ('zaygee',) {'age': '27'}>
print(f"profile fixed: {profile.fixed}")
# profile fixed: ('zaygee',)
print(f'profile named : {profile.named}')
# profile named : {'age': '27'}
print(f'profile spans: {profile.spans}')
# profile spans: {'age': (13, 15), 0: (5, 11)}
自定义类型转换
"""
自定义类型的转换
"""
def change_dict(item: str) -> dict:
    """Return the matched text as a one-entry dict keyed by itself.

    Args:
        item: The matched string, e.g. "zaygee".

    Returns:
        A dict mapping ``item`` to ``item``, e.g. {"zaygee": "zaygee"}.
    """
    return {item: item}
# Register change_dict under the type name "change_d" so that every
# {:change_d} field is passed through it after matching.
customize_data = parse(
    format="i am {:change_d}, {:change_d} years old",
    string="i am zaygee, 20 years old",
    extra_types={"change_d": change_dict},
)
print(f'customize_data: {customize_data}')
# customize_data: <Result ({'zaygee': 'zaygee'}, {'20': '20'}) {}>