https://github.com/r1chardj0n3s/parse

使用范例

  1. from parse import parse
  2. before_data = (
  3. 'cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480'
  4. )
  5. print(f"before_data: {before_data}")
  6. # before_data: cookie=0x9816da8e872d717d, duration=298506.364s, table=0, n_packets=480
  7. parse_result = parse('cookie={cookie}, duration={duration}, table={table}, n_packets={n_packets}', before_data)
  8. print(f"parse_result: {parse_result}")
  9. # parse_result: <Result () {'cookie': '0x9816da8e872d717d', 'duration': '298506.364s', 'table': '0', 'n_packets': '480'}>
  10. print(f'cookie: {parse_result["cookie"]}') # cookie: 0x9816da8e872d717d

解析没有定义字段名字的情况

  1. from parse import parse
  2. # 解析没有定义字段名字的情况
  3. before_data_2 = 'I am zaygee, 26 years old'
  4. parse_2_result = parse("i am {}, {} years old", before_data_2)
  5. print(f"parse_2_result: {parse_2_result}")
  6. # parse_2_result: <Result ('zaygee', '26') {}>

解析带命名字段的情况(结果可像字典一样按字段名取值)

  1. from parse import parse
  2. # 解析类似字典的实例的情况
  3. parse_3_result = parse("i am {name}, {age} years old", before_data_2)
  4. print(f"parse_3_result: {parse_3_result}")
  5. # parse_3_result: <Result () {'name': 'zaygee', 'age': '26'}>
  6. print(parse_3_result["name"]) # zaygee

支持pattern复用

  1. # 支持pattern复用
  2. from parse import compile
  3. pattern = compile("i am {name}, {age} years old")
  4. pattern_result = pattern.parse("I am hoan, 30 years old")
  5. print(f"pattern_result: {pattern_result}")
  6. # pattern_result: <Result () {'name': 'hoan', 'age': '30'}>
  7. """实战小题:
  8. user_infos = [
  9. "username=john, mail=john@baidu.com",
  10. "username=hoan, mail=hoan@baidu.com",
  11. "username=zaygee, mail=zaygee@baidu.com",
  12. "username=poppongj, mail=poppongj@baidu.com",
  13. ]
  14. 现在有一份测试数据 user_infos,需要将以上数据处理为字典输出
  15. 预期结果:
  16. handle_results = [
  17. {"username" : "john", "mail": "john@baidu.com"},
  18. {"username" : "hoan", "mail": "hoan@baidu.com"},
  19. ...
  20. ]
  21. """
  22. from parse import compile
  23. import json
  24. user_infos = [
  25. "username=john, mail=john@baidu.com",
  26. "username=hoan, mail=hoan@baidu.com",
  27. "username=zaygee, mail=zaygee@baidu.com",
  28. "username=poppongj, mail=poppongj@baidu.com",
  29. ]
  30. # user_infos通用匹配规则
  31. user_info_pattern = compile("username={username}, mail={mail}")
  32. # 循环匹配user_infos,并且将匹配结果处理为预期格式添加到handle_results中
  33. handle_results = []
  34. for item in user_infos:
  35. pattern_result = user_info_pattern.parse(item)
  36. print(f"user_info_pattern_result: {pattern_result}")
  37. handle_results.append(
  38. {
  39. "username": pattern_result["username"],
  40. "mail": pattern_result["mail"]
  41. }
  42. )
  43. # 输出预期结果
  44. print(f"handle_results:\n {json.dumps(handle_results, indent=4)}")
  45. # handle_results:
  46. # [
  47. # {
  48. # "username": "john",
  49. # "mail": "john@baidu.com"
  50. # },
  51. # {
  52. # "username": "hoan",
  53. # "mail": "hoan@baidu.com"
  54. # },
  55. # {
  56. # "username": "zaygee",
  57. # "mail": "zaygee@baidu.com"
  58. # },
  59. # {
  60. # "username": "poppongj",
  61. # "mail": "poppongj@baidu.com"
  62. # }
  63. # ]

类型转换

更多类型转换见官方文档:https://github.com/r1chardj0n3s/parse

  1. """
  2. 解析时的类型转换:
  3. {age:d} --> 将age的值转换为int类型
  4. {date:tg} --> 将date的值转换为datetime e.g. 20/1/1972 10:21:36 AM +1:00
  5. {numbers:f} --> 将numbers的值转换为float类型
  6. """
  7. from parse import parse
  8. str_data = "i am zaygee, 20 years old, now is 6/10/2022 11:00 PM, number is 1.444"
  9. str_data_result = parse(
  10. "i am {name}, {age:d} years old, now is {date:tg}, number is {num:f}",
  11. str_data)
  12. print(f"str_data_result: {str_data_result}")
  13. # str_data_result: <Result () {'name': 'zaygee', 'age': 20, 'date': datetime.datetime(2022, 10, 6, 23, 0), 'num': 1.444}>
  14. print(str_data_result["date"]) # 2022-10-06 23:00:00

去除空格解析

去除两边空格:{name:^} 去除左边空格:{name:>} 去除右边空格:{name:<}

  1. """提取时去除空格"""
  2. space_data = "hello  world , hello  python "
  3. space_parse_data = parse("hello {world}, hello {python}", space_data)
  4. print(f"space_parse_data: {space_parse_data}")
  5. # space_parse_data: <Result () {'world': ' world ', 'python': ' python '}>
  6. # 提取时去除两边空格:{name:^}
  7. space_parse_data_2 = parse("hello {world:^}, hello {python:^}", space_data)
  8. print(f"space_parse_data_2: {space_parse_data_2}")
  9. # space_parse_data_2: <Result () {'world': 'world', 'python': 'python'}>
  10. # 提取时去除左边空格:{name:>}
  11. left_space_parse_data = parse("hello {world:>}, hello {python:>}", space_data)
  12. print(f"left_space_parse_data: {left_space_parse_data}")
  13. # left_space_parse_data: <Result () {'world': 'world ', 'python': 'python '}>
  14. # 提取时去除右边空格: {name:<}
  15. right_space_parse_data = parse("hello {world:<}, hello {python:<}", space_data)
  16. print(f"right_space_parse_data: {right_space_parse_data}")
  17. # right_space_parse_data: <Result () {'world': ' world', 'python': ' python'}>

设置是否大小写敏感(默认不区分大小写,传入 case_sensitive=True 后区分大小写)

  1. """
  2. 设置大小写敏感
  3. """
  4. case_sensitive_data = "Zaygee, zaygee"
  5. case_parse_results = parse("zaygee, {}", case_sensitive_data)
  6. print(f"case_parse_results: {case_parse_results}")
  7. # case_parse_results: <Result ('zaygee',) {}>
  8. # 设置大小写敏感
  9. case_parse_results_ = parse("zaygee, {}", case_sensitive_data, case_sensitive=True)
  10. print(f"case_parse_results_: {case_parse_results_}")
  11. # case_parse_results_: None

设置匹配字符数

精准匹配(用精度 precision 指定最多匹配的字符数,此处最多两个字符):{name:.2}/{:.2}
模糊匹配(用宽度 width 指定最少匹配的字符数,此处最少两个字符):{name:2}/{:2}

  1. """匹配字符数"""
  2. # 精确匹配:指定最大字符数 {name:.2}/ {:.2}
  3. accurate_match = parse("{name:.2}{age:.2}", "hello")
  4. print(f'accurate_match: {accurate_match}')
  5. # accurate_match: None
  6. accurate_match = parse("{name:.3}{age:.2}", "hello")
  7. print(f'accurate_match: {accurate_match}')
  8. # accurate_match: <Result () {'name': 'hel', 'age': 'lo'}>
  9. # 模糊匹配: 指定最小字符数 {name:2}/{:2}
  10. blurry_match = parse("{name:3}{age:2}", "hello")
  11. print(f"blurry_match: {blurry_match}")
  12. # blurry_match: <Result () {'name': 'hel', 'age': 'lo'}>
  13. # name最少匹配4个字符,age最少匹配2个字符
  14. blurry_match = parse("{name:4}{age:2}", "hello")
  15. print(f"blurry_match: {blurry_match}")
  16. # blurry_match: None
  17. # 精准/模糊匹配 + 类型转换
  18. # name 最少一个字符数,age最少2个字符数
  19. mix_change_type_match = parse("{name:1}, {age:2d}", "zaygee, 30")
  20. print(f"mix_change_type_match: {mix_change_type_match}")
  21. # mix_change_type_match: <Result () {'name': 'zaygee', 'age': 30}>

重要属性

  1. """
  2. 三个重要属性:fixed、named、spans
  3. fixed: 利用位置提取匿名字段的元组
  4. named: 存放有命名的字段的字典
  5. spans: 存放匹配到的字段的位置
  6. """
  7. profile = parse("i am {}, {age} years old", "i am zaygee, 27 years old")
  8. print(f"profile: {profile}")
  9. # profile: <Result ('zaygee',) {'age': '27'}>
  10. print(f"profile fixed: {profile.fixed}")
  11. # profile fixed: ('zaygee',)
  12. print(f'profile named : {profile.named}')
  13. # profile named : {'age': '27'}
  14. print(f'profile spans: {profile.spans}')
  15. # profile spans: {'age': (13, 15), 0: (5, 11)}

自定义类型转换

  1. """
  2. 自定义类型的转换
  3. """
  4. def change_dict(item):
  5. """将匹配的字符串以字典的形式返回
  6. params item: 匹配的字符串
  7. type item: str eg: zaygee
  8. """
  9. return {item: item}
  10. customize_data = parse(
  11. "i am {:change_d}, {:change_d} years old",
  12. "i am zaygee, 20 years old",
  13. dict(change_d=change_dict)
  14. )
  15. print(f'customize_data: {customize_data}')
  16. # customize_data: <Result ({'zaygee': 'zaygee'}, {'20': '20'}) {}>