利用 Python-user-agents 解析 User_Agent
需求分析
近期在尝试做一个登录日志的功能,及用户登录成功后我在后台进行一个用户的登录记录,两种解决方案:
- 由前端得到用户的手机型号,我在后台接收后在数据库进行保存
- 使用
User_Agent
, 它通过解析(浏览器/HTTP) user agent 字符串,提供了一种简单的方法,来识别/检测手机、平板等设备及其功能。目标是可靠地检测:设备是手机,平板还是电脑;是否有触摸屏。
用法
各种基本信息可以帮忙识别访问者,比如设备,操作系统,浏览器等属性
from user_agents import parse
# iPhone's user agent string
ua_string = 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3'
user_agent = parse(ua_string) # 解析成user_agent
# Accessing user agent's browser attributes
user_agent.browser # returns Browser(family=u'Mobile Safari', version=(5, 1), version_string='5.1')
user_agent.browser.family # returns 'Mobile Safari'
user_agent.browser.version # returns (5, 1)
user_agent.browser.version_string # returns '5.1'
# Accessing user agent's operating system properties
user_agent.os # returns OperatingSystem(family=u'iOS', version=(5, 1), version_string='5.1')
user_agent.os.family # returns 'iOS'
user_agent.os.version # returns (5, 1)
user_agent.os.version_string # returns '5.1'
# Accessing user agent's device properties
user_agent.device # returns Device(family=u'iPhone', brand=u'Apple', model=u'iPhone')
user_agent.device.family # returns 'iPhone'
user_agent.device.brand # returns 'Apple'
user_agent.device.model # returns 'iPhone'
# Viewing a pretty string version
str(user_agent) # returns "iPhone / iOS 5.1 / Mobile Safari 5.1"
# 最后这个最好用
目前还支持这些属性:
- is_mobile:判断是不是手机
- is_tablet:判断是不是平板
- is_pc:判断是不是电脑
- is_touch_capable:有没有触屏功能
- is_bot:是不是搜索引擎的爬虫
from user_agents import parse
# Let's start from an old, non touch Blackberry device
ua_string = 'BlackBerry9700/5.0.0.862 Profile/MIDP-2.1 Configuration/CLDC-1.1 VendorID/331 UNTRUSTED/1.0 3gpp-gba'
user_agent = parse(ua_string)
user_agent.is_mobile # returns True
user_agent.is_tablet # returns False
user_agent.is_touch_capable # returns False
user_agent.is_pc # returns False
user_agent.is_bot # returns False
str(user_agent) # returns "BlackBerry 9700 / BlackBerry OS 5 / BlackBerry 9700"
常见机型映射字典
map_phone = {'Apple': 'Apple', 'KIW-AL10': 'Huawei', 'PRA-TL10': 'Huawei', 'BND-AL00': 'Huawei', 'XiaoMi': 'XiaoMi', 'MIX 2': 'XiaoMi', 'Oppo': 'Oppo', ' Oppo': 'Oppo', 'Gionee': 'Gionee', 'Samsung': 'Samsung', 'PRA-AL00X': 'Huawei', 'PACM00': 'Oppo', 'PBET00': 'Oppo', 'R7Plusm': 'Oppo', 'PAAT00': 'Oppo', 'PBAM00': 'Oppo', 'PADM00': 'Oppo', 'PAFM00': 'Oppo', 'PBEM00': 'Oppo', 'PAAM00': 'Oppo', 'PBBM00': 'Oppo', 'PACT00': 'Oppo', 'V1809A': 'vivo', 'PBAT00': 'Oppo', 'PADT00': 'Oppo', 'BND-TL10': 'Huawei', 'PBBT00': ' Oppo', 'PBCM10': 'Oppo', 'Mi Note 3': 'XiaoMi', 'V1816A': 'vivo', 'V1732T': 'vivo', 'V1813A': 'vivo', 'V1732A': 'vivo', 'V1818A':'vivo','CAM-TL00':'Huawei','Le X620':'leshi','M6 Note':'meizu','m3 note':'meizu','M5':'meizu','M1 E ':'meizu','BLN-AL10':'Huawei','M5 Note':'meizu','PRA-AL00':'honour','LND-AL30':'honour','NEM-AL10':'honour','BND-AL10':'honour','CAM-AL00':'honour','SCL-TL00':'honour','LLD-AL30':'honour','BLN-AL20':'honour','AUM-AL20':'honour','JSN-AL00':'honour','LLD-AL10':'honour','BLN-TL10':'honour','LLD-AL20':'honour','BLN-AL40':'honour','MYA-AL10':'honour','LLD-AL00':'honour','JSN-AL00a':'honour','JMM-AL10':'honour','DLI-AL10':'honour','JMM-AL00':'honour','V1809T':'vivo','LND-AL40':'honour','PLK-AL10':'honour','MX6':'meizu','PLK-TL01H':'honour','S9':'Samsung','KIW-TL00':'honour','V1813T':'vivo'}
- 常见的User_Agent各字段的解释
- Mozilla/5.0: 网景公司浏览器的标识,由于互联网初期浏览器市场主要被网景公司占领,很多服务器被设置成仅响应含有标志为Mozilla的浏览器的请求,因此,新款的浏览器为了打入市场,不得不加上这个字段。
- Windows NT 6.3 : Windows 8.1的标识符
- WOW64: 32位的Windows系统运行在64位的处理器上
- AppleWebKit/537.36:苹果公司开发的呈现引擎
- KHTML:是Linux平台中Konqueror浏览器的呈现引擎KHTML
- Geckeo:呈现引擎
- like Gecko:表示其行为与Gecko浏览器引擎类似
- 请求中为什么既含有Chrome/33.0.1750.29又含有Safari/537.36字段?
因为AppleWebKit渲染引擎是苹果公司开发的,而Google公司要采用它,为了获得服务器端的正确响应,仅在Safari浏览器UA字段中增加了Chrome字段。
例如:- Safari浏览器的UA:Mozilla/5.0 (平台;加密类型;操作系统或CPU;语言)AppleWebKit/AppleWebKit版本号(KHTML, like Gecko) Safari/Safari 版本号
- Chrome浏览器的UA:Mozilla/5.0 (平台;加密类型;操作系统或CPU;语言)AppleWebKit/AppleWebKit版本号 (KHTML, like Gecko) Chrome/
- Chrome 版本号 Safari/Safari 版本号
- 为什么UA中包含多个浏览器的标识,如:Mozilla/5.0、Chrome/33.0.1750.29、Safari/537.36,以及渲染引擎标识?
多增加一些字段都是为了让服务器检测到它支持的浏览器标识,以便获得服务器的响应,从而提升用户体验。
这里有一个demo代码请参考
"""
Request工具类
"""
import json
import logging
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.auth.models import AnonymousUser
from django.core.cache import cache
from django.urls.resolvers import ResolverMatch
from user_agents import parse
from apps.vadmin.utils.authentication import OpAuthJwtAuthentication
logger = logging.getLogger(__name__)
def get_request_user(request, authenticate=True):
"""
获取请求user
(1)如果request里的user没有认证,那么则手动认证一次
:param request:
:param authenticate:
:return:
"""
user: AbstractBaseUser = getattr(request, 'user', None)
if user and user.is_authenticated:
return user
try:
user, tokrn = OpAuthJwtAuthentication().authenticate(request)
except Exception as e:
pass
return user or AnonymousUser()
def get_request_ip(request):
"""
获取请求IP
:param request:
:return:
"""
ip = getattr(request, 'request_ip', None)
if ip:
return ip
ip = request.META.get('REMOTE_ADDR', '')
if not ip:
x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR', '')
if x_forwarded_for:
ip = x_forwarded_for.split(',')[-1].strip()
else:
ip = 'unknown'
return ip
def get_request_data(request):
"""
获取请求参数
:param request:
:return:
"""
request_data = getattr(request, 'request_data', None)
if request_data:
return request_data
data: dict = {**request.GET.dict(), **request.POST.dict()}
if not data:
try:
body = request.body
if body:
data = json.loads(body)
except Exception as e:
pass
if not isinstance(data, dict):
data = {'data': data}
return data
def get_request_path(request, *args, **kwargs):
"""
获取请求路径
:param request:
:param args:
:param kwargs:
:return:
"""
request_path = getattr(request, 'request_path', None)
if request_path:
return request_path
values = []
for arg in args:
if len(arg) == 0:
continue
if isinstance(arg, str):
values.append(arg)
elif isinstance(arg, (tuple, set, list)):
values.extend(arg)
elif isinstance(arg, dict):
values.extend(arg.values())
if len(values) == 0:
return request.path
path: str = request.path
for value in values:
path = path.replace('/' + value, '/' + '{id}')
return path
def get_request_canonical_path(request, *args, **kwargs):
"""
获取请求路径
:param request:
:param args:
:param kwargs:
:return:
"""
request_path = getattr(request, 'request_canonical_path', None)
if request_path:
return request_path
path: str = request.path
resolver_match: ResolverMatch = request.resolver_match
for value in resolver_match.args:
path = path.replace(f"/{value}", "/{id}")
for key, value in resolver_match.kwargs.items():
if key == 'pk':
path = path.replace(f"/{value}", f"/{{id}}")
continue
path = path.replace(f"/{value}", f"/{{{key}}}")
return path
def get_browser(request, *args, **kwargs):
"""
获取浏览器名
:param request:
:param args:
:param kwargs:
:return:
"""
ua_string = request.META['HTTP_USER_AGENT']
user_agent = parse(ua_string)
return user_agent.get_browser()
def get_os(request, *args, **kwargs):
"""
获取操作系统
:param request:
:param args:
:param kwargs:
:return:
"""
ua_string = request.META['HTTP_USER_AGENT']
user_agent = parse(ua_string)
return user_agent.get_os()
def get_login_location(request, *args, **kwargs):
"""
获取ip 登录位置
:param request:
:param args:
:param kwargs:
:return:
"""
import requests
import eventlet # 导入eventlet这个模块
request_ip = get_request_ip(request)
# 从缓存中获取
location = cache.get(request_ip)
if location:
return location
# 通过api 获取,再缓存redis
try:
eventlet.monkey_patch(thread=False) # 必须加这条代码
with eventlet.Timeout(2, False): # 设置超时时间为2秒
apiurl = "http://whois.pconline.com.cn/ip.jsp?ip=%s" % request_ip
r = requests.get(apiurl)
content = r.content.decode('GBK')
location = str(content).replace('\r', '').replace('\n', '')[:64]
cache.set(request_ip, location, 86400)
return location
except Exception as e:
pass
return ""
def get_verbose_name(queryset=None, view=None, model=None):
"""
获取 verbose_name
:param request:
:param view:
:return:
"""
try:
if queryset and hasattr(queryset, 'model'):
model = queryset.model
elif view and hasattr(view.get_queryset(), 'model'):
model = view.get_queryset().model
elif view and hasattr(view.get_serializer(), 'Meta') and hasattr(view.get_serializer().Meta, 'model'):
model = view.get_serializer().Meta.model
if model:
return getattr(model, '_meta').verbose_name
except Exception as e:
pass
return ""