# python 读取发票信息;python 电子发票;python 纸质发票;python 发票
# 利用百度免费的增值税发票识别 API 提取发票信息,支持 PDF 文件以及拍照得到的 PNG 图片。
import requests
import base64
# pip install PyMuPDF
# 安装PyMuPDF才能用fitz
import fitz
# fileName: str -- path to the invoice file,
# e.g. "test.png"; a PDF file also works.
class InvoiceParser:
    """Read the key fields of a VAT invoice through Baidu's OCR endpoint.

    Constructing an instance immediately uploads the file and fills
    ``invoiceInfo``; there is no deferred mode.

    Attributes:
        fileName: Path of the invoice file (image or PDF) that is uploaded.
        response: The ``words_result`` object taken from the OCR reply
            (an empty dict until ``get_response`` has run).
        invoiceInfo: Summary dict keyed by the Chinese labels listed in
            ``_FIELD_MAP``. A value of ``-1`` means the corresponding field
            was absent from the reply.
        access_token: Token appended to the request URL. The class-level
            value is a placeholder meaning "obtain your own"; pass a real
            token to the constructor or override the attribute.
    """

    # Placeholder token ("obtain it yourself"); see GetAccessToken().
    access_token = "自己获取"

    REQUEST_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"

    # Seconds to wait for the OCR service before giving up.
    REQUEST_TIMEOUT = 30

    # Sentinel stored in invoiceInfo for fields that were not returned.
    _MISSING = -1

    # (label shown to the user, key read from ``words_result``).
    # NOTE(review): "AmountInFiguers" is spelled exactly as in the original
    # code; presumably it mirrors the API's own spelling -- do not "fix" it
    # without checking the API reference.
    _FIELD_MAP = (
        ("发票号码", "InvoiceNum"),
        ("开票日期", "InvoiceDate"),
        ("发票金额(含税)", "AmountInFiguers"),
        ("销售方名称", "SellerName"),
        ("购买物品", "CommodityName"),
    )

    def __init__(self, fileName, access_token=None):
        """Upload ``fileName`` for recognition and populate ``invoiceInfo``.

        Args:
            fileName: Path to the invoice; names ending in ``.pdf`` (any
                letter case) are sent as PDF, everything else as an image.
            access_token: Optional API token. When omitted, the class-level
                ``access_token`` is used.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If the OCR reply carries no ``words_result``.
        """
        self.fileName = fileName
        if access_token is not None:
            self.access_token = access_token
        self.response = {}
        self.invoiceInfo = dict.fromkeys(
            (label for label, _ in self._FIELD_MAP), self._MISSING
        )
        self._set_up()

    def _set_up(self):
        """Run the OCR request and copy the recognised fields into ``invoiceInfo``."""
        self.get_response()
        # Update in place so existing references to the dict stay valid.
        self.invoiceInfo.update(self._extract_info(self.response))

    @classmethod
    def _extract_info(cls, words_result):
        """Map an OCR ``words_result`` dict onto the summary labels.

        Fields missing from ``words_result`` come back as ``-1`` instead of
        raising, which is what the pre-seeded defaults are for.
        """
        return {
            label: words_result.get(api_field, cls._MISSING)
            for label, api_field in cls._FIELD_MAP
        }

    def _is_pdf(self):
        """Return True when the file name ends with ``.pdf`` (case-insensitive)."""
        # A suffix test (rather than a substring test) keeps names such as
        # "scan.pdf.png" or "x.pdf_files/scan.png" on the image path.
        return str(self.fileName).lower().endswith(".pdf")

    def _build_params(self):
        """Read the invoice file and return the form fields for the request.

        Returns:
            ``{"pdf_file": <base64 bytes>}`` for PDF files, otherwise
            ``{"image": <base64 bytes>}``.
        """
        # The context manager guarantees the handle is closed even on error.
        with open(self.fileName, "rb") as invoice_file:
            encoded = base64.b64encode(invoice_file.read())
        field = "pdf_file" if self._is_pdf() else "image"
        return {field: encoded}

    def get_response(self):
        """Send the invoice to the OCR service and store its ``words_result``.

        Returns:
            The ``words_result`` object of the reply (also kept in
            ``self.response``).

        Raises:
            OSError: If the invoice file cannot be read.
            KeyError: If the reply has no ``words_result``; the message
                includes the service's ``error_msg`` when one is present.
        """
        form_fields = self._build_params()
        reply = requests.post(
            self.REQUEST_URL,
            # Let requests build and escape the query string.
            params={"access_token": self.access_token},
            data=form_fields,
            headers={"content-type": "application/x-www-form-urlencoded"},
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = reply.json()
        if "words_result" not in payload:
            # Keep KeyError (what the plain lookup used to raise) but say why.
            reason = payload.get("error_msg", payload)
            raise KeyError(
                "OCR reply has no 'words_result': {}".format(reason)
            )
        self.response = payload["words_result"]
        return self.response
def GetAccessToken(apiKey="自己获取", secretKey="自己获取"):
    """Request an OAuth access token from Baidu using client credentials.

    Args:
        apiKey: The application's API key, sent as ``client_id``. The
            default is a placeholder meaning "obtain your own".
        secretKey: The application's secret key, sent as ``client_secret``.
            The default is the same placeholder.

    Returns:
        The ``access_token`` value from the JSON reply.

    Raises:
        KeyError: If the reply contains no ``access_token`` (for example
            when the credentials are rejected); the message includes the
            parsed reply.
    """
    response = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        # Passing the values via ``params`` lets requests escape them.
        params={
            "grant_type": "client_credentials",
            "client_id": apiKey,
            "client_secret": secretKey,
        },
        timeout=30,
    )
    # Parse as JSON; never eval() text that arrived over the network.
    payload = response.json()
    if "access_token" not in payload:
        raise KeyError(
            "token reply has no 'access_token': {}".format(payload)
        )
    return payload["access_token"]
if __name__ == '__main__':
    # The access_token used inside get_response() is produced by
    # GetAccessToken(). It only has to be fetched once, so the call is
    # left disabled after the first run:
    # print(GetAccessToken())
    parser = InvoiceParser("u盘.png")
    # Show the extracted summary fields.
    print(parser.invoiceInfo)
    # Uncomment to inspect the full OCR result instead:
    # print(str(parser.response))