python 读取发票信息;python 电子发票;python 纸质发票;python 发票;
利用百度免费的发票识别 API 提取发票信息,支持 PDF 文件以及拍照得到的 PNG 图片。
import base64

import requests

# pip install PyMuPDF  -- PyMuPDF is the package that provides `fitz`.
# NOTE(review): nothing in this section references fitz; the import is kept
# unchanged in case other code relies on it.
import fitz

# Placeholder meaning "obtain it yourself": paste the token printed by
# GetAccessToken() here, or pass access_token= to InvoiceParser.
_ACCESS_TOKEN = "自己获取"

_OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
_TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"

# Seconds to wait for the remote service; without a timeout requests can
# block indefinitely.
_REQUEST_TIMEOUT = 30


class InvoiceOCRError(KeyError):
    """Raised when the OCR reply does not contain a usable ``words_result``.

    Derives from KeyError because that is what a missing ``words_result``
    used to raise, so existing ``except KeyError`` handlers keep working.
    """

    def __str__(self):
        # KeyError.__str__ would repr() the message; show it verbatim.
        return str(self.args[0]) if self.args else ""


class InvoiceParser:
    """Send an invoice file to the Baidu VAT-invoice OCR endpoint.

    Constructing an instance immediately reads ``fileName``, posts it to the
    service and fills ``invoiceInfo``.

    Args:
        fileName: Path of the invoice, e.g. ``"test.png"``; a path ending in
            ``.pdf`` (any letter case) is uploaded as a PDF, anything else as
            an image.
        access_token: Token for the OCR endpoint. Defaults to the module-level
            placeholder, which must be replaced with a real token.

    Attributes:
        fileName: The path given to the constructor.
        access_token: The token used for the OCR request.
        response: The ``words_result`` mapping returned by the service.
        invoiceInfo: Summary dict; a field absent from the reply stays ``-1``.

    Raises:
        OSError: If the file cannot be read.
        InvoiceOCRError: If the reply has no ``words_result`` mapping.
        requests.RequestException: On network failure or timeout.
    """

    # invoiceInfo key -> key inside the service's ``words_result``.
    # ("AmountInFiguers" is the spelling the original code reads; keep it.)
    _FIELD_MAP = {
        "发票号码": "InvoiceNum",
        "开票日期": "InvoiceDate",
        "发票金额(含税)": "AmountInFiguers",
        "销售方名称": "SellerName",
        "购买物品": "CommodityName",
    }

    # Sentinel stored for a field the service did not return.
    _MISSING = -1

    def __init__(self, fileName, access_token=None):
        self.fileName = fileName
        self.access_token = (
            _ACCESS_TOKEN if access_token is None else access_token
        )
        self.response = {}
        self.invoiceInfo = dict.fromkeys(self._FIELD_MAP, self._MISSING)
        self._set_up()

    def _set_up(self):
        """Run the OCR request and copy the recognised fields into invoiceInfo."""
        self.get_response()
        for label, api_key in self._FIELD_MAP.items():
            # .get keeps the -1 default instead of crashing on a missing field.
            self.invoiceInfo[label] = self.response.get(api_key, self._MISSING)

    def _is_pdf(self):
        """Return True when the file name ends with ``.pdf`` (case-insensitive)."""
        # A suffix test: a plain substring test would misclassify "a.pdf.png"
        # and would miss "A.PDF".
        return str(self.fileName).lower().endswith(".pdf")

    def get_response(self):
        """Upload the invoice and return the service's ``words_result``.

        The result is also stored on ``self.response``.

        Raises:
            InvoiceOCRError: If the reply carries no ``words_result`` mapping.
        """
        # Base64-encode the file; the context manager closes the handle.
        with open(self.fileName, "rb") as invoice_file:
            encoded = base64.b64encode(invoice_file.read())

        # The payload is sent as "image" by default and as "pdf_file" for PDFs.
        field = "pdf_file" if self._is_pdf() else "image"
        reply = requests.post(
            _OCR_URL,
            params={"access_token": self.access_token},
            data={field: encoded},
            headers={"content-type": "application/x-www-form-urlencoded"},
            timeout=_REQUEST_TIMEOUT,
        )
        payload = reply.json()

        words_result = (
            payload.get("words_result") if isinstance(payload, dict) else None
        )
        if not isinstance(words_result, dict):
            # Surface whatever the service sent back so the failure (for
            # example a rejected token) can be diagnosed.
            raise InvoiceOCRError(
                "OCR reply for {!r} has no 'words_result': {!r}".format(
                    self.fileName, payload
                )
            )

        self.response = words_result
        return self.response


def GetAccessToken(apiKey="自己获取", secretKey="自己获取"):
    """Request an access token with the client-credentials grant.

    Args:
        apiKey: Sent as ``client_id``; the default is a placeholder.
        secretKey: Sent as ``client_secret``; the default is a placeholder.

    Returns:
        The ``access_token`` value of the JSON reply.

    Raises:
        KeyError: If the reply contains no ``access_token``.
        requests.RequestException: On network failure or timeout.
    """
    response = requests.post(
        _TOKEN_URL,
        params={
            "grant_type": "client_credentials",
            "client_id": apiKey,
            "client_secret": secretKey,
        },
        timeout=_REQUEST_TIMEOUT,
    )
    # Parse as JSON; never eval() text received from the network.
    return response.json()["access_token"]


if __name__ == '__main__':
    # The token only has to be fetched once: uncomment the next line, run the
    # script, store the printed value as the access token, then comment it
    # out again.
    # print(GetAccessToken())
    tmp = InvoiceParser("u盘.png")
    print(str(tmp.invoiceInfo))
    # print(str(tmp.response))
