# Source: https://blog.csdn.net/zhouz92/article/details/107179616
from docx import Document
# Walk-through: inspect the tables of an existing Word document.
# Document can create a new Word document or, as here, open a local file.
doc = Document('test03.docx')
tables = doc.tables # list of all table objects in the document
print(tables)
# author's sample output: [<docx.table.Table object at 0x000001957059CD48>]
print(len(tables)) # number of tables in the document
# author's sample output: 1
table0 = tables[0] # first table object
# Show the table's style information.
print(table0.style)
# author's sample output: _TableStyle('Normal Table') id: 190621384
# Get every cell of the table.
# NOTE(review): `_cells` is underscore-prefixed, i.e. not public API --
# confirm it is available in the installed python-docx version.
cells = table0._cells
print(len(cells)) # number of cells in the table
# author's sample output: 15
# Collect the text of every cell.
cells_string = [cell.text for cell in cells]
print(cells_string)
# Column count of the table.
col_num = len(table0.columns)
print(col_num) # author's sample output: 3
# Row count of the table.
row_num = len(table0.rows)
print(row_num) # author's sample output: 5
# First row object.
row0 = table0.rows[0]
# First column object.
col0 = table0.columns[0]
# Text of every cell in the first row.
# The bare string on the next line is a no-op expression kept from the
# original notes; in English: "use row0.cells to get the row's cell objects
# before reading their text".
'要用 row0.cells 获取行对象的 cell 才能获取其文字信息'
row0_string = [cell.text for cell in row0.cells]
print(row0_string)
# Text of every cell in the first column.
col0_string = [cell.text for cell in col0.cells]
print(col0_string)
# Original article: https://blog.csdn.net/zhouz92/article/details/107179616
# https://www.jb51.net/article/143936.htm
# https://www.jb51.net/article/167040.htm
# https://zhuanlan.zhihu.com/p/158806667
# https://blog.csdn.net/zhouz92/article/details/106883774
# https://www.cnblogs.com/danhuai/p/11700407.html
import xlrd
# Walk-through: read an .xls workbook with xlrd.
# Way 1 of opening the file:
work_book = xlrd.open_workbook('test01.xls')

# Fetch a sheet object by its index.
sheet_1 = work_book.sheet_by_index(0)
print(sheet_1)
# ------ author's sample output ------
# <xlrd.sheet.Sheet object at 0x000001CE3473C550>
# ------------------------------------

# Fetch a sheet object by its name; the name is case-sensitive.
sheet_2 = work_book.sheet_by_name('Sheet2')
print(sheet_2)

# Read row by row: one list of values per row of the first sheet.
data_row = [
    sheet_1.row_values(row_idx) for row_idx in range(sheet_1.nrows)
]
print(data_row)

# Read column by column: one list of values per column of the first sheet.
data_col = [
    sheet_1.col_values(col_idx) for col_idx in range(sheet_1.ncols)
]
print(data_col)

# Read every sheet of test01.xls row by row, keyed by sheet position.
all_data = {
    position: [sheet.row_values(row_idx) for row_idx in range(sheet.nrows)]
    for position, sheet in enumerate(work_book.sheets())
}
print(all_data)
# Original article: https://blog.csdn.net/zhouz92/article/details/106883774
import os
import shutil
from openpyxl.reader.excel import load_workbook
def read_excel_with_openpyxl(
        filename="C:/pubip-2021-03-17.xlsx"):
    """Collect the first-column value of each wide-enough row, per sheet.

    Args:
        filename: Path of the .xlsx workbook to load.

    Returns:
        dict mapping each sheet name to the list of first-cell values taken
        from every row of that sheet that has more than three cells.
        Values are stored exactly as the worksheet yields them, so callers
        should be prepared for non-string entries (presumably ``None`` for
        blank cells -- verify against the workbook).

    Progress information (sheet names, sizes, collected values) is printed
    to stdout as in the original implementation.
    """
    wb = load_workbook(filename)
    # Use the ``sheetnames`` property and ``wb[name]`` lookup instead of the
    # deprecated ``get_sheet_names()`` / ``get_sheet_by_name()`` methods.
    sheetnames = wb.sheetnames
    print(sheetnames)
    data_dic = {}
    for sname in sheetnames:
        ws = wb[sname]
        print(ws.title)
        print(ws.max_row)
        print(ws.max_column)
        first_cells = []
        # ``ws.values`` is iterated row by row; each item is indexed and
        # measured with len(), i.e. treated as a sequence of cell values.
        for row in ws.values:
            if len(row) > 3:
                print(row[0])
                first_cells.append(row[0])
        print(first_cells)
        data_dic[sname] = first_cells
    print(data_dic)
    return data_dic
def mv_docx2_project_dir(bugdir, data):
    """Move ``<name>.docx`` files from *bugdir* into per-project subfolders.

    Args:
        bugdir: Directory that holds the loose ``.docx`` files.
        data: Mapping of project name (used as the subfolder name) to an
            iterable of document base names, without the ``.docx`` suffix.

    For every project a subfolder ``bugdir/<project>`` is created if it is
    missing. Each listed document found directly in *bugdir* is moved into
    that subfolder. After the attempt, a ``not exist:`` line is printed for
    every document that is not present in the subfolder.

    Base names that are ``None`` are skipped, and other non-string names
    (e.g. numbers) are converted with ``str()``. Previously either case
    raised ``TypeError`` on the string concatenation.
    """
    for project, names in data.items():
        bug_project = os.path.join(bugdir, project)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(bug_project, exist_ok=True)
        for name in names:
            if name is None:
                # No document name given for this entry; nothing to move.
                continue
            filename = str(name) + ".docx"
            bug_file = os.path.join(bugdir, filename)
            if os.path.exists(bug_file):
                shutil.move(bug_file, bug_project)
            # Report documents that are missing from the project folder,
            # whether or not a move was attempted above.
            newbug_file = os.path.join(bug_project, filename)
            if not os.path.exists(newbug_file):
                print('not exist: ', newbug_file)
def doc2docx():
    """Placeholder: not implemented; does nothing and returns None.

    NOTE(review): the name suggests a .doc -> .docx conversion step --
    confirm the intent before implementing.
    """
def read_docx(dirname):
    """Placeholder: not implemented; ignores *dirname* and returns None.

    NOTE(review): presumably meant to read the .docx files under
    *dirname* -- confirm the intent before implementing.
    """
def main():
    """Load the sheet -> document-name mapping, then sort the .docx files.

    Reads the workbook at read_excel_with_openpyxl's default path and
    passes the resulting mapping, together with the hard-coded bug-report
    directory, to mv_docx2_project_dir.
    """
    target_dir = "c:/2021-03-12-ttt-漏洞列表"
    docs_by_sheet = read_excel_with_openpyxl()
    mv_docx2_project_dir(target_dir, docs_by_sheet)
# Call main() only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()