https://blog.csdn.net/zhouz92/article/details/107179616

    from docx import Document

    # Walkthrough: inspect the first table of a Word document with python-docx.
    # Document() can create a new Word file as well as open an existing one.
    doc = Document('test03.docx')
    tables = doc.tables  # list of all table objects in the document
    print(tables)
    # [<docx.table.Table object at 0x000001957059CD48>]
    print(len(tables))  # number of tables in the document
    # 1
    table0 = tables[0]  # the first table object
    # Style information of the table.
    print(table0.style)
    # _TableStyle('Normal Table') id: 190621384
    # Collect all cells of the table, row by row. This goes through the public
    # rows/cells API rather than the private ``table0._cells`` attribute.
    cells = [cell for row in table0.rows for cell in row.cells]
    print(len(cells))  # number of cells in the table
    # 15
    # Text of every cell.
    cells_string = [cell.text for cell in cells]
    print(cells_string)
    # Column count of the table.
    col_num = len(table0.columns)
    print(col_num)  # 3
    # Row count of the table.
    row_num = len(table0.rows)
    print(row_num)  # 5
    # A single row object.
    row0 = table0.rows[0]
    # A single column object.
    col0 = table0.columns[0]
    # Text of a row: go through row0.cells, because the text is read from the
    # cells of the row object.
    row0_string = [cell.text for cell in row0.cells]
    print(row0_string)
    # Text of a column, read through col0.cells in the same way.
    col0_string = [cell.text for cell in col0.cells]
    print(col0_string)
    # Original article: https://blog.csdn.net/zhouz92/article/details/107179616

    https://www.jb51.net/article/143936.htm
    https://www.jb51.net/article/167040.htm

    https://zhuanlan.zhihu.com/p/158806667
    https://blog.csdn.net/zhouz92/article/details/106883774
    https://www.cnblogs.com/danhuai/p/11700407.html

    import xlrd

    # Walkthrough: read an .xls workbook with xlrd.
    # Way 1 of opening the file:
    work_book = xlrd.open_workbook('test01.xls')
    # Fetch a sheet object by index.
    sheet_1 = work_book.sheet_by_index(0)
    print(sheet_1)
    # ------ output ------
    # <xlrd.sheet.Sheet object at 0x000001CE3473C550>
    # ------ output ------
    # Fetch a sheet object by its name; the name is case-sensitive.
    sheet_2 = work_book.sheet_by_name('Sheet2')
    print(sheet_2)
    # Read the first sheet row by row.
    data_row = [sheet_1.row_values(row) for row in range(sheet_1.nrows)]
    print(data_row)
    # Read the first sheet column by column.
    data_col = [sheet_1.col_values(i) for i in range(sheet_1.ncols)]
    print(data_col)
    # Read every sheet of test01.xls row by row, keyed by the sheet's position.
    all_data = {
        i: [sheet_obj.row_values(row) for row in range(sheet_obj.nrows)]
        for i, sheet_obj in enumerate(work_book.sheets())
    }
    print(all_data)
    # Original article: https://blog.csdn.net/zhouz92/article/details/106883774
    import os
    import shutil

    from openpyxl.reader.excel import load_workbook
    def read_excel_with_openpyxl(filename="C:/pubip-2021-03-17.xlsx"):
        """Collect the first-column values of every sheet in an .xlsx workbook.

        Args:
            filename: Path of the workbook to load.

        Returns:
            dict mapping each sheet name to a list holding the first value of
            every row that has more than three columns. Values are returned as
            openpyxl delivers them (they are not converted to ``str``).

        The sheet names, each sheet's title / max_row / max_column, every
        collected value and the final mapping are printed as progress output.
        """
        wb = load_workbook(filename)
        # ``wb.sheetnames`` and ``wb[name]`` replace the deprecated
        # ``get_sheet_names()`` / ``get_sheet_by_name()`` accessors.
        sheetnames = wb.sheetnames
        print(sheetnames)
        data_dic = {}
        for sname in sheetnames:
            ws = wb[sname]
            print(ws.title)
            print(ws.max_row)
            print(ws.max_column)
            first_column = []
            # ``ws.values`` is iterated one row at a time; each item is indexed
            # and measured like a sequence of that row's cell values.
            for row_values in ws.values:
                # Only rows wider than three columns are taken into account.
                if len(row_values) > 3:
                    print(row_values[0])
                    first_column.append(row_values[0])
            print(first_column)
            data_dic[sname] = first_column
        print(data_dic)
        return data_dic
    def mv_docx2_project_dir(bugdir, data):
        """Sort ``<name>.docx`` files found in *bugdir* into per-project subfolders.

        Args:
            bugdir: Directory that holds the ``.docx`` files and receives one
                subfolder per key of *data*.
            data: Mapping of subfolder name to an iterable of document base
                names (without the ``.docx`` extension). ``None`` entries are
                skipped; other non-string entries are converted with ``str``.

        For every name, ``<bugdir>/<name>.docx`` is moved into
        ``<bugdir>/<key>/`` when it exists. A ``not exist:`` line is printed for
        each document that is not inside its subfolder afterwards.
        """
        for project, doc_names in data.items():
            # Make sure the per-project folder exists before moving into it.
            bug_project = os.path.join(bugdir, project)
            if not os.path.isdir(bug_project):
                os.mkdir(bug_project)
            for doc_name in doc_names:
                # Spreadsheet-sourced names may be empty cells (None) or numbers;
                # concatenating those with ".docx" would raise TypeError.
                if doc_name is None:
                    continue
                file_name = str(doc_name) + ".docx"
                bug_file = os.path.join(bugdir, file_name)
                if os.path.exists(bug_file):
                    shutil.move(bug_file, bug_project)
                # Report documents that are still missing from the subfolder.
                newbug_file = os.path.join(bug_project, file_name)
                if not os.path.exists(newbug_file):
                    print('not exist: ', newbug_file)
    def doc2docx():
        """Placeholder that does nothing yet and returns ``None``.

        NOTE(review): the name suggests a .doc -> .docx conversion step is
        planned here; confirm the intent before implementing.
        """
    def read_docx(dirname):
        """Placeholder that ignores *dirname*, does nothing yet and returns ``None``.

        NOTE(review): presumably meant to read the .docx files under
        *dirname*; confirm the intent before implementing.
        """
    def main():
        """Read the workbook with its default path and sort the .docx files.

        The mapping returned by ``read_excel_with_openpyxl()`` is handed to
        ``mv_docx2_project_dir()`` together with the hard-coded bug directory.
        """
        data = read_excel_with_openpyxl()
        bugdir = "c:/2021-03-12-ttt-漏洞列表"
        mv_docx2_project_dir(bugdir, data)
    # Run main() only when the file is executed as a script, not on import.
    if __name__ == "__main__":
        main()