需要注意的几个点
- 脚本是按行复制粘贴的,如果抓取的文件表头设置不对,会出现错位

有的表可能不是按”表头1,表头2,…”这样设置,可能多了一列或者少了一列
- 有的表的单元格会链接到别的表,运行时会跳出弹窗
- 大多数的代码是用来处理合并单元格的
- 默认选择工作簿的第一张表,需要抓取的表可能在第二张
几个有用的库和函数
这一块我就直接cv大法了文件名去除后缀
def getFilenameWithoutSuffix(path):filename = os.path.basename(path)filename = filename.split('.')[1] #文件名是22.xxxx.xlsxreturn filename
正则匹配字符串
def matchStr(input_str,ref_str):if type(input_str) == str:#确定输入的是字符串match = re.search(ref_str,input_str)if match:return Trueelse:return Falseelse:return False
字符串区间的差值
#从"2:4"这样的字符串中获取差值def getDifNumber(input_str):num_list = input_str.split(':')dif_num = int(num_list[1]) - int(num_list[0])return dif_num
获取单元格的地址/列数转成字母
def getCellAddress(sheet,row,col):temp_address = sheet.Cells(row,col).Addresscell_address = temp_address.replace('$','')return cell_address
glob库
字符串拼接
代码
import os,globimport win32com.clientimport win32apiimport re,timeimport ioimport syssys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')#这三行为了让终端显示中文def getFilenameWithoutSuffix(path):filename = os.path.basename(path)filename = filename.split('.')[1] #文件名是22.xxxx.xlsxreturn filenamedef matchStr(input_str,ref_str):if type(input_str) == str:match = re.search(ref_str,input_str)if match:return Trueelse:return Falseelse:return Falsedef checkNewTable(sheet,rowNum,colNum):for row in range(1,rowNum+1):for col in range(1,colNum+1):if matchStr(sheet.Cells(row,col).Value,'设备招标情况'):return [True,row,col]elif matchStr(sheet.Cells(row,col).Value,'采购状态'):return [False,row,col]print('\033[1;35m 查询不到表头! \033[0m')return [0,0,0]def strProcess(input_str):input_str = input_str.replace('$','')#去除$号b=re.sub('[A-Za-z]','',input_str)#去除字母return b#从"2:4"这样的字符串中获取差值def getDifNumber(input_str):num_list = input_str.split(':')dif_num = int(num_list[1]) - int(num_list[0])return dif_numdef pasteRows(sheet,row,paste):paste_range = str(row) + ':' + str(row)sheet.Range(paste_range).PasteSpecial(Paste=paste)#要带列宽的话需要多执行一遍Paste=8def getCellAddress(sheet,row,col):temp_address = sheet.Cells(row,col).Addresscell_address = temp_address.replace('$','')return cell_addressfile_dir = glob.glob(r'E:\1、正在执行项目\*.xlsx')pos_col = 3 #以经办人这一列来确定是否为有效行sht2_row = 4 #开始复制的行try:xlApp = win32com.client.Dispatch('Excel.Application')xlBook2 = xlApp.Workbooks.Open(r'E:\测试\测试.xlsx')sht2 = xlBook2.Sheets(1)for i in range(len(file_dir)):xlBook1 = xlApp.Workbooks.Open(Filename=file_dir[i],ReadOnly=True)filenameWithoutSuffix = getFilenameWithoutSuffix(file_dir[i])print('\033[1;32;43m ',filenameWithoutSuffix,' \033[0m!')sht1 = xlBook1.Sheets(1) #默认选择第一张表rowNum = sht1.UsedRange.Rows.Count #返回文件1的行数colNum = sht1.UsedRange.Columns.Count #返回文件1的列数cell_state = checkNewTable(sht1,rowNum,colNum)header_row = cell_state[1] #如果表头是合并单元格,也没必要加一match_col = cell_state[2] #匹配列的列数if cell_state[0]==True:for row in range(header_row+1,rowNum+1):pos_cell = sht1.Cells(row,pos_col)cell = sht1.Cells(row,match_col)print('第',row,'行:',end=' ')sys.stdout.flush() #刷新缓存区if type(pos_cell.Value) == str:if pos_cell.MergeCells :temp_address = sht1.Cells(row,pos_col).MergeArea.Address#获取合并单元格的地址rows_range = strProcess(temp_address)#清洗得到的字符串print('合并单元格行数--',rows_range)sys.stdout.flush()dif_num = getDifNumber(rows_range)if matchStr(cell.Value,'合同已签') == False :print(cell.Value)sht1.Range(rows_range).Copy() #复制这几行pasteRows(sht2,sht2_row,14) #粘贴到总表cell_address = getCellAddress(sht2,sht2_row,colNum+1)sht2.Range(cell_address).Value = filenameWithoutSuffix#这两行在表末尾增加了一列,给单元格赋值,内容是表名称last_cell = getCellAddress(sht2,sht2_row + dif_num,colNum+1)merge_range = cell_address + ':' + last_cellsht2.Range(merge_range).Merge() #合并单元格sht2.Range(merge_range).Borders.LineStyle = 1 #加框线sht2.Range(merge_range).HorizontalAlignment = -4108 #设置水平居中print('复制到表2第',sht2_row,'行',end='***')sht2_row += dif_num + 1print('下次行数:',sht2_row)sys.stdout.flush()row += dif_numelse:print('非空的标准单元格:',pos_cell.Value)if matchStr(cell.Value,'合同已签')==False :sht1.Rows(row).Copy()#复制这几行pasteRows(sht2,sht2_row,14)sht2_row += 1sys.stdout.flush() #刷新缓存区sht2.Cells.FormatConditions.Delete()#清除条件格式,不然格子的颜色会乱掉xlApp.CutCopyMode = FalsexlBook1.Close()xlBook2.Save()finally:# if xlApp.ClipboardFormats[0]:# xlApp.CutCopyMode = False#清空剪贴板xlApp.quit()
