数据量较大时,可以分块读取 Excel 文件。
import pandas as pd

read_path = r'D:\06_items\01_数据处理\dispose_data\aaa.xlsx'


def read_thunk(read_path, nrows: int) -> pd.DataFrame:
    """Read the first worksheet of an Excel workbook in chunks of ``nrows`` rows.

    The header row is read once to obtain the column names; the data rows
    are then read ``nrows`` at a time and concatenated into one DataFrame.

    Args:
        read_path: Workbook location, passed to ``pd.ExcelFile``.
        nrows: Number of data rows to read per chunk; must be at least 1.

    Returns:
        A DataFrame holding every data row of the first worksheet exactly
        once, labelled with the header's column names and indexed
        0..n-1. When the sheet has no data rows, the empty header-only
        frame is returned.

    Raises:
        ValueError: If ``nrows`` is smaller than 1.
    """
    # Reject a non-positive chunk size up front: with nrows == 0 the first
    # chunk would be empty and nothing would ever be read.
    if nrows < 1:
        raise ValueError(f"nrows must be a positive integer, got {nrows!r}")

    # Open the workbook once, reuse the handle for every read, and let the
    # context manager close it when done.
    with pd.ExcelFile(read_path) as xl:
        print('xl--->', xl)
        sheet_names = xl.sheet_names
        print(sheet_names)
        sheet = sheet_names[0]

        # Read only the header row (nrows=0), so that no data row ends up
        # both here and in the first chunk.
        df_header = pd.read_excel(xl, sheet_name=sheet, nrows=0)
        print(f"Excel file: {read_path} (worksheet: {sheet})")

        chunks = []
        i_chunk = 0
        skiprows = 1  # start just below the header row
        while True:
            df_chunk = pd.read_excel(
                xl,
                sheet_name=sheet,
                nrows=nrows,
                skiprows=skiprows,
                header=None,
            )
            skiprows += nrows
            # shape[0] is the number of rows read; zero rows ends the loop.
            if df_chunk.shape[0] == 0:
                break
            print(f" - chunk {i_chunk} ({df_chunk.shape[0]} rows)")
            chunks.append(df_chunk)
            i_chunk += 1

    # Header-only (or completely empty) sheet: pd.concat([]) would raise.
    if not chunks:
        return df_header

    # ignore_index gives the result one continuous 0..n-1 index instead of
    # repeating each chunk's own 0..nrows-1 labels.
    df = pd.concat(chunks, ignore_index=True)
    # Chunks were read with header=None, so their columns are positional
    # integers; map each position to the matching header name.
    df = df.rename(columns=dict(enumerate(df_header.columns.tolist())))
    return df


if __name__ == '__main__':
    # Read 1000 rows per chunk.
    df = read_thunk(read_path, 1000)
