#!/usr/bin/env python
# coding: utf-8
"""Compute, per company, the gaps between consecutive active days.

Reads the CREATE_TIME and COMPANY_NAME columns of a consume-record CSV,
reduces the records to one row per (company, calendar day), measures the
time from each active day to the company's next active day, and writes the
per-company column-wise maxima to a CSV file.
"""
import numpy as np
import pandas as pd

# Raw strings: the Windows paths contain backslashes that must not be
# interpreted as escape sequences.
INPUT_CSV = r"D:\Mywork\DB\MANIFEST_CONSUME_RECORD_202206081957.csv"
OUTPUT_CSV = r"D:\Mywork\DB\diff3"

# Stand-in "next active day" for each company's last recorded day.
# NOTE(review): presumably the reporting cutoff date -- confirm. Records
# dated after this value yield a negative gap for that last row.
FILL_DATE = '2022-06-01'


def compute_company_gaps(df: pd.DataFrame,
                         fill_date: str = FILL_DATE) -> pd.DataFrame:
    """Return the per-company maxima of the day-to-next-day gap table.

    Args:
        df: Frame with a ``CREATE_TIME`` column (anything accepted by
            ``pd.to_datetime``) and a ``COMPANY_NAME`` column. It is not
            modified.
        fill_date: ``YYYY-MM-DD`` string used as the "next day" for the
            last active day of each company.

    Returns:
        A frame indexed by ``COMPANY_NAME`` holding the column-wise maximum
        of ``CREATE_TIME``, ``yearmonthday``, ``newtime``, ``newtime2``,
        ``newtime3`` and ``diff``. Each column's maximum is taken
        independently, so the values in one output row do not necessarily
        come from the same input row.
    """
    # Work on a copy so the caller's frame is left untouched.
    records = df.copy()

    # Convert to a datetime type.
    records['CREATE_TIME'] = pd.to_datetime(records['CREATE_TIME'])
    # Keep only year-month-day, dropping hours/minutes/seconds.
    records['yearmonthday'] = records['CREATE_TIME'].dt.strftime('%Y-%m-%d')

    # Drop duplicates on the two fields, then sort by them, so each company
    # has exactly one row per active day in chronological order.
    daily = (
        records
        .drop_duplicates(subset=['COMPANY_NAME', 'yearmonthday'], keep='first')
        .sort_values(['COMPANY_NAME', 'yearmonthday'])
        .copy()
    )

    # New column: the same company's next active day.
    daily['newtime'] = daily.groupby('COMPANY_NAME')['yearmonthday'].shift(-1)
    # Replace missing "next day" values only. Filling the whole frame would
    # also turn a missing COMPANY_NAME into a company literally named after
    # the fill date.
    daily['newtime'] = daily['newtime'].fillna(fill_date)

    # Convert both day strings back to datetimes and take the difference.
    daily['newtime2'] = pd.to_datetime(daily['yearmonthday'])
    daily['newtime3'] = pd.to_datetime(daily['newtime'])
    daily['diff'] = daily['newtime3'] - daily['newtime2']

    return daily.groupby('COMPANY_NAME').max()


def main() -> None:
    """Load the input CSV, compute the gap summary and write it out."""
    df = pd.read_csv(INPUT_CSV, usecols=['CREATE_TIME', 'COMPANY_NAME'])
    group = compute_company_gaps(df)
    group.to_csv(OUTPUT_CSV, sep=',')


if __name__ == "__main__":
    main()