#!/usr/bin/env python
# coding: utf-8
"""Compute, per company, the largest gap between consecutive active days.

Reads a consume-record CSV (columns ``CREATE_TIME`` and ``COMPANY_NAME``),
reduces it to one row per company per calendar day, measures the time from
each active day to the company's next active day (or to a fixed cut-off date
for the last active day), and writes the per-company column-wise maxima to a
CSV file.
"""
import numpy as np
import pandas as pd

# Raw strings: Windows paths contain backslashes that must not be read as
# escape sequences.
INPUT_CSV = r"D:\Mywork\DB\MANIFEST_CONSUME_RECORD_202206081957.csv"
OUTPUT_CSV = r"D:\Mywork\DB\diff3"

# Date standing in for the "next active day" of each company's last active day.
CUTOFF_DATE = '2022-06-01'


def compute_company_gaps(df: pd.DataFrame,
                         cutoff_date: str = CUTOFF_DATE) -> pd.DataFrame:
    """Return the per-company maxima of the day-to-next-day gap table.

    Args:
        df: Frame with a ``CREATE_TIME`` column (anything ``pd.to_datetime``
            accepts) and a ``COMPANY_NAME`` column. It is not modified.
        cutoff_date: ``'YYYY-MM-DD'`` string used as the next active day for
            the last active day of every company.

    Returns:
        A frame indexed by ``COMPANY_NAME`` holding the column-wise maximum
        of ``CREATE_TIME``, ``yearmonthday``, ``newtime``, ``newtime2``,
        ``newtime3`` and ``diff``. Each column is maximised independently,
        so the values in one output row need not come from the same input
        row. Rows without a company name are excluded by the group-by.

    Note:
        ``diff`` for a company's last active day is ``cutoff_date`` minus
        that day, so it is negative when the day falls after the cut-off.
    """
    # Work on a copy so the caller's frame is left untouched.
    records = df.copy()

    # Convert to a datetime type.
    records['CREATE_TIME'] = pd.to_datetime(records['CREATE_TIME'])

    # Keep only year-month-day (no hours/minutes/seconds). The vectorised
    # accessor leaves missing timestamps missing instead of raising.
    records['yearmonthday'] = records['CREATE_TIME'].dt.strftime('%Y-%m-%d')

    # One row per (company, day), ordered by company and then by day.
    daily = (
        records
        .drop_duplicates(subset=['COMPANY_NAME', 'yearmonthday'], keep='first')
        .sort_values(['COMPANY_NAME', 'yearmonthday'])
        .copy()
    )

    # Next active day of the same company; the last day of each company has
    # none. Fill ONLY this column with the cut-off date: filling the whole
    # frame would also turn missing company names into the date string.
    next_day = daily.groupby('COMPANY_NAME')['yearmonthday'].shift(-1)
    daily['newtime'] = next_day.fillna(cutoff_date)

    # Convert both day strings back to datetimes and take the difference.
    daily['newtime2'] = pd.to_datetime(daily['yearmonthday'])
    daily['newtime3'] = pd.to_datetime(daily['newtime'])
    daily['diff'] = daily['newtime3'] - daily['newtime2']

    return daily.groupby('COMPANY_NAME').max()


def main() -> None:
    """Read the input CSV, compute the per-company maxima, and write them out."""
    df = pd.read_csv(INPUT_CSV, usecols=['CREATE_TIME', 'COMPANY_NAME'])
    group = compute_company_gaps(df)
    group.to_csv(OUTPUT_CSV, sep=',')


if __name__ == "__main__":
    main()