# coding=utf-8
"""Count the 911 calls in ./911.csv per category, in two different ways.

The category of a call is the text before the first ": " in its "title"
column. Approach 1 builds a 0/1 indicator matrix (one column per category)
and sums its columns; approach 2 adds a "cate" column and uses groupby.

Both approaches index into the split title (``i[0]``), so a missing
(non-string) title would raise a TypeError.
"""
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# ---------------------------------------------------------------------------
# Approach 1: indicator matrix
# ---------------------------------------------------------------------------
df = pd.read_csv("./911.csv")
print(df.head(5))

# Get the category of every row.
temp_list = df["title"].str.split(": ").tolist()
row_cates = pd.Series([i[0] for i in temp_list], index=df.index)

# Distinct categories; unique() keeps first-appearance order, so the column
# order (and the printed output) is the same on every run, unlike set().
cate_list = row_cates.unique().tolist()
print(cate_list)

# Build an all-zero frame: one row per call, one column per category.
zeros_df = pd.DataFrame(
    np.zeros((df.shape[0], len(cate_list))),
    index=df.index,
    columns=cate_list,
)

# Flag each row in the column of its own category.
# - A single .loc assignment is used instead of zeros_df[cate][mask] = 1:
#   chained indexing writes to a temporary and is a no-op under pandas
#   Copy-on-Write.
# - The mask compares the extracted category for equality instead of using
#   str.contains(cate), which would treat the name as a regex and match it
#   anywhere in the title.
for cate in cate_list:
    zeros_df.loc[row_cates == cate, cate] = 1

# Column sums give the number of calls per category.
sum_ret = zeros_df.sum(axis=0)
print(sum_ret)

# ---------------------------------------------------------------------------
# Approach 2: category column + groupby
# ---------------------------------------------------------------------------
df = pd.read_csv("./911.csv")
print(df.head(5))

# Get the category of every row.
temp_list = df["title"].str.split(": ").tolist()
cate_list = [i[0] for i in temp_list]

# Assign the list positionally; no ndarray/DataFrame round trip is needed and
# the result does not depend on index alignment.
df["cate"] = cate_list

# Select the column before counting so only "title" is aggregated.
print(df.groupby(by="cate")["title"].count())