导包

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from sklearn.preprocessing import MinMaxScaler
  5. import tensorflow as tf
  6. from tensorflow.keras.datasets import boston_housing
  7. from tensorflow.keras.models import Model
  8. from tensorflow.keras.preprocessing import sequence
  9. from tensorflow.keras.layers import Dense,Dropout
  10. from tensorflow.keras import utils
  11. from tensorflow.keras import regularizers

pandas

显示所有行/列

  1. import pandas as pd
  2. #显示所有列
  3. pd.set_option('display.max_columns', None)
  4. #显示所有行
  5. pd.set_option('display.max_rows', None)
  6. #设置value的显示长度为100,默认为50
  7. pd.set_option('max_colwidth',100)

选取某些行和列

  1. 选取等于某些值的行记录 ==
  2. df.loc[df['column_name'] == some_value]
  3. 选取某列的值属于给定集合的行记录 isin
  4. df.loc[df['column_name'].isin(some_values)]
  5. 多种条件的选取 &
  6. df.loc[(df['column'] == some_value) & df['other_column'].isin(some_values)]
  7. 选取不等于某些值的行记录 !=
  8. df.loc[df['column_name'] != some_value]
  9. isin返回布尔序列,如果要选择不满足该条件的行,使用~取反
  10. df.loc[~df['column_name'].isin(some_values)]

乱序

  1. # 乱序方法1
  2. df.sample(frac=1)
  3. # 乱序后重新建立顺序性索引
  4. df.sample(frac=1).reset_index(drop=True)
  5. # 乱序方法2
  6. from sklearn.utils import shuffle
  7. df = shuffle(df)
  8. # 乱序后重新建立顺序性索引
  9. df = shuffle(df).reset_index(drop=True)

文件I/O操作

使用open读写文件

  1. # open("文件路径","操作") w:写入 r:读取 a:追加
  2. with open("test.txt","w") as f:
  3.     string = "i am {}\n"
  4.     for i in range(10):
  5.         f.write(string.format(i))
  6. #--------------------------------------------------
  7. slist = []
  8. with open("test.txt","r") as f:
  9.     slist=f.read()
  10.     for line in f:  # 注意:read() 已读到文件末尾,此循环不会输出任何行;如需逐行遍历,先执行 f.seek(0)
  11.         print(line)
  12. print(slist)

pickle

  1. # 万能保存库
  2. import pickle
  3. # 保存模型
  4. with open('model_name.pkl','wb') as f:
  5.     pickle.dump(model,f)
  6. # 加载模型
  7. with open('model_name.pkl','rb') as f:
  8.     pkl_model = pickle.load(f)

OS操作

https://blog.csdn.net/STR_Liang/article/details/110057892?utm_medium=distribute.pc_relevant.none-task-blog-baidujs_title-1&spm=1001.2101.3001.4242

  1. import os
  2. current_path = os.path.dirname(__file__)

流式 json读取

  1. import ijson
  2. with open('test.json', 'r', encoding='utf-8') as f:
  3.     objects = ijson.items(f, 'earth.europe.item')
  4.     #这个objects在这里就是相当于一个生成器,可以调用next函数取它的下一个值
  5.     while True:
  6.         try:
  7.             print(objects.__next__())
  8.         except StopIteration as e:
  9.             print("数据读取完成")
  10.             break

压缩解压操作

常见cmd命令和linux命令

python方法解压缩

时间操作

以年月日时分秒输出当前时间

  1. import time
  2. current_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())
  3. print(current_time)
  4. ##输出结果:2021-03-22 15:13:06

计时函数

  1. import time
  2. def run_time(start_time):
  3.     current_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())
  4.     print(f"当前时间:{current_time}")
  5.     print("耗时:%.3f sec" %(time.time()-start_time))
  6.     pass
  7. start = time.time()
  8. # -------run main---------
  9. for i in range(2):
  10.     time.sleep(1)
  11.     pass
  12. # ------------------------
  13. run_time(start)
  14. ## 输出结果:
  15. # 当前时间:2021-03-22 15:24:11
  16. # 耗时:2.000 sec

保存最优模型日志

  1. best_score = 0  # 在模块级初始化
  2. global best_score  # 需写在训练/评估函数内部;与上一行处于同一作用域时会触发 SyntaxError
  3. # 每次只保存高于best_score的模型和参数
  4. if best_score < eval_accuracy / nb_eval_steps:
  5.     best_score = eval_accuracy / nb_eval_steps
  6.     # 保存模型权重
  7.     save(model, optimizer,best_score)
  8.     # 记录最佳分数日志
  9.     current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  10.     with open("./run_log.txt", "a", encoding="utf-8") as f:
  11.         print(str(current_time)+" best_score:" + str(best_score) + " save_model_path:" + output_model_path)
  12.         f.write(str(current_time) + " best_score:" + str(best_score) + " save_model_path:" + output_model_path+ "\n")

画图

论文配图1

  1. import matplotlib.pyplot as plt
  2. plt.rcParams['font.sans-serif'] = ['Arial'] # 如果要显示中文字体,则在此处设为:SimHei
  3. plt.rcParams['axes.unicode_minus'] = False # 显示负号
  4. colors = ['salmon','lightskyblue','#FFF68F','palegreen','lightpink','silver','burlywood','plum','rosybrown']
  5. # plt.switch_backend('agg')
  6. fig1 = plt.figure(figsize=(8,6))
  7. plt.grid(linestyle="--") # 设置背景网格线为虚线
  8. ax = plt.gca()
  9. ax.spines['top'].set_visible(False) # 去掉上边框
  10. ax.spines['right'].set_visible(False) # 去掉右边框
  11. plt.plot(history.history['loss'], "red", linewidth=1.5)
  12. plt.plot(history.history['val_loss'], 'blue', linewidth=1.5)
  13. plt.legend(['Training loss', 'Validation Loss'], fontsize=14)
  14. leg = plt.gca().get_legend()
  15. ltext = leg.get_texts()
  16. plt.setp(ltext, fontsize=14, fontweight='bold') # 设置图例字体的大小和粗细
  17. plt.xticks([x*2 for x in range(0,20)],fontsize=12, fontweight='bold') # 默认字体大小为10
  18. plt.yticks(fontsize=12, fontweight='bold')
  19. plt.xlabel('Epochs ', fontsize=14,fontweight='bold')
  20. plt.ylabel('Loss', fontsize=14,fontweight='bold')
  21. plt.xlim(0,20) # 设置x轴的范围
  22. plt.ylim(0,1.6) # 设置y轴的范围
  23. fig1.savefig('./img/loss_TextAttBiRNN.png')
  24. plt.show()

image.png