导包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.datasets import boston_housing
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras import utils
from tensorflow.keras import regularizers
pandas
显示所有行/列
import pandas as pd

# Show every column instead of eliding the middle of wide frames.
pd.set_option('display.max_columns', None)
# Show every row instead of eliding the middle of long frames.
pd.set_option('display.max_rows', None)
# Allow up to 100 characters per cell (the pandas default is 50).
# The full option name is used on purpose: abbreviated names are matched by
# pattern and can become ambiguous if pandas adds a similarly named option.
pd.set_option('display.max_colwidth', 100)
选取某些行和列
选取等于某些值的行记录 用 ==
# Keep only the rows whose 'column_name' value equals some_value.
df.loc[df['column_name'] == some_value]
选取某列的值属于给定集合的行记录 用 isin
# Keep only the rows whose 'column_name' value is one of some_values.
df.loc[df['column_name'].isin(some_values)]
多种条件的选取 用 &
# Combine two row masks with & (element-wise AND). The == comparison is
# parenthesised because & binds more tightly than ==.
df.loc[(df['column'] == some_value) & df['other_column'].isin(some_values)]
选取不等于某些值的行记录 用 !=
# Keep only the rows whose 'column_name' value differs from some_value.
df.loc[df['column_name'] != some_value]
isin 返回一个布尔类型的 Series;如果要选择不满足该条件的行,使用 ~ 对其取反
# Negate the isin mask with ~ to keep the rows whose value is NOT in some_values.
df.loc[~df['column_name'].isin(some_values)]
乱序
# Shuffle, method 1: sample 100% of the rows (frac=1) in random order.
# NOTE: the result is not bound to a name here; assign it to keep the shuffled frame.
df.sample(frac=1)
# Same shuffle, then rebuild a sequential index (drop=True discards the old index).
df.sample(frac=1).reset_index(drop=True)
# Shuffle, method 2: scikit-learn's shuffle helper.
from sklearn.utils import shuffle
df = shuffle(df)
# Shuffle and rebuild a sequential index (drop=True discards the old index).
df = shuffle(df).reset_index(drop=True)
文件I/O操作
使用open读写文件
# open(path, mode) -- mode "w": write (truncates), "r": read, "a": append.
# An explicit encoding keeps the file independent of the platform default.
with open("test.txt", "w", encoding="utf-8") as f:
    string = "i am {}\n"
    f.writelines(string.format(i) for i in range(10))
# --------------------------------------------------
# Read the file back as a list of lines. The file is consumed exactly once:
# calling f.read() first would leave nothing for a later `for line in f` loop.
with open("test.txt", "r", encoding="utf-8") as f:
    slist = f.readlines()
for line in slist:
    # Every line still ends with "\n"; strip it so print() does not double it.
    print(line.rstrip("\n"))
print(slist)
pickle
# General-purpose object serialization from the standard library.
import pickle

# Save the model. pickle writes bytes, so the file is opened in binary mode.
with open('model_name.pkl', 'wb') as f:
    pickle.dump(model, f)

# Load the model back.
# SECURITY: unpickling can execute arbitrary code. Only load files that you
# created yourself or that come from a trusted source.
with open('model_name.pkl', 'rb') as f:
    pkl_model = pickle.load(f)
OS操作
https://blog.csdn.net/STR_Liang/article/details/110057892?utm_medium=distribute.pc_relevant.none-task-blog-baidujs_title-1&spm=1001.2101.3001.4242
import os

# Directory that contains this script. __file__ can be a relative path (for
# example when the script is started as `python script.py` on older Python
# versions), in which case dirname() alone would return ''. Resolving it with
# abspath() first always yields a usable absolute directory.
current_path = os.path.dirname(os.path.abspath(__file__))
流式 json读取
import ijson

# ijson parses bytes, so the file is opened in binary mode; it handles the
# UTF-8 decoding itself.
with open('test.json', 'rb') as f:
    # ijson.items() is a lazy iterator: it yields one element of the array at
    # earth.europe at a time, so the whole document is never held in memory.
    # A for loop consumes it and stops cleanly when the data is exhausted.
    for europe_item in ijson.items(f, 'earth.europe.item'):
        print(europe_item)
print("数据读取完成")
压缩解压操作
常见cmd命令和linux命令
python方法解压缩
时间操作
以年月日时分秒输出当前时间
import time

# Take one snapshot of the local time, then render it as
# "year-month-day hour:minute:second".
now = time.localtime()
current_time = time.strftime("%Y-%m-%d %H:%M:%S", now)
print(current_time)
# Sample output: 2021-03-22 15:13:06
计时函数
import time


def run_time(start_time):
    """Print the current local time and the time elapsed since *start_time*.

    Args:
        start_time: A timestamp previously obtained from ``time.time()``.

    Returns:
        The elapsed time in seconds as a float, so callers can log or
        compare it in addition to the printed report.
    """
    # Measure first so that formatting and printing are not counted.
    elapsed = time.time() - start_time
    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(f"当前时间:{current_time}")
    print("耗时:%.3f sec" % elapsed)
    return elapsed
# Record the start timestamp right before the work to be timed.
start = time.time()
# -------run main---------
for _ in range(2):
    time.sleep(1)
# ------------------------
run_time(start)
# Sample output:
# 当前时间:2021-03-22 15:24:11
# 耗时:2.000 sec
保存最优模型日志
# Best evaluation score seen so far. It lives at module level so that it
# survives across evaluation rounds.
best_score = 0


def save_if_best(eval_accuracy, nb_eval_steps):
    """Save the model and append a log line when the score beats ``best_score``.

    A ``global`` statement is only valid inside a function and before the
    name is assigned, so the update logic is wrapped in a function while
    ``best_score`` itself stays a module-level variable.

    NOTE(review): ``save``, ``model``, ``optimizer`` and ``output_model_path``
    are not defined in this snippet; they are expected to be provided by the
    surrounding training script -- confirm before use.

    Args:
        eval_accuracy: Accumulated accuracy over the evaluation steps.
        nb_eval_steps: Number of evaluation steps (must be non-zero).

    Returns:
        True if a new best score was recorded and the model saved, else False.
    """
    global best_score
    score = eval_accuracy / nb_eval_steps
    # Only a strictly better score triggers a save.
    if best_score < score:
        best_score = score
        # Save the model weights.
        save(model, optimizer, best_score)
        # Record the new best score, on the console and in the run log.
        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        log_line = (str(current_time) + " best_score:" + str(best_score)
                    + " save_model_path:" + output_model_path)
        print(log_line)
        with open("./run_log.txt", "a", encoding="utf-8") as f:
            f.write(log_line + "\n")
        return True
    return False
画图
论文配图1
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['Arial']  # set to ['SimHei'] to render Chinese text
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
colors = ['salmon','lightskyblue','#FFF68F','palegreen','lightpink','silver','burlywood','plum','rosybrown']
# plt.switch_backend('agg')
# Keep a handle on the figure: fig1.savefig() below needs it.
fig1 = plt.figure(figsize=(8,6))
plt.grid(linestyle="--")  # dashed background grid lines
ax = plt.gca()
ax.spines['top'].set_visible(False)  # hide the top border
ax.spines['right'].set_visible(False)  # hide the right border
# NOTE(review): `history` is not defined in this snippet; presumably the
# object returned by a Keras model.fit() call -- confirm against the caller.
plt.plot(history.history['loss'], "red", linewidth=1.5)
plt.plot(history.history['val_loss'], 'blue', linewidth=1.5)
plt.legend(['Training loss', 'Validation Loss'], fontsize=14)
leg = plt.gca().get_legend()
ltext = leg.get_texts()
plt.setp(ltext, fontsize=14, fontweight='bold')  # legend font size and weight
plt.xticks([x*2 for x in range(0,20)],fontsize=12, fontweight='bold')  # the default font size is 10
plt.yticks(fontsize=12, fontweight='bold')
plt.xlabel('Epochs ', fontsize=14,fontweight='bold')
plt.ylabel('Loss', fontsize=14,fontweight='bold')
plt.xlim(0,20)  # x-axis range
plt.ylim(0,1.6)  # y-axis range
# Save before show(). The ./img directory must already exist, because
# savefig does not create missing directories.
fig1.savefig('./img/loss_TextAttBiRNN.png')
plt.show()