数据导入流程
# Data import workflow: collect file paths, load the generated samples,
# then shuffle the path list.
from pathlib import Path

import numpy as np  # imported up front: np is needed before the shuffle step

# Root directory of the dataset.
data_root = Path('./data')
for item in data_root.iterdir():
    print(item)

# Every entry exactly two levels below the root, converted to plain strings.
all_data_path = [str(path) for path in data_root.glob('*/*')]

# Load each file directly under ./disc/gen with np.loadtxt and stack the results.
data_root = Path('./disc/gen')
data_path = [str(item) for item in data_root.iterdir()]
gen_data = np.array([np.loadtxt(path) for path in data_path])
print(gen_data.shape)

# Shuffle the collected paths in place.
np.random.shuffle(all_data_path)
数据导入导出
1. txt处理
import numpy as np

# Read a text file into an array.
a = np.loadtxt('file.txt')

# Write the array that was just loaded: four decimals, space separated.
# (The save call previously referenced `dat`, which this snippet never defines.)
np.savetxt('gen2.txt', a, fmt='%.4f', delimiter=' ')
2. mat处理
import scipy.io as scio
from scipy.io import loadmat

# One .mat file per epoch; `epoch` and `gen_datas` come from the surrounding code.
file_name = './gendata/data{0}.mat'.format(epoch)

# savemat expects a dict; the array is stored under the key 'data'.
scio.savemat(file_name, {'data': gen_datas})

# Read the same file back.
dat = loadmat(file_name)
3. csv处理
import pandas as pd

# Wrap the data in a DataFrame (note the capital F: pd.Dataframe does not exist)
# and write it as a bare CSV with no index column and no header row.
Dat = pd.DataFrame(data)
Dat.to_csv(file_name, index=False, header=False)
4. wav处理
import librosa

wav_path = 'drive/MyDrive/dataset/uk_duda.wav'
# sr=None keeps the file's native sampling rate instead of resampling.
x1, fs = librosa.load(wav_path, sr=None)
设置增量显存
import tensorflow as tf

print(tf.__version__)

# Let TensorFlow allocate GPU memory on demand instead of reserving it all at start-up.
# Looping over the device list (rather than indexing [0]) does nothing on a
# CPU-only machine instead of raising IndexError, and applies the same setting
# to every visible GPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)
分割数据
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    data_ds,
    all_data_labels,
    test_size=0.3,
    random_state=1,
)
one-hot
from keras.utils import to_categorical

# Replace xtrain with its one-hot encoding.
# NOTE(review): one-hot encoding is usually applied to the labels (ytrain),
# not the features - confirm that xtrain is really the intended target.
xtrain = to_categorical(xtrain)
打印模型
from keras.utils import plot_model

# Render the model architecture to model.png, with layer names but without
# tensor shapes.
plot_model(
    model,
    to_file='model.png',
    show_shapes=False,
    show_layer_names=True,
)
统计标签数量
# Distinct values found in ytrain, and how many times each one occurs.
unique, counts = np.unique(ytrain, return_counts=True)
混淆矩阵
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Raw counts, then divide each row by its own total so every row sums to 1.
conf_mat = confusion_matrix(ytest, predict_results)
row_totals = conf_mat.sum(axis=1)[:, np.newaxis]
cm_normalized = conf_mat.astype('float') / row_totals
print(cm_normalized)

# The tick labels below are Chinese, so select the SimSun font family.
matplotlib.rc("font", family='SimSun')
class_names = ['第一类', '第二类', '第三类', '第四类']

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm_normalized,
    annot=True,
    fmt='.2g',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)

# NOTE(review): this font size is set after the heatmap call, so it presumably
# only affects the axis labels created below - confirm that is intended.
plt.rcParams.update({"font.size": 22})
plt.xlabel('Predict')
plt.ylabel('Target')
采样多张并显示
# Pick 25 random training indices (randint may repeat an index) and show the
# corresponding images in a 5x5 grid.
indexes = np.random.randint(0, xtrain.shape[0], size=25)
images = xtrain[indexes]
labels = ytrain[indexes]

plt.figure(figsize=(6, 6))
for slot, _ in enumerate(indexes, start=1):
    image = images[slot - 1]
    plt.subplot(5, 5, slot)
    plt.imshow(image, cmap='gray')
    plt.axis('off')
plt.show()
在末尾增加一维
# Append a new axis of length 1 at the end of the array.
# The index must be the three-dot ASCII Ellipsis `...`; the single Unicode
# character U+2026 is a syntax error.
Data = data[..., np.newaxis]
保存图像
plt.axis('off')  # hide the axes
fig = plt.gcf()  # get the current figure
fig.set_size_inches(5, 5)  # set the figure size in inches
plt.plot(test_data)
fig.savefig('img01.png', bbox_inches='tight')  # crop surrounding whitespace
图像预处理
def load_and_preprocess_img(path):
    """Read a JPEG file and return it as a 160x160 float32 image scaled to [0, 1].

    Args:
        path: Path of the image file, passed to ``tf.io.read_file``.

    Returns:
        The decoded 3-channel image, resized with padding to 160x160,
        cast to float32 and divided by 255.
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    # Resize to the target size, padding as needed.
    img = tf.image.resize_with_pad(img, target_height=160, target_width=160)
    img = tf.cast(img, tf.float32)
    img = img / 255  # scale RGB values into the 0-1 range
    return img


import matplotlib.pyplot as plt

image_path = all_data_path[0]
label = labels[0]

# Pass the variable bound above; the call previously used the undefined `img_path`.
plt.imshow(load_and_preprocess_img(image_path))
plt.grid(False)
# plt.xlabel(caption_image(label))
print()
TSNE
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt

# Build the data series: stack the four data sets, then create one numeric
# label and one marker symbol per sample (`lenth` entries for each data set).
all_data = np.concatenate((disc_ori, draga_ori, pesq_ori, pesv_ori))
l1 = np.array([10] * lenth)
l2 = np.array([20] * lenth)
l3 = np.array([30] * lenth)
l4 = np.array([40] * lenth)
labels = np.concatenate((l1, l2, l3, l4))
symbols = ['*'] * lenth + ['#'] * lenth + ['+'] * lenth + ['o'] * lenth
matplotlib.rc("font", family='SimSun')


def plot_embedding_3d(X, Y, symbols, title=None, file=None):
    """Embed X in 3-D with t-SNE and draw each sample as a text symbol.

    Args:
        X: Samples handed to ``TSNE.fit_transform``.
        Y: Per-sample numeric labels; ``Y[i] / 80.`` selects the Set1 colour.
        symbols: Per-sample values drawn (via ``str``) at the embedded position.
        title: Optional plot title.
        file: Optional output name; the figure is saved as ``{file}.svg``.
    """
    embedder = TSNE(n_components=3, init='pca', random_state=2)
    embedded = embedder.fit_transform(X)

    # Rescale every embedded coordinate into the [0, 1] interval.
    lower = np.min(embedded, axis=0)
    upper = np.max(embedded, axis=0)
    scaled = (embedded - lower) / (upper - lower)

    # Each sample is drawn as its symbol at (scaled[i, 0], scaled[i, 1], scaled[i, 2]).
    fig = plt.figure(dpi=150)
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    for i, label in enumerate(Y):
        ax.text(
            scaled[i, 0],
            scaled[i, 1],
            scaled[i, 2],
            str(symbols[i]),
            color=plt.cm.Set1(label / 80.),
            fontdict={'weight': 'bold', 'size': 15},
        )

    if title is not None:
        plt.title(title)
    if file is not None:
        plt.savefig(f'{file}.svg')


# %%
plot_embedding_3d(all_data, labels, symbols, file='all_data')
numpy数据拼接
import numpy as np

# Horizontal stacking: the row counts match and the columns are appended.
aa = np.zeros(shape=(30, 12))
bb = np.zeros(shape=(30, 13))
cc = np.hstack([aa, bb])  # same as np.concatenate((aa, bb), axis=1)
print(cc.shape)  # (30, 25)

# Vertical stacking: the column counts match and the rows are appended.
aa = np.zeros(shape=(30, 12))
bb = np.zeros(shape=(20, 12))
cc = np.vstack([aa, bb])  # same as np.concatenate((aa, bb), axis=0)
print(cc.shape)  # (50, 12)
