数据导入流程

  # Collect file paths from the data directories and load the generated data.
  from pathlib import Path

  import numpy as np  # imported before first use (np.loadtxt / np.array below)

  data_root = Path('./data')  # root directory as a Path object
  for item in data_root.iterdir():
      print(item)

  # Entries exactly one subdirectory level below data_root, as plain strings.
  all_data_path = [str(path) for path in data_root.glob('*/*')]

  # Load every entry of ./disc/gen with np.loadtxt and stack the results.
  data_root = Path('./disc/gen')
  data_path = [str(item) for item in data_root.iterdir()]
  gen_data = np.array([np.loadtxt(path) for path in data_path])
  gen_data.shape  # echoes the shape when run interactively (e.g. a notebook cell)

  # Shuffle the path list in place.
  np.random.shuffle(all_data_path)

数据导入导出

1. txt处理

  import numpy as np

  # Read a whitespace-delimited text file into an array.
  a = np.loadtxt('file.txt')

  # Write an array to text: 4 decimal places, space-separated columns.
  # NOTE(review): `dat` is not defined in this snippet (the line above loads
  # into `a`) -- confirm which array is meant to be saved.
  np.savetxt('gen2.txt', dat, fmt='%.4f', delimiter=' ')

2. mat处理

  import scipy.io as scio
  from scipy.io import loadmat

  # `epoch` and `gen_datas` are expected to be defined by the surrounding code.
  file_name = './gendata/data{0}.mat'.format(epoch)

  # savemat takes a dict: each key becomes a variable name in the .mat file.
  scio.savemat(file_name, {'data': gen_datas})

  # Read the file back.
  dat = loadmat(file_name)

3. csv处理

  import pandas as pd

  # `data` and `file_name` are expected to be defined by the surrounding code.
  # The class is spelled `DataFrame`; `pd.Dataframe` raises AttributeError.
  Dat = pd.DataFrame(data)

  # Write values only: no index column and no header row.
  Dat.to_csv(file_name, index=False, header=False)

4. wav处理

  import librosa

  # sr=None keeps the file's native sampling rate instead of resampling;
  # the call returns the samples and the sampling rate.
  x1, fs = librosa.load('drive/MyDrive/dataset/uk_duda.wav', sr=None)

设置增量显存

  import tensorflow as tf

  print(tf.__version__)

  physical_devices = tf.config.experimental.list_physical_devices('GPU')
  # Enable on-demand memory growth on every visible GPU. Looping (rather than
  # indexing [0]) is a no-op on CPU-only machines instead of an IndexError,
  # and keeps the setting the same across all GPUs.
  for gpu in physical_devices:
      tf.config.experimental.set_memory_growth(gpu, True)

分割数据

  from sklearn.model_selection import train_test_split

  # Hold out 30% of the samples for testing; random_state fixes the shuffle so
  # the split is reproducible. `data_ds` and `all_data_labels` are expected to
  # be defined by the surrounding code.
  xtrain, xtest, ytrain, ytest = train_test_split(
      data_ds,
      all_data_labels,
      test_size=0.3,
      random_state=1,
  )

one-hot

  from keras.utils import to_categorical

  # Convert integer class indices to one-hot vectors.
  # NOTE(review): this encodes `xtrain`; one-hot encoding is usually applied
  # to the label array -- confirm that `xtrain` is the intended input.
  xtrain = to_categorical(xtrain)

打印模型

  from keras.utils import plot_model

  # Render the model graph to model.png, showing layer names but not shapes.
  # `model` is expected to be defined by the surrounding code.
  plot_model(
      model,
      to_file='model.png',
      show_shapes=False,
      show_layer_names=True,
  )

统计标签数量

  import numpy as np

  # Distinct label values in `ytrain` and how many times each one occurs.
  unique, counts = np.unique(ytrain, return_counts=True)

混淆矩阵

  import matplotlib
  import matplotlib.pyplot as plt
  import numpy as np
  import seaborn as sns
  from sklearn.metrics import confusion_matrix

  # `ytest` and `predict_results` are expected to be defined by the
  # surrounding code.
  conf_mat = confusion_matrix(ytest, predict_results)

  # Divide every row by its own sum so each row adds up to 1.
  cm_normalized = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
  print(cm_normalized)

  # Presumably selected so the Chinese tick labels render -- the SimSun font
  # must be installed on the machine.
  matplotlib.rc("font", family='SimSun')
  plt.figure(figsize=(8, 6))

  # The same class names label both axes of the heatmap.
  class_names = ['第一类', '第二类', '第三类', '第四类']
  sns.heatmap(
      cm_normalized,
      annot=True,
      fmt='.2g',
      cmap='Blues',
      xticklabels=class_names,
      yticklabels=class_names,
  )

  # Updated after the heatmap is drawn, exactly as in the original order.
  plt.rcParams.update({"font.size": 22})
  plt.xlabel('Predict')
  plt.ylabel('Target')

采样多张并显示

  import matplotlib.pyplot as plt
  import numpy as np

  # Draw 25 random sample indices (with replacement) from the training set.
  indexes = np.random.randint(0, xtrain.shape[0], size=25)
  images = xtrain[indexes]
  labels = ytrain[indexes]

  # Show the sampled images on a 5x5 grid without axes.
  plt.figure(figsize=(6, 6))
  for i, image in enumerate(images):
      plt.subplot(5, 5, i + 1)
      plt.imshow(image, cmap='gray')
      plt.axis('off')
  plt.show()

在末尾增加一维

  import numpy as np

  # Append a new axis of length 1 at the end of `data`. The index must be the
  # three ASCII dots `...` (Ellipsis); the single Unicode character "…" is a
  # syntax error.
  Data = data[..., np.newaxis]

保存图像

  import matplotlib.pyplot as plt

  plt.axis('off')  # hide the axes
  fig = plt.gcf()  # get the current figure
  fig.set_size_inches(5, 5)  # set the figure size in inches
  plt.plot(test_data)  # `test_data` is expected to be defined by the surrounding code
  fig.savefig('img01.png', bbox_inches='tight')  # save without surrounding whitespace

图像预处理

  def load_and_preprocess_img(path, target_height=160, target_width=160):
      """Read a JPEG file and return it as a padded, rescaled float image.

      Args:
          path: Path of the JPEG file, passed to ``tf.io.read_file``.
          target_height: Output height in pixels (default 160, the value
              previously hard-coded).
          target_width: Output width in pixels (default 160, the value
              previously hard-coded).

      Returns:
          A float32 tensor with 3 channels whose values have been divided
          by 255.
      """
      raw = tf.io.read_file(path)
      img = tf.image.decode_jpeg(raw, channels=3)
      # Resize to the target size, padding instead of distorting the aspect ratio.
      img = tf.image.resize_with_pad(
          img,
          target_height=target_height,
          target_width=target_width,
      )
      img = tf.cast(img, tf.float32)
      # Bring the RGB values into the 0-1 range.
      return img / 255
  import matplotlib.pyplot as plt

  # Preview the first image after preprocessing. `all_data_path`, `labels` and
  # `load_and_preprocess_img` are expected to be defined by the surrounding code.
  image_path = all_data_path[0]
  label = labels[0]
  # Use `image_path`, the name assigned above; `img_path` was never defined.
  plt.imshow(load_and_preprocess_img(image_path))
  plt.grid(False)
  # Optional caption, left disabled (caption_image is not defined in these notes):
  # plt.xlabel(caption_image(label))
  print()

TSNE

  import matplotlib
  import matplotlib.pyplot as plt
  import numpy as np
  # NOTE(review): Axes3D is not referenced by name; presumably imported so that
  # older matplotlib versions register the '3d' projection -- confirm before
  # removing.
  from mpl_toolkits.mplot3d import Axes3D
  from sklearn.manifold import TSNE

  # Build the data set: stack the four source arrays (expected to be defined by
  # the surrounding code) and give each group its own numeric label and marker.
  all_data = np.concatenate((disc_ori, draga_ori, pesq_ori, pesv_ori))

  # `lenth` is the per-group sample count, defined by the surrounding code.
  l1 = np.full(lenth, 10)
  l2 = np.full(lenth, 20)
  l3 = np.full(lenth, 30)
  l4 = np.full(lenth, 40)
  labels = np.concatenate((l1, l2, l3, l4))
  symbols = ['*'] * lenth + ['#'] * lenth + ['+'] * lenth + ['o'] * lenth

  matplotlib.rc("font", family='SimSun')
  def plot_embedding_3d(X, Y, symbols, title=None, file=None):
      """Embed X into three dimensions with t-SNE and draw each sample as text.

      Args:
          X: Samples to embed, passed to ``TSNE.fit_transform``.
          Y: One numeric label per sample; ``Y[i] / 80`` selects the colour
              from the ``Set1`` colormap.
          symbols: One marker per sample; ``str(symbols[i])`` is drawn at the
              embedded position of sample ``i``.
          title: Optional figure title.
          file: Optional output name; the figure is saved as ``{file}.svg``.
      """
      tsne = TSNE(n_components=3, init='pca', random_state=2)
      embedded = tsne.fit_transform(X)

      # Rescale every coordinate axis to the [0, 1] interval.
      lower = np.min(embedded, axis=0)
      upper = np.max(embedded, axis=0)
      span = upper - lower
      # An axis with zero range would divide by zero; map it to 0 instead.
      span[span == 0] = 1
      coords = (embedded - lower) / span

      # Draw each sample's symbol at its embedded position
      # (coords[i, 0], coords[i, 1], coords[i, 2]).
      fig = plt.figure(dpi=150)
      ax = fig.add_subplot(1, 1, 1, projection='3d')
      for i, label in enumerate(Y):
          ax.text(
              coords[i, 0],
              coords[i, 1],
              coords[i, 2],
              str(symbols[i]),
              color=plt.cm.Set1(label / 80.),
              fontdict={'weight': 'bold', 'size': 15},
          )

      if title is not None:
          plt.title(title)
      if file is not None:
          plt.savefig(f'{file}.svg')
  # %%
  # Embed and plot the combined data set; the figure is saved as all_data.svg.
  plot_embedding_3d(all_data, labels, symbols, file='all_data')

numpy数据拼接

  import numpy as np

  # Horizontal concatenation: the row counts must match.
  aa = np.zeros(shape=(30, 12))
  bb = np.zeros(shape=(30, 13))
  cc = np.hstack([aa, bb])  # same as np.concatenate((aa, bb), axis=1)
  print(cc.shape)  # (30, 25)

  # Vertical concatenation: the column counts must match.
  aa = np.zeros(shape=(30, 12))
  bb = np.zeros(shape=(20, 12))
  cc = np.vstack([aa, bb])  # same as np.concatenate((aa, bb), axis=0)
  print(cc.shape)  # (50, 12)