Pandas
NumPy
https://blog.csdn.net/weixin_43593330/article/details/89882187
实例:numpy读取CSV文件
import paddle
import numpy as np
import json
import matplotlib.pyplot as plt
import os
# import csv
# Locations of the training and test CSV files.
train_data_path = "work/kidney_data2.csv"
test_data_path = "work/kidney_data_test.csv"

# np.loadtxt(fname, dtype=..., delimiter=None, skiprows=0, usecols=None, unpack=False)
#   fname     -- path of the file to read (train_data_path here)
#   dtype     -- element type of the resulting array; np.float32 here
#   delimiter -- field separator; "," because the file is comma-separated
#   skiprows  -- number of leading lines to skip; the first 2 here
#   usecols   -- indices of the columns to keep; columns 1 through 9 here
train_dataset = np.loadtxt(
    train_data_path,
    usecols=tuple(range(1, 10)),
    skiprows=2,
    delimiter=",",
    dtype=np.float32,
)

# Echo the array so an interactive session displays it.
train_dataset
array([[ 0. , 9.8 , 4.1 , …, 82. , 35. , 0.57],
[ 0. , 10.3 , 5.3 , …, 142. , 48. , 0.66],
[ 0. , 11.8 , 5.1 , …, 64. , 31.2 , 0.5 ],
…,
[ 1. , 9.8 , 4.2 , …, 50.5 , 17.1 , 0.66],
[ 1. , 11.2 , 5.2 , …, 58.5 , 15.2 , 0.74],
[ 1. , 11.6 , 5.7 , …, 128. , 36.5 , 0.71]],
dtype=float32)
# Read the test-set file the same way as the training set: float32 values,
# comma-separated, first 2 lines skipped, columns 1 through 9 kept.
test_dataset = np.loadtxt(
    test_data_path,
    usecols=tuple(range(1, 10)),
    skiprows=2,
    delimiter=",",
    dtype=np.float32,
)

# Echo the array so an interactive session displays it.
test_dataset