K-means算法详解.pdf
代码实现
% K-means clustering of the data stored in Iris.mat.
%
% The script expects Iris.mat to provide a numeric matrix named num with one
% sample per row. Column 1 is skipped and columns 2:end are used as the
% features (presumably column 1 is a sample id -- confirm against the data).
%
% On every pass the script prints the label '中心距离', the within-cluster sum
% of squared distances to the current centres, and the iteration counter.
% It stops after max_iter passes or as soon as a pass leaves every centre
% unchanged.
clear
clc
load Iris.mat

k = 10;          % number of clusters
max_iter = 9;    % upper bound on the number of centre updates

features = num(:, 2:end);
matrix_row = size(features, 1);

% Seed the centres with k distinct, randomly chosen samples.
K = randperm(matrix_row, k);
Kmean_matrix = features(K, :);

% Euclidean distance from every sample (row) to every centre (column).
distance_matrix = zeros(matrix_row, k);
for j = 1:k
    delta = bsxfun(@minus, features, Kmean_matrix(j, :));
    distance_matrix(:, j) = sqrt(sum(delta .^ 2, 2));
end

s = 1;
while s <= max_iter
    % Assign every sample to its nearest centre; min_array holds the
    % corresponding distances.
    [min_array, index] = min(distance_matrix, [], 2);

    % Remember the centres before the update so convergence can be checked
    % once ALL k centres have been recomputed.
    Kmean_matrix_flag = Kmean_matrix;

    % Move each centre to the mean of its members.
    for j = 1:k
        members = (index == j);
        if any(members)
            Kmean_matrix(j, :) = mean(features(members, :), 1);
        end
        % An empty cluster keeps its previous centre, so Kmean_matrix always
        % has exactly k rows and row j always belongs to cluster j.
    end

    % Recompute the sample-to-centre distances for the updated centres.
    for j = 1:k
        delta = bsxfun(@minus, features, Kmean_matrix(j, :));
        distance_matrix(:, j) = sqrt(sum(delta .^ 2, 2));
    end

    % Membership table: entry (j, i) is i when sample i belongs to cluster j
    % and 0 otherwise. Accumulate the total squared distance of every sample
    % to the centre of the cluster it was assigned to.
    Classifiction_matrix = zeros(k, matrix_row);
    distance = 0;
    for j = 1:k
        members = (index == j);
        Classifiction_matrix(j, members) = find(members).';
        delta = bsxfun(@minus, features(members, :), Kmean_matrix(j, :));
        distance = distance + sum(delta(:) .^ 2);
    end

    disp('中心距离');
    disp(distance);

    s = s + 1;
    disp(s)

    % Converged: this pass did not move any centre, so further passes would
    % reproduce the same assignment.
    if isequal(Kmean_matrix, Kmean_matrix_flag)
        break
    end
end