K-means算法详解.pdf
代码实现
% Start from an empty workspace and a clean console, then load the data set.
% NOTE(review): `num` is not defined anywhere in this script, so it is
% presumably a variable stored in Iris.mat -- confirm.
clear
clc
load Iris.mat

% Pick k distinct rows of the data at random; they act as the initial
% cluster centres.
k = 10; % number of clusters
matrix_row = size(num,1);
K = randperm(matrix_row,k); % row indices of the k seed points

% Euclidean distance from every point (row) to every seed (column).
% Column 1 of num is skipped on purpose, exactly as in the rest of the
% script (presumably it is an id/label column -- confirm).
% The squared differences are accumulated one feature column at a time,
% for all rows at once, and the square root is taken at the very end.
distance_matrix = zeros(matrix_row,k);
for col = 2:size(num,2)
    for seed = 1:k
        distance_matrix(:,seed) = distance_matrix(:,seed) + (num(:,col) - num(K(seed),col)).^2;
    end
end
distance_matrix = sqrt(distance_matrix);
% Iterate the k-means assignment/update steps (second pass onwards).
%
% Reads from the workspace:
%   num             - data matrix; column 1 is skipped, columns 2:end are
%                     used as features (same convention as the seeding code)
%   k               - number of clusters
%   K               - row indices of the seed points chosen earlier
%   distance_matrix - distance of every point (row) to every centre (column)
% Leaves in the workspace:
%   index                - cluster number assigned to every point
%   Kmean_matrix         - k-by-d matrix of cluster centres (row j = cluster j)
%   distance_matrix      - distances to the updated centres
%   Classifiction_matrix - k-by-n matrix; entry (j,i) is i when point i
%                          belongs to cluster j and 0 otherwise
%   distance             - total within-cluster sum of squared distances
%
% The loop stops after 9 passes, or earlier once the centres no longer move.
features = num(:, 2:size(num,2));
n_points = size(features, 1);
n_features = size(features, 2);

% Centres of the previous pass. This has to live outside the loop, otherwise
% there is nothing to compare the new centres against. It starts out as the
% seed points themselves.
Kmean_matrix_flag = features(K, :);

s = 1;
while s<10 % at most 9 passes
    % Assignment step: every point joins the cluster of its nearest centre.
    [min_array, index] = min(distance_matrix,[],2);

    % Update step: every centre moves to the mean of its members.
    % Starting from the previous centres means that a cluster without any
    % members simply keeps its old centre, so row j of Kmean_matrix always
    % describes cluster j (an empty cluster can occur, e.g. when two seeds
    % have identical coordinates).
    Kmean_matrix = Kmean_matrix_flag;
    for j = 1:k
        members = (index == j);
        if any(members)
            Kmean_matrix(j,:) = mean(features(members,:),1);
        end
    end

    % Convergence test, done once all k centres are known.
    converged = isequal(Kmean_matrix,Kmean_matrix_flag);
    Kmean_matrix_flag = Kmean_matrix;

    % Recompute the distance of every point to every updated centre.
    for l = 1:k
        squared_sum = zeros(n_points,1);
        for c = 1:n_features
            squared_sum = squared_sum + (features(:,c) - Kmean_matrix(l,c)).^2;
        end
        distance_matrix(:,l) = sqrt(squared_sum);
    end

    % Group the point indices by cluster and accumulate, over all k
    % clusters, the squared distance of each point to its own centre.
    Classifiction_matrix = zeros(k,n_points);
    distance = 0;
    for j = 1:k
        member_rows = find(index == j);
        Classifiction_matrix(j,member_rows) = member_rows.';
        deltas = features(member_rows,:) - repmat(Kmean_matrix(j,:),numel(member_rows),1);
        distance = distance + sum(deltas(:).^2);
    end

    disp('中心距离');
    disp(distance);
    s = s+1;
    disp(s)

    % Leave the while loop (not just an inner for loop) once the centres
    % are identical to those of the previous pass.
    if converged
        break
    end
end