K-means算法详解.pdf

代码实现

  % K-means clustering script.
  %
  % Loads Iris.mat, picks k random rows as initial cluster centres, then
  % alternates between (1) assigning every sample to its nearest centre and
  % (2) moving every centre to the mean of its assigned samples. After each
  % update the within-cluster sum of squared distances is displayed. The loop
  % stops after max_iter updates or as soon as the centres no longer change.
  %
  % Workspace variables left behind: k, K, features, distance_matrix, index,
  % min_array, Kmean_matrix, Kmean_matrix_flag, distance, s.
  clear
  clc
  load Iris.mat    % NOTE(review): expected to define the numeric matrix `num` - confirm

  k = 10;          % number of clusters
  max_iter = 9;    % upper bound on the number of centre updates

  % Column 1 of num is excluded from all distance computations, exactly as in
  % the original listing (presumably an id/label column - TODO confirm).
  features = num(:, 2:end);
  matrix_row = size(num, 1);

  % Randomly choose k distinct samples as the initial cluster centres.
  K = randperm(matrix_row, k);
  Kmean_matrix = features(K, :);

  % Euclidean distance from every sample (row) to every initial centre (column).
  distance_matrix = zeros(matrix_row, k);
  for j = 1:k
      diff_j = bsxfun(@minus, features, Kmean_matrix(j, :));
      distance_matrix(:, j) = sqrt(sum(diff_j.^2, 2));
  end

  % Iterative refinement.
  s = 1;
  while s <= max_iter
      % Assignment step: each sample joins the cluster of its nearest centre.
      [min_array, index] = min(distance_matrix, [], 2);

      % Update step: move each centre to the mean of its members.
      % Kmean_matrix_flag keeps the centres from before this update so the two
      % can be compared for convergence once ALL k centres have been updated.
      Kmean_matrix_flag = Kmean_matrix;
      for j = 1:k
          members = (index == j);
          if any(members)
              Kmean_matrix(j, :) = mean(features(members, :), 1);
          end
          % An empty cluster keeps its previous centre, so row j of
          % Kmean_matrix always corresponds to column j of distance_matrix.
      end
      converged = isequal(Kmean_matrix, Kmean_matrix_flag);

      % Recompute the distance from every sample to every updated centre.
      for j = 1:k
          diff_j = bsxfun(@minus, features, Kmean_matrix(j, :));
          distance_matrix(:, j) = sqrt(sum(diff_j.^2, 2));
      end

      % Within-cluster sum of squared distances, using the assignment that
      % produced the current centres.
      distance = 0;
      for j = 1:k
          members = (index == j);
          diff_j = bsxfun(@minus, features(members, :), Kmean_matrix(j, :));
          distance = distance + sum(diff_j(:).^2);
      end
      disp('中心距离');
      disp(distance);

      s = s + 1;
      disp(s)

      % Stop once an update leaves every centre unchanged: the next
      % assignment step would reproduce the same clusters.
      if converged
          break
      end
  end