一、基本原理

ISODATA(迭代自组织分析算法)在K-means的基础上对分类过程增加了“合并”和“分裂”两个操作。不同于K-Means需要提前确认分类的类别数目,ISODATA通过给定的参数在聚类过程中自动调整类别个数和类别中心,使聚类结果能更靠近客观真实的聚类结果。

二、程序步骤

image.png
image.png
image.png
image.png
image.png
image.png
image.png

三、具体代码(无matlab源码)

参考自CSDN:https://blog.csdn.net/qq_32642107/article/details/89925395?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522165525609616781483784668%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=165525609616781483784668&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~top_positive~default-1-89925395-null-null.142^v14^pc_search_result_control_group,157^v14^new_3&utm_term=ISODATA&spm=1018.2226.3001.4187

  1. function ISODATA(x,K,theta_N,theta_S,theta_c,L,I)
  2. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  3. %%%%%%%%input parameters%%%%%%
  4. % x : data
  5. % K : 预期的聚类中心数
  6. % theta_N : 每一聚类中心中最少的样本数,少于此数就不作为一个独立的聚类
  7. % theta_S :一个聚类中样本距离分布的标准差
  8. % theta_c : 两聚类中心之间的最小距离,如小于此数,两个聚类进行合并
  9. % L : 在一次迭代运算中可以和并的聚类中心的最多对数
  10. % I :迭代运算的次数序号
  11. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  12. %% step1
  13. n = size(x,1);
  14. N_c = K;
  15. mean = cell(K,1);
  16. for i=1:K
  17. mean{i} = x(i,:);
  18. end
  19. ite = 1;
  20. while ite<I
  21. flag = 1;
  22. while flag
  23. %% step2
  24. class = cell(size(mean));
  25. for i=1:n
  26. num = Belong2(x(i,:),mean);
  27. class{num} = [class{num};x(i,:)];
  28. end
  29. %% step3
  30. for i=1:N_c
  31. size_i = size(class{i},1);
  32. if size_i<theta_N
  33. class_i = class{i};
  34. mean = DeleteRow(mean,i);
  35. class = DeleteRow(class,i);
  36. N_c = N_c-1;
  37. for j=1:size_i
  38. class_ij = class_i(j,:);%the j'th row of class{i}
  39. num = Belong2(class_ij,mean);
  40. class{num} = [class{num};class_ij];
  41. end
  42. end
  43. end
  44. %% step4
  45. for i=1:N_c
  46. if ~isempty(mean{i})
  47. mean{i} = sum(class{i})./size(class{i},1);
  48. end
  49. end
  50. %% step5
  51. Dis = zeros(N_c,1);
  52. for i=1:N_c
  53. if ~isempty(class{i})
  54. N_i =size(class{i},1);
  55. tmp = bsxfun(@minus,class{i},mean{i});
  56. Dis(i) = sum(arrayfun(@(x)norm(tmp(x,:)),1:N_i))/N_i;
  57. end
  58. end
  59. %% step6
  60. D = 0;
  61. for i=1:N_c
  62. if ~isempty(class{i})
  63. N_i =size(class{i},1);
  64. D = D + N_i*Dis(i);
  65. end
  66. end
  67. D = D/n;
  68. %% step7
  69. flag = 0;
  70. if ite == I
  71. theta_c = 0;
  72. flag = 0;
  73. elseif ~(N_c > K/2)
  74. flag = 1;
  75. elseif mod(ite,2)==0 || ~(N_c<2*K)
  76. flag = 0;
  77. end
  78. %% 分裂处理
  79. %% step8
  80. if flag
  81. flag = 0;
  82. delta = cell(N_c,1);
  83. for i=1:N_c
  84. if ~isempty(class{i})
  85. N_i =size(class{i},1);
  86. tmp = bsxfun(@minus,class{i},mean{i});
  87. delta{i} = arrayfun(@(x)norm(tmp(:,x)),1:size(tmp,2))/N_i;
  88. end
  89. end
  90. %% step9
  91. delta_max = cell(N_c,1);
  92. for i=1:N_c
  93. if ~isempty(class{i})
  94. max_i = max(delta{i});
  95. sub = find(delta{i}==max_i,1);
  96. delta_max{i} = [max_i,sub];
  97. end
  98. end
  99. %% step10
  100. for i=1:N_c
  101. if delta_max{i}(1) > theta_S
  102. N_i =size(class{i},1);
  103. con1 = (Dis(i)>D && N_i>2*(theta_N + 1));
  104. con2 = ~(N_c>K/2);
  105. if con1 || con2
  106. %%%%这里分裂%%%%%
  107. flag = 1;%一旦发生分裂,那么分裂一次后就返回第二步;若没发生分裂,则直接进入合并处理步
  108. lamda = 0.5;
  109. max_sub = delta_max{i}(2);
  110. mean{i}(max_sub) = mean{i}(max_sub) + lamda * delta_max{i}(1);
  111. addOneMean = mean{i};
  112. addOneMean(max_sub) = addOneMean(max_sub) - lamda * delta_max{i}(1);
  113. mean = [mean;addOneMean];
  114. N_c = N_c+1;
  115. break;
  116. end
  117. end
  118. end
  119. end
  120. end
  121. %% 合并处理
  122. if L
  123. %% step11
  124. Distance = zeros(N_c,N_c);
  125. for i=1:N_c-1
  126. for j=i:N_c
  127. Distance(i,j) = norm(mean{i}-mean{j});
  128. end
  129. end
  130. %% step12
  131. index = find(-Distance>theta_c);
  132. keepIndex = [Distance(index),index];
  133. [~, index] = sort(keepIndex(:,1));
  134. if size(index,1) > L
  135. index = index(1:L,:);
  136. end
  137. %% step13
  138. if size(index,1) ~= 0
  139. for id=1:size(index,1)
  140. [m_i m_j]= seq2idx(index(id),N_c);
  141. %%%%%这里合并%%%%%
  142. N_mi = size(class{m_i},1);
  143. N_mj = size(class{m_j},1);
  144. mean{m_i} = (N_mi*mean{m_i} + N_mj*mean{m_j})/(N_mi+N_mj);
  145. mean = DeleteRow(mean,m_j);
  146. class{m_i} = [class{m_i};class{m_j}];
  147. class = DeleteRow(class,m_j);
  148. end
  149. end
  150. end
  151. %% step14
  152. ite=ite+1;
  153. end
  154. for i=1:N_c
  155. fprintf('第%d类聚类中心为\n',i);
  156. disp(mean{i});
  157. fprintf('第%d类中元素为\n',i);
  158. disp(class{i});
  159. end
  160. end
  161. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  162. function number = Belong2(x_i,means)
  163. INF = 10000;
  164. min = INF;
  165. kk = size(means,1);
  166. number = 1;
  167. for i=1:kk
  168. if ~isempty(means{i})
  169. if norm(x_i - means{i}) < min
  170. min = norm(x_i - means{i});
  171. number = i;
  172. end
  173. end
  174. end
  175. end
  176. function A_del = DeleteRow(A,r)
  177. n = size(A,1);
  178. if r == 1
  179. A_del = A(2:n,:);
  180. elseif r == n
  181. A_del = A(1:n-1,:);
  182. else
  183. A_del = [A(1:r-1,:);A(r+1:n,:)];
  184. end
  185. end
  186. function [row col] = seq2idx(id,n)
  187. if mod(id,n)==0
  188. row = n;
  189. col = id/n;
  190. else
  191. row = mod(id,n);
  192. col = ceil(id/n);
  193. end
  194. end