一、基本原理
ISODATA(迭代自组织分析算法)在K-means的基础上对分类过程增加了“合并”和“分裂”两个操作。不同于K-Means需要提前确认分类的类别数目,ISODATA通过给定的参数在聚类过程中自动调整类别个数和类别中心,使聚类结果能更靠近客观真实的聚类结果。
二、程序步骤







三、具体代码(无matlab源码)
function ISODATA(x,K,theta_N,theta_S,theta_c,L,I)%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%input parameters%%%%%%% x : data% K : 预期的聚类中心数% theta_N : 每一聚类中心中最少的样本数,少于此数就不作为一个独立的聚类% theta_S :一个聚类中样本距离分布的标准差% theta_c : 两聚类中心之间的最小距离,如小于此数,两个聚类进行合并% L : 在一次迭代运算中可以和并的聚类中心的最多对数% I :迭代运算的次数序号%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% step1n = size(x,1);N_c = K;mean = cell(K,1);for i=1:Kmean{i} = x(i,:);endite = 1;while ite<Iflag = 1;while flag%% step2class = cell(size(mean));for i=1:nnum = Belong2(x(i,:),mean);class{num} = [class{num};x(i,:)];end%% step3for i=1:N_csize_i = size(class{i},1);if size_i<theta_Nclass_i = class{i};mean = DeleteRow(mean,i);class = DeleteRow(class,i);N_c = N_c-1;for j=1:size_iclass_ij = class_i(j,:);%the j'th row of class{i}num = Belong2(class_ij,mean);class{num} = [class{num};class_ij];endendend%% step4for i=1:N_cif ~isempty(mean{i})mean{i} = sum(class{i})./size(class{i},1);endend%% step5Dis = zeros(N_c,1);for i=1:N_cif ~isempty(class{i})N_i =size(class{i},1);tmp = bsxfun(@minus,class{i},mean{i});Dis(i) = sum(arrayfun(@(x)norm(tmp(x,:)),1:N_i))/N_i;endend%% step6D = 0;for i=1:N_cif ~isempty(class{i})N_i =size(class{i},1);D = D + N_i*Dis(i);endendD = D/n;%% step7flag = 0;if ite == Itheta_c = 0;flag = 0;elseif ~(N_c > K/2)flag = 1;elseif mod(ite,2)==0 || ~(N_c<2*K)flag = 0;end%% 分裂处理%% step8if flagflag = 0;delta = cell(N_c,1);for i=1:N_cif ~isempty(class{i})N_i =size(class{i},1);tmp = bsxfun(@minus,class{i},mean{i});delta{i} = arrayfun(@(x)norm(tmp(:,x)),1:size(tmp,2))/N_i;endend%% step9delta_max = cell(N_c,1);for i=1:N_cif ~isempty(class{i})max_i = max(delta{i});sub = find(delta{i}==max_i,1);delta_max{i} = [max_i,sub];endend%% step10for i=1:N_cif delta_max{i}(1) > theta_SN_i =size(class{i},1);con1 = (Dis(i)>D && N_i>2*(theta_N + 1));con2 = ~(N_c>K/2);if con1 || con2%%%%这里分裂%%%%%flag = 1;%一旦发生分裂,那么分裂一次后就返回第二步;若没发生分裂,则直接进入合并处理步lamda = 0.5;max_sub = delta_max{i}(2);mean{i}(max_sub) = mean{i}(max_sub) + lamda * delta_max{i}(1);addOneMean = mean{i};addOneMean(max_sub) = addOneMean(max_sub) - lamda * delta_max{i}(1);mean = [mean;addOneMean];N_c = N_c+1;break;endendendendend%% 合并处理if L%% step11Distance = zeros(N_c,N_c);for i=1:N_c-1for j=i:N_cDistance(i,j) = norm(mean{i}-mean{j});endend%% step12index = find(-Distance>theta_c);keepIndex = [Distance(index),index];[~, index] = sort(keepIndex(:,1));if size(index,1) > Lindex = index(1:L,:);end%% step13if size(index,1) ~= 0for id=1:size(index,1)[m_i m_j]= seq2idx(index(id),N_c);%%%%%这里合并%%%%%N_mi = size(class{m_i},1);N_mj = size(class{m_j},1);mean{m_i} = (N_mi*mean{m_i} + N_mj*mean{m_j})/(N_mi+N_mj);mean = DeleteRow(mean,m_j);class{m_i} = [class{m_i};class{m_j}];class = DeleteRow(class,m_j);endendend%% step14ite=ite+1;endfor i=1:N_cfprintf('第%d类聚类中心为\n',i);disp(mean{i});fprintf('第%d类中元素为\n',i);disp(class{i});endend%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%function number = Belong2(x_i,means)INF = 10000;min = INF;kk = size(means,1);number = 1;for i=1:kkif ~isempty(means{i})if norm(x_i - means{i}) < minmin = norm(x_i - means{i});number = i;endendendendfunction A_del = DeleteRow(A,r)n = size(A,1);if r == 1A_del = A(2:n,:);elseif r == nA_del = A(1:n-1,:);elseA_del = [A(1:r-1,:);A(r+1:n,:)];endendfunction [row col] = seq2idx(id,n)if mod(id,n)==0row = n;col = id/n;elserow = mod(id,n);col = ceil(id/n);endend
