Examples

rand('twister',5489) is used many times on this page. It simply resets the MATLAB random number generator so that kmeans (or litekmeans) starts from exactly the same initialization each time. Thus, you can exactly reproduce the results reported on this page. If you are using MATLAB 2011b or a later version, you can use rng('default') instead of rand('twister',5489).

Incremental active learning using MAED

Code download

%Two-circle data with noise. Four samples are treated as already labeled
%and are drawn as red * on top of the scatter plot.
clear;
rand('twister',5489);   %reset the random number generator for reproducibility
[fea, gnd] = GenTwoNoisyCircle();
split = (gnd == 1);     %logical mask of the samples whose label is 1

%Mark samples 1, 50, 100 and 150 as the labeled set.
splitLabel = false(length(gnd),1);
splitLabel([1 50 100 150]) = true;

%Figure 1: label-1 samples in black, the rest in blue, labeled samples in red.
figure(1);
plot(fea(split,1),fea(split,2),'.k',fea(~split,1),fea(~split,2),'.b');
hold on;
plot(fea(splitLabel,1),fea(splitLabel,2),'*r');
hold off;

%TED is asked to select 4 more examples (drawn as red * with their rank).
%MAED boils down to TED when ReguBeta = 0.
%Requires fea, split and splitLabel from the data-generation step.
options = struct('KernelType','Gaussian', ...
                 't',0.5, ...
                 'ReguBeta',0, ...
                 'splitLabel',splitLabel);
smpRank = MAED(fea,4,options);

%Figure 2: same scatter as before, plus the newly selected samples.
figure(2);
plot(fea(split,1),fea(split,2),'.k',fea(~split,1),fea(~split,2),'.b');
hold on;
plot(fea(splitLabel,1),fea(splitLabel,2),'*r');
for k = 1:length(smpRank)
  sel = smpRank(k);   %used as a row index into fea
  plot(fea(sel,1),fea(sel,2),'*r');
  %Annotate the point with its position in the returned ranking.
  text(fea(sel,1),fea(sel,2),['\fontsize{16} \color{red}',num2str(k)]);
end
hold off;

%MAED is asked to select 4 more examples (drawn as red * with their rank).
%Same settings as the TED run except ReguBeta = 100.
%Requires fea, split and splitLabel from the data-generation step.
options = struct('KernelType','Gaussian', ...
                 't',0.5, ...
                 'ReguBeta',100, ...
                 'splitLabel',splitLabel);
smpRank = MAED(fea,4,options);

%Figure 3: same scatter as before, plus the newly selected samples.
figure(3);
plot(fea(split,1),fea(split,2),'.k',fea(~split,1),fea(~split,2),'.b');
hold on;
plot(fea(splitLabel,1),fea(splitLabel,2),'*r');
for k = 1:length(smpRank)
  sel = smpRank(k);   %used as a row index into fea
  plot(fea(sel,1),fea(sel,2),'*r');
  %Annotate the point with its position in the returned ranking.
  text(fea(sel,1),fea(sel,2),['\fontsize{16} \color{red}',num2str(k)]);
end
hold off;

Out-of-sample retrieval using EMR

Code download

%Ranking on the USPS data set (9298 samples with 256 dimensions)
clear;
load('USPS.mat');
%Shift the labels down by one; presumably so that a label equals the digit
%it stands for (labels are compared against `digit` later) -- TODO confirm.
gnd = gnd - 1;

%EMR model learning
rand('twister',5489);   %reset the random number generator for reproducibility
opts = struct('p',500);
y0 = zeros(size(fea,1),1);   %all-zero vector, one entry per sample
[dump, model] = EMR(fea,y0,opts);   %first output is not used; only the model is kept


%Generate a query point out of the database: the mean of the first five
%samples labeled with the chosen digit.
%Requires fea and gnd from the USPS loading step.
digit = 2;
idx = find(gnd == digit);
x = mean(fea(idx(1:5),:));

%Ranking with Euclidean distance
D = EuDist2(x,fea);
[dump,idx]=sort(D);   %ascending distance; idx now holds the ranking
%NOTE(review): the top-ranked sample idx(1) is skipped although the query
%is not itself in the database -- confirm this is intended.
showfea = fea(idx(2:100),:);

%Build a 160x160 montage of 16x16 tiles: the query sits in row 1, column 5,
%and rows 2-10 hold the first 90 rows of showfea (the remaining rows are
%not displayed).
Y = ones(160,160)*-1;
Y(1:16,4*16+1:5*16) = reshape(x,[16,16])';
for i=1:9
  for j=0:9
    Y(i*16+1:(i+1)*16,j*16+1:(j+1)*16) = reshape(showfea((i-1)*10+j+1,:),[16,16])';
  end
end
%Open a new figure window so that this montage does not overwrite whatever
%figure is currently active (imagesc draws into the current axes).
figure;
imagesc(Y);colormap(gray);

%Ranking with Efficient Manifold Ranking
%Requires the query x, the learned model and fea from the previous steps.
tic;
y = EMRtest(x,model);
toc;
%Elapsed time is 0.015944 seconds.
[dump,idx]=sort(-y);   %sort by descending score
%NOTE(review): the top-ranked entry idx(1) is skipped; whether it refers to
%the query itself depends on what EMRtest returns -- confirm.
showfea = fea(idx(2:100),:);

%Build a 160x160 montage of 16x16 tiles: the query sits in row 1, column 5,
%and rows 2-10 hold the first 90 rows of showfea (the remaining rows are
%not displayed).
Y = ones(160,160)*-1;
Y(1:16,4*16+1:5*16) = reshape(x,[16,16])';
for i=1:9
  for j=0:9
    Y(i*16+1:(i+1)*16,j*16+1:(j+1)*16) = reshape(showfea((i-1)*10+j+1,:),[16,16])';
  end
end
%Open a new figure window so that this montage does not overwrite whatever
%figure is currently active (imagesc draws into the current axes).
figure;
imagesc(Y);colormap(gray);

%Generate a query point out of the database: the mean of the 15th and the
%676th sample labeled with the chosen digit.
%Requires fea and gnd from the USPS loading step.
digit = 5;
idx = find(gnd == digit);
x = mean(fea(idx([15,676]),:));

%Ranking with Euclidean distance
D = EuDist2(x,fea);
[dump,idx]=sort(D);   %ascending distance; idx now holds the ranking
%NOTE(review): the top-ranked sample idx(1) is skipped although the query
%is not itself in the database -- confirm this is intended.
showfea = fea(idx(2:100),:);

%Build a 160x160 montage of 16x16 tiles: the query sits in row 1, column 5,
%and rows 2-10 hold the first 90 rows of showfea (the remaining rows are
%not displayed).
Y = ones(160,160)*-1;
Y(1:16,4*16+1:5*16) = reshape(x,[16,16])';
for i=1:9
  for j=0:9
    Y(i*16+1:(i+1)*16,j*16+1:(j+1)*16) = reshape(showfea((i-1)*10+j+1,:),[16,16])';
  end
end
%Open a new figure window so that this montage does not overwrite whatever
%figure is currently active (imagesc draws into the current axes).
figure;
imagesc(Y);colormap(gray);

%Ranking with Efficient Manifold Ranking
%Requires the query x, the learned model and fea from the previous steps.
tic;
y = EMRtest(x,model);
toc;
%Elapsed time is 0.015944 seconds.
[dump,idx]=sort(-y);   %sort by descending score
%NOTE(review): the top-ranked entry idx(1) is skipped; whether it refers to
%the query itself depends on what EMRtest returns -- confirm.
showfea = fea(idx(2:100),:);

%Build a 160x160 montage of 16x16 tiles: the query sits in row 1, column 5,
%and rows 2-10 hold the first 90 rows of showfea (the remaining rows are
%not displayed).
Y = ones(160,160)*-1;
Y(1:16,4*16+1:5*16) = reshape(x,[16,16])';
for i=1:9
  for j=0:9
    Y(i*16+1:(i+1)*16,j*16+1:(j+1)*16) = reshape(showfea((i-1)*10+j+1,:),[16,16])';
  end
end
%Open a new figure window so that this montage does not overwrite whatever
%figure is currently active (imagesc draws into the current axes).
figure;
imagesc(Y);colormap(gray);

Supervised feature selection using MCFS

Code download

%Classification on USPS
load('USPS.mat');

%Classification by SRDA using all the features.
%Rows 7292:end are held out for testing; training uses the first nTrain rows.
options = [];
options.ReguAlpha = 10;
feaHeldOut = fea(7292:end,:);
gndHeldOut = gnd(7292:end);
for nTrain = [2000 3000 4000 5000 6000 7291]
  trainRows = 1:nTrain;
  model = SRDAtrain(fea(trainRows,:), gnd(trainRows), options);
  accuracy = SRDApredict(feaHeldOut, gndHeldOut, model);
  disp(['SRDA on all ',num2str(size(fea,2)),' features, ',num2str(nTrain),' Train, Errorrate:',num2str(1-accuracy)]);
end
SRDA on all 256 features, 2000 Train, Errorrate:0.12805
SRDA on all 256 features, 3000 Train, Errorrate:0.12207
SRDA on all 256 features, 4000 Train, Errorrate:0.11958
SRDA on all 256 features, 5000 Train, Errorrate:0.12357
SRDA on all 256 features, 6000 Train, Errorrate:0.11908
SRDA on all 256 features, 7291 Train, Errorrate:0.11958

%Classification by SRDA using MCFS selected features.
%Requires fea, gnd and the SRDA `options` struct from the all-features run.
%NOTE(review): this run is evaluated on rows 8001:end, whereas the
%all-features run is evaluated on rows 7292:end, so the two error-rate
%lists are not measured on the same test set -- confirm this is intended.
for nTrain = [2000 3000 4000 5000 6000 7291]
  trainRows = 1:nTrain;

  %Pass the training labels to MCFS and request 40 features.
  MCFSoptions = [];
  MCFSoptions.gnd = gnd(trainRows);
  FeaNumCandi = 40;
  [FeaIndex,FeaNumCandi] = MCFS_p(fea(trainRows,:),FeaNumCandi,MCFSoptions);

  %Train and evaluate SRDA once per returned feature subset.
  for k = 1:length(FeaNumCandi)
    SelectFeaIdx = FeaIndex{k};
    model = SRDAtrain(fea(trainRows,SelectFeaIdx), gnd(trainRows), options);
    accuracy = SRDApredict(fea(8001:end,SelectFeaIdx), gnd(8001:end), model);
    disp(['SRDA on selected ',num2str(FeaNumCandi(k)),' features, ',num2str(nTrain),' Train, Errorrate:',num2str(1-accuracy)]);
  end
end
SRDA on selected 40 features, 2000 Train, Errorrate:0.12635
SRDA on selected 40 features, 3000 Train, Errorrate:0.13405
SRDA on selected 40 features, 4000 Train, Errorrate:0.13174
SRDA on selected 40 features, 5000 Train, Errorrate:0.13328
SRDA on selected 40 features, 6000 Train, Errorrate:0.14022
SRDA on selected 40 features, 7291 Train, Errorrate:0.13713

Clustering in the spectral regression subspace

Code download

%Clustering on COIL20
load('COIL20.mat');

%Clustering in the original space
rand('twister',5489);   %reset the random number generator for reproducibility
label = litekmeans(fea,20,'Replicates',20);
MIhat = MutualInfo(gnd,label)
%MIhat: 0.7606

%SR Learning
%Start from an empty options variable so that fields left in the workspace
%by an earlier run cannot leak into USRtrain.
USRoptions = [];
USRoptions.ReducedDim = length(unique(gnd))+10;
model = USRtrain(fea, USRoptions);
feaNew = USRtest(fea, length(unique(gnd)), model);

%Clustering in the SR subspace
rand('twister',5489);   %same generator state as for the original-space run
labelNew = litekmeans(feaNew,20,'Replicates',20);
MIhatNew = MutualInfo(gnd,labelNew)
%MIhatNew: 0.8974

Clustering in the kernel spectral regression subspace

Code download

%Clustering on USPS
load('USPS.mat');

%The first nTrain rows form the training part, the remaining rows the test part.
nTrain = 6000;
trainRows = 1:nTrain;
testRows = nTrain+1:size(fea,1);
feaTrain = fea(trainRows,:);
gndTrain = gnd(trainRows);
feaTest = fea(testRows,:);
gndTest = gnd(testRows);

%--------------------------------------
%Clustering in the original space
rand('twister',5489);   %reset the random number generator for reproducibility
labelTrain = litekmeans(feaTrain,10,'Replicates',20);
MIhatTrain = MutualInfo(gndTrain,labelTrain)
%MIhatTrain: 0.6343

rand('twister',5489);   %same generator state for the test part
labelTest = litekmeans(feaTest,10,'Replicates',20);
MIhatTest = MutualInfo(gndTest,labelTest)
%MIhatTest: 0.5835
 
%SR Learning
%Requires feaTrain, gndTrain, feaTest and gndTest from the data-split step.
%Start from an empty options variable so that fields left in the workspace
%by an earlier run cannot leak into USRtrain.
USRoptions = [];
USRoptions.ReducedDim = length(unique(gndTrain))+10;
model = USRtrain(feaTrain, USRoptions);

%Clustering in the SR subspace
feaTrainSR = USRtest(feaTrain, length(unique(gndTrain)), model);
rand('twister',5489);   %reset the random number generator for reproducibility
labelTrainSR = litekmeans(feaTrainSR,10,'Replicates',20);
MIhatTrainSR = MutualInfo(gndTrain,labelTrainSR)
%MIhatTrainSR: 0.7251

%The test part is embedded with the model learned on the training part.
feaTestSR = USRtest(feaTest, length(unique(gndTrain)), model);
rand('twister',5489);
labelTestSR = litekmeans(feaTestSR,10,'Replicates',20);
MIhatTestSR = MutualInfo(gndTest,labelTestSR)
%MIhatTestSR: 0.6809
 
%KSR Learning
%Requires feaTrain, gndTrain, feaTest and gndTest from the data-split step.
%Start from an empty options variable so that fields left in the workspace
%by an earlier run cannot leak into UKSRtrain.
UKSRoptions = [];
UKSRoptions.ReducedDim = length(unique(gndTrain))+10;
model = UKSRtrain(feaTrain, UKSRoptions);

%Clustering in the KSR subspace
feaTrainKSR = UKSRtest(feaTrain, length(unique(gndTrain)), model);
rand('twister',5489);   %reset the random number generator for reproducibility
labelTrainKSR = litekmeans(feaTrainKSR,10,'Replicates',20);
MIhatTrainKSR = MutualInfo(gndTrain,labelTrainKSR)
%MIhatTrainKSR: 0.8195

%The test part is embedded with the model learned on the training part.
feaTestKSR = UKSRtest(feaTest, length(unique(gndTrain)), model);
rand('twister',5489);
labelTestKSR = litekmeans(feaTestKSR,10,'Replicates',20);
MIhatTestKSR = MutualInfo(gndTest,labelTestKSR)
%MIhatTestKSR: 0.8070

Spectral Regression Kernel Discriminant Analysis (SRKDA) for Semi-supervised Classification

Code download

%Semi-supervised classification on USPS
load('USPS.mat');

%Rows 1:nLabel are the labeled samples, rows nLabel+1:7291 are additional
%training samples used without their labels, and rows 7292:end are the
%test samples.
nLabel = 500;
labeledRows = 1:nLabel;
testRows = 7292:size(fea,1);

feaLabel = fea(labeledRows,:);
gndLabel = gnd(labeledRows);
feaTrain = fea(nLabel+1:7291,:);

feaTest = fea(testRows,:);
gndTest = gnd(testRows);

%--------------------------------------
%Training SRKDA on labeled data
options = struct('KernelType','Gaussian', ...
                 't',10, ...
                 'ReguAlpha',0.01);

model = SRKDAtrain(feaLabel, gndLabel, options);
accuracy = SRKDApredict(feaTest, gndTest, model);
Errorrate = 1-accuracy
%Errorrate: 0.0947

%--------------------------------------
%If we have some additional training data (without label)
%Requires options, feaLabel, gndLabel, feaTrain, feaTest and gndTest from
%the labeled-only run; ReguBeta is added to the existing options.
options.ReguBeta = 10;
model = SRKDAtrain(feaLabel, gndLabel, options, feaTrain);
accuracy = SRKDApredict(feaTest, gndTest, model);
Errorrate = 1-accuracy
%Errorrate: 0.0708

%If SRKDA can see the test data (without label)
feaUnlabeled = [feaTrain;feaTest];   %unlabeled training rows followed by the test rows
model = SRKDAtrain(feaLabel, gndLabel, options, feaUnlabeled);
accuracy = SRKDApredict(feaTest, gndTest, model);
Errorrate = 1-accuracy
%Errorrate: 0.0648

Return to Codes and Data