% Active-learning demo (TED vs. MAED) on the noisy two-circle data.
% Four samples are treated as already labeled (red *). Each selector is then
% asked for 4 more samples, which are drawn as red * annotated with the
% position they hold in the returned ranking.
clear;
rand('twister',5489);               % seed the generator before creating the data
[fea, gnd] = GenTwoNoisyCircle();
split = gnd ==1;                    % logical mask: samples whose label is 1

% Mark the four samples that are assumed to be labeled already.
splitLabel = false(length(gnd),1);
splitLabel([1 50 100 150]) = true;

% Figure 1: samples with label 1 in black, all others in blue, labeled ones as red *.
figure(1);
plot(fea(split,1),fea(split,2),'.k',fea(~split,1),fea(~split,2),'.b');
hold on;
plot(fea(splitLabel,1),fea(splitLabel,2),'*r');
hold off;

% Figure 2: ReguBeta = 0   (MAED boils down to TED when ReguBeta = 0).
% Figure 3: ReguBeta = 100 (MAED).
reguBetas = [0 100];
for r = 1:numel(reguBetas)
    options = [];
    options.KernelType = 'Gaussian';
    options.t = 0.5;
    options.ReguBeta = reguBetas(r);
    options.splitLabel = splitLabel;
    smpRank = MAED(fea,4,options);  % ask for 4 more samples

    figure(r+1);
    plot(fea(split,1),fea(split,2),'.k',fea(~split,1),fea(~split,2),'.b');
    hold on;
    plot(fea(splitLabel,1),fea(splitLabel,2),'*r');
    for i = 1:length(smpRank)
        picked = fea(smpRank(i),:);
        plot(picked(1),picked(2),'*r');
        text(picked(1),picked(2),['\fontsize{16} \color{red}',num2str(i)]);
    end
    hold off;
end
% Ranking on the USPS data set (9298 samples with 256 dimensions).
% For two out-of-database query points, the database is ranked once by
% Euclidean distance and once by Efficient Manifold Ranking (EMR). Every
% ranking is shown as a 10x10 montage of 16x16 tiles in its own figure:
% the query sits in the first tile row, the ranked samples fill the
% remaining nine rows.
clear;
load('USPS.mat');
gnd = gnd - 1;      % presumably shifts labels 1..10 to digits 0..9 - confirm against USPS.mat

% EMR model learning
rand('twister',5489);
opts = [];
opts.p = 500;
[dump, model] = EMR(fea,zeros(size(fea,1),1),opts);

% Each row: {digit, positions (within that digit's samples) that are
% averaged to form the query point}.
queries = {2, 1:5; ...
           5, [15,676]};

for q = 1:size(queries,1)
    % Generate a query point out of the database.
    digit = queries{q,1};
    idx = find(gnd == digit);
    x = mean(fea(idx(queries{q,2}),:));

    % Ranking with Euclidean distance (ascending distance)
    D = EuDist2(x,fea);
    [dump,idxEu] = sort(D);

    % Ranking with Efficient Manifold Ranking (descending score)
    tic;
    [y] = EMRtest(x,model);
    toc;
    %Elapsed time is 0.015944 seconds.
    [dump,idxEMR] = sort(-y);

    rankings = {idxEu, idxEMR};
    for r = 1:numel(rankings)
        idx = rankings{r};
        % NOTE(review): idx(1) is skipped although the query itself is not
        % in the database - confirm that dropping the top hit is intended.
        showfea = fea(idx(2:100),:);

        Y = ones(160,160)*-1;                           % background value -1
        Y(1:16,4*16+1:5*16) = reshape(x,[16,16])';      % query: tile row 1, tile column 5
        for i=1:9
            for j=0:9
                Y(i*16+1:(i+1)*16,j*16+1:(j+1)*16) = reshape(showfea((i-1)*10+j+1,:),[16,16])';
            end
        end

        % A fresh figure per montage; without it every imagesc call would
        % overwrite the previous one and only the last ranking would remain.
        figure;
        imagesc(Y);colormap(gray);
    end
end
% Classification on USPS: SRDA trained on all features versus SRDA trained
% on features selected by MCFS, for several training-set sizes.
load('USPS.mat');

% Classification by SRDA using all the features.
% Training uses the first nTrain samples; testing uses samples 7292..end.
options = [];
options.ReguAlpha = 10;
for nTrain = [2000 3000 4000 5000 6000 7291]
    model = SRDAtrain(fea(1:nTrain,:), gnd(1:nTrain), options);
    accuracy = SRDApredict(fea(7292:end,:), gnd(7292:end), model);
    disp(['SRDA on all ',num2str(size(fea,2)),' features, ',num2str(nTrain),' Train, Errorrate:',num2str(1-accuracy)]);
end
% Recorded output:
%   SRDA on all 256 features, 2000 Train, Errorrate:0.12805
%   SRDA on all 256 features, 3000 Train, Errorrate:0.12207
%   SRDA on all 256 features, 4000 Train, Errorrate:0.11958
%   SRDA on all 256 features, 5000 Train, Errorrate:0.12357
%   SRDA on all 256 features, 6000 Train, Errorrate:0.11908
%   SRDA on all 256 features, 7291 Train, Errorrate:0.11958

% Classification by SRDA using MCFS selected features.
% The SRDA options defined above are reused here.
% NOTE(review): this section tests on samples 8001..end, whereas the
% all-features section tests on 7292..end, so the two sets of error rates
% are measured on different test sets. The ranges are kept unchanged so the
% recorded output stays valid - confirm whether the difference is intended.
for nTrain = [2000 3000 4000 5000 6000 7291]
    MCFSoptions = [];
    MCFSoptions.gnd = gnd(1:nTrain);
    FeaNumCandi = 40;       % requested number of selected features
    [FeaIndex,FeaNumCandi] = MCFS_p(fea(1:nTrain,:),FeaNumCandi,MCFSoptions);

    for i = 1:length(FeaNumCandi)
        SelectFeaIdx = FeaIndex{i};
        model = SRDAtrain(fea(1:nTrain,SelectFeaIdx), gnd(1:nTrain), options);
        accuracy = SRDApredict(fea(8001:end,SelectFeaIdx), gnd(8001:end), model);
        disp(['SRDA on selected ',num2str(FeaNumCandi(i)),' features, ',num2str(nTrain),' Train, Errorrate:',num2str(1-accuracy)]);
    end
end
% Recorded output:
%   SRDA on selected 40 features, 2000 Train, Errorrate:0.12635
%   SRDA on selected 40 features, 3000 Train, Errorrate:0.13405
%   SRDA on selected 40 features, 4000 Train, Errorrate:0.13174
%   SRDA on selected 40 features, 5000 Train, Errorrate:0.13328
%   SRDA on selected 40 features, 6000 Train, Errorrate:0.14022
%   SRDA on selected 40 features, 7291 Train, Errorrate:0.13713
% Clustering on COIL20: k-means in the original feature space versus
% k-means in the subspace learned by unsupervised Spectral Regression (SR).
% Both results are scored against gnd with MutualInfo and printed.
load('COIL20.mat');

nClass = length(unique(gnd));       % number of distinct labels in gnd

% Clustering in the original space (20 clusters, 20 replicates)
rand('twister',5489);
label = litekmeans(fea,20,'Replicates',20);
MIhat = MutualInfo(gnd,label)
%MIhat: 0.7606

% SR Learning
% Start from an empty options variable so that fields of a USRoptions left
% over in the workspace cannot leak into USRtrain.
USRoptions = [];
USRoptions.ReducedDim = nClass+10;
model = USRtrain(fea, USRoptions);
feaNew = USRtest(fea, nClass, model);

% Clustering in the SR subspace (same seed and k-means settings as above)
rand('twister',5489);
labelNew = litekmeans(feaNew,20,'Replicates',20);
MIhatNew = MutualInfo(gnd,labelNew)
%MIhatNew: 0.8974
% Clustering on USPS: k-means in the original space, in the subspace learned
% by unsupervised Spectral Regression (SR), and in the subspace learned by
% unsupervised Kernel Spectral Regression (KSR). The models are learned on
% the first nTrain samples and applied to both the training part and the
% remaining (test) part. Each clustering is scored with MutualInfo and printed.
load('USPS.mat');

nTrain = 6000;
feaTrain = fea(1:nTrain,:);
gndTrain = gnd(1:nTrain);
feaTest = fea(nTrain+1:end,:);
gndTest = gnd(nTrain+1:end);

nClass = length(unique(gndTrain));  % number of distinct labels in the training part

%--------------------------------------
% Clustering in the original space (10 clusters, 20 replicates)
rand('twister',5489);
labelTrain = litekmeans(feaTrain,10,'Replicates',20);
MIhatTrain = MutualInfo(gndTrain,labelTrain)
%MIhatTrain: 0.6343

rand('twister',5489);
labelTest = litekmeans(feaTest,10,'Replicates',20);
MIhatTest = MutualInfo(gndTest,labelTest)
%MIhatTest: 0.5835

%--------------------------------------
% SR Learning
% Start from an empty options variable so that fields of a USRoptions left
% over in the workspace cannot leak into USRtrain.
USRoptions = [];
USRoptions.ReducedDim = nClass+10;
model = USRtrain(feaTrain, USRoptions);

% Clustering in the SR subspace
feaTrainSR = USRtest(feaTrain, nClass, model);
rand('twister',5489);
labelTrainSR = litekmeans(feaTrainSR,10,'Replicates',20);
MIhatTrainSR = MutualInfo(gndTrain,labelTrainSR)
%MIhatTrainSR: 0.7251

feaTestSR = USRtest(feaTest, nClass, model);
rand('twister',5489);
labelTestSR = litekmeans(feaTestSR,10,'Replicates',20);
MIhatTestSR = MutualInfo(gndTest,labelTestSR)
%MIhatTestSR: 0.6809

%--------------------------------------
% KSR Learning
% Same precaution as above for the KSR options variable.
UKSRoptions = [];
UKSRoptions.ReducedDim = nClass+10;
model = UKSRtrain(feaTrain, UKSRoptions);

% Clustering in the KSR subspace
feaTrainKSR = UKSRtest(feaTrain, nClass, model);
rand('twister',5489);
labelTrainKSR = litekmeans(feaTrainKSR,10,'Replicates',20);
MIhatTrainKSR = MutualInfo(gndTrain,labelTrainKSR)
%MIhatTrainKSR: 0.8195

feaTestKSR = UKSRtest(feaTest, nClass, model);
rand('twister',5489);
labelTestKSR = litekmeans(feaTestKSR,10,'Replicates',20);
MIhatTestKSR = MutualInfo(gndTest,labelTestKSR)
%MIhatTestKSR: 0.8070
% Semi-supervised classification on USPS with SRKDA.
% The first nLabel samples are the labeled set, samples nLabel+1..7291 serve
% as additional unlabeled training data, and samples 7292..end are the test
% set. The test error rate is printed after each training variant.
load('USPS.mat');

nLabel = 500;
feaLabel = fea(1:nLabel,:);
gndLabel = gnd(1:nLabel);
feaTrain = fea(nLabel+1:7291,:);
feaTest = fea(7292:end,:);
gndTest = gnd(7292:end);

%--------------------------------------
% Training SRKDA on labeled data only
options = struct('KernelType','Gaussian', 't',10, 'ReguAlpha',0.01);
model = SRKDAtrain(feaLabel, gndLabel, options);
accuracy = SRKDApredict(feaTest, gndTest, model);
Errorrate = 1-accuracy
%Errorrate: 0.0947

%--------------------------------------
% Training SRKDA with additional data (without label), passed as the fourth
% argument. Two pools are tried in turn:
%   1) the unlabeled training samples only           %Errorrate: 0.0708
%   2) the unlabeled training samples plus the test
%      samples (SRKDA can see the test data)         %Errorrate: 0.0648
options.ReguBeta = 10;
unlabeledPools = {feaTrain, [feaTrain;feaTest]};
for k = 1:numel(unlabeledPools)
    model = SRKDAtrain(feaLabel, gndLabel, options, unlabeledPools{k});
    accuracy = SRKDApredict(feaTest, gndTest, model);
    Errorrate = 1-accuracy
end