Plot the figure of K-SVCR

clear

%% generate data
 
prettySpiral = 0;
 
if ~prettySpiral
    % generate some random gaussian like data
    rand('state', 0);
    randn('state', 0);
    N= 50;
    D= 2;
 
    X1 = mgd(N, D, [4 3], [2 -1;-1 2]);
    X2 = mgd(N, D, [1 1], [2 1;1 1]);
    X3 = mgd(N, D, [3 -3], [1 0;0 4]);
 
    X= [X1; X2; X3];
    X= bsxfun(@rdivide, bsxfun(@minus, X, mean(X)), var(X));
    y= [ones(N, 1); ones(N, 1)*2; ones(N, 1)*3];
 
    scatter(X(:,1), X(:,2), 20, y)
 
else
    % generate twirl data!
 
    N= 50;
    t = linspace(0.5, 2*pi, N);
    x = t.*cos(t);
    y = t.*sin(t);
 
    t = linspace(0.5, 2*pi, N);
    x2 = t.*cos(t+2);
    y2 = t.*sin(t+2);
 
    t = linspace(0.5, 2*pi, N);
    x3 = t.*cos(t+4);
    y3 = t.*sin(t+4);
 
    X= [[x' y']; [x2' y2']; [x3' y3']];
    X= bsxfun(@rdivide, bsxfun(@minus, X, mean(X)), var(X));
    y= [ones(N, 1); ones(N, 1)*2; ones(N, 1)*3];
 
    scatter(X(:,1), X(:,2), 20, y)
end

%% classify
rho = 1;
c1 =10;
c2 =10;
epsilon = 0.2;
threshold = 0;

result=[];
%  ker = 'linear';
 ker = 'rbf';
sigma = 1/10;%sigma = 1/200;
par = NonLinearDualBoundSVORIM(X, y, c1, c2, epsilon, rho, ker, sigma);
%f = TestPrecisionNonLinear(par,X, y,X, y, ker,epsilon,sigma);

%% Plot the figure
contour_level = [-epsilon,0, epsilon];
xrange = [-1.5 1.5];
yrange = [-1.5 1.5];
% step size for how finely you want to visualize the decision boundary.
inc = 0.005;
% generate grid coordinates. this will be the basis of the decision
% boundary visualization.
[x1, x2] = meshgrid(xrange(1):inc:xrange(2), yrange(1):inc:yrange(2));
% size of the (x, y) image, which will also be the size of the
% decision boundary image that is used as the plot background.
image_size = size(x1);

xy = [x1(:) x2(:)]; % make (x,y) pairs as a bunch of row vectors.

% set up the domain over which you want to visualize the decision
% boundary
% d = [];
% for k=1:max(y)
%     par.normw{k}=1;
%     d(:,k) =  decisionfun(xy, par, X,y,k,epsilon, ker,sigma)';
% end
% [~,idx] = min(abs(d)/par.normw{k},[],2);
% nd=max(y);

nd = (max(y)*(max(y)-1)/2);
d = []; pred=zeros(size(xy,1),nd);
for k=1:nd
    par.normw{k}=1;
    d(:,k) =  decisionfun(xy, par, X,y,k,epsilon, ker,sigma)';
end
pred(d<-threshold) = -1; pred(d >threshold) = 1;
nclass = max(y);
expLosses=zeros(size(pred,1),nclass);

for i=1:nclass,
    expLosses(:,i) = sum(pred == repmat(par.Code(i,:),size(pred,1),1),2);
end
[minVal,finalOutput] = max(expLosses,[],2);
idx = finalOutput;

plt = 2; %1, just show the decison region with different colors; 2, show the decision hyperlane between class 1 and class 3
switch plt
    case 1
        % reshape the idx (which contains the class label) into an image.
        decisionmap = reshape(idx, image_size);
         imagesc(xrange,yrange,decisionmap);
        % plot the class training data.
        hold on;
        set(gca,'ydir','normal');
        cmap = [1 0.8 0.8; 0.95 1 0.95; 0.9 0.9 1];
        colormap(cmap);
        plot(X(y==1,1), X(y==1,2), 'o', 'MarkerFaceColor', [.9 .3 .3], 'MarkerEdgeColor','k');
        plot(X(y==2,1), X(y==2,2), 'o', 'MarkerFaceColor', [.3 .9 .3], 'MarkerEdgeColor','k');
        plot(X(y==3,1), X(y==3,2), 'o', 'MarkerFaceColor', [.3 .3 .9], 'MarkerEdgeColor','k');
        hold on;
        % title(sprintf('%d trees, Train time: %.2fs, Test time: %.2fs
', opts.numTrees, timetrain, timetest));
    case 2
        %% show SVs
        color = {[.9 .3 .3],[.3 .9 .3],[.3 .3 .9]};
        SVs = (par.SVs{2}>1e-4);
        for i=1:max(y)
            % show the SVs using biger marker
            plot(X(y==i&SVs==1,1),X(y==i&SVs==1,2), 'o', 'MarkerFaceColor', color{i}, 'MarkerEdgeColor','k'); 
            hold on
            % plot the points of not SVs
            plot(X(y==i&SVs~=1,1),X(y==i&SVs~=1,2), 'o', 'MarkerFaceColor', color{i}, 'MarkerEdgeColor',color{i});  
        end
        hold on;
        title(sprintf('Ratio of SVs is %.2fs
', mean(SVs)));
        color = {'r-','g-','b*','r.','go','b*'};
        color1 = {'r-','g--','b*','r.','go','b*'};
        contour_level1 = [-epsilon, 0, epsilon];
        contour_level2 = [-epsilon, 0, epsilon];
        contour_level0 = [-1,0,1];
        % for k = 1:nd
        for k=2
            decisionmapk = reshape(d(:,k), image_size);
            contour(x1,x2, decisionmapk, [-1 1], color1{k},'LineWidth',0.5);
            contour(x1,x2, decisionmapk, [contour_level(1) contour_level(1)], color{k});
            contour(x1,x2, decisionmapk, [contour_level(2) contour_level(2) ], color{k},'LineWidth',2);
            contour(x1,x2, decisionmapk, [contour_level(3) contour_level(3) ], color{k});
            contour(x1,x2, decisionmapk, [contour_level0(3) contour_level0(3) ], color1{k},'LineWidth',0.5);
        end
end

function par = NonLinearDualBoundSVORIM(traindata, targets, c1, c2, epsilon, rho, ker, sigma)
%'traindata' is a training data matrix , each line is a sample vector
%'targets' is a label vector,should  start  from 1 to p
model='EX';
% rho is the augmented Lagrangian parameter.

% history is a structure that contains the objective value, the primal and
% dual residual norms, and the tolerances for the primal and dual residual
% norms at each iteration.

%Data preprocessing
[n, m] = size(traindata);
Lab=sort(unique(targets));
p=length(Lab); %  the number of total rank
l= zeros(1,p);
id={};
X=[];Y=[];
i=1;
Id = [];
while i<=p
    id{i}=find(targets==Lab(i));
    l(i)=length(id{i});
    X=[X;traindata(id{i},:)];
    Y=[Y;targets(id{i})];
    Id = [Id, id{i}];
    i=i+1;
end
[~,Id0]=sort(Id); 

 lc=cumsum(l);
 
 w = [];
 b = [];
 
 s = cell(1,p);
 r = cell(1,p);
 
K=Kernel(ker, X',X',sigma);
K=(K+K')/2;

nch =  nchoosek([1:p],2);
Code = zeros(p,size(nch,1));

for k =1:size(nch,1)
    Code(nch(k,:),k) = [-1,1];
    i = nch(k,1); j =nch(k,2);
    s{k} =lc(i)-l(i)+1:lc(i);
    r{k} = lc(j)-l(j)+1:lc(j);
    c{k} = [1:n];
    c{k}([lc(i)-l(i)+1:lc(i)  lc(j)-l(j)+1:lc(j)]) = [];
        Ak = X(c{k},:);
        Lk = X(s{k},:); 
        Rk =X(r{k},:);
        row=[c{k} c{k} s{k} r{k}];
        Hk = K(row,row);
        %    model= subSVOR(Ak,Hk,Lk,Rk,c1, c2, epsilon, rho);
        model = subSVOR_quadgrog(Ak, Hk, Lk,Rk,c1, c2, epsilon);
        
        P{k} = model.P;
        p0{k} = model.p;
        alpha{k} = model.alpha;
        alphax{k}= model.alphax;
        normw{k} = model.normw;
        time(k) = model.time;
        b(k,1) = model.b;
        Id1= [s{k} c{k} r{k}];
        SVs{k}= model.SVs(Id1);
end
   par.l= s;
   par.r = r;
   par.c= c;
   par.P = P;
   par.p = p0;
   par.alpha = alpha;
   par.alphax = alphax;
   par.normw = normw;
   par.time = time;
   par.b = b;
   par.X=X;
   par.maxtime = max(par.time);
    par.SVs =  SVs;
    par.Y=Y;
    par.Code = Code;
%    par.w = w;
%    par.b = b;

end


function par = subSVOR_quadgrog(Ak, H, Lk,Rk, c1, c2, epsilon)

t_start = tic;
%Global constants and defaults
QUIET    = 0;

m = size(Ak,2);
lk = size(Ak,1);
rk1 = size(Lk,1); 
rk2 =  size(Rk,1);
rk = rk1+rk2;
%ADMM solver
mP=2*lk +rk;    %dimension of Phi
mG=4*lk + 2*rk;  %dimension of Gamma
mU=mG+1;                                %dimension of U
mp1 = 1 : lk;        
mp2 = lk+1 : 2*lk;
mp3 = 2*lk+1: mP;

c= zeros(mU,1);
c([mp1 mp2]+1) = c1;
c(mp3+1) = c2;

q = ones(mP,1);
q(mp1) = epsilon;
q(mp2) = epsilon;
q(mp3) = -1;

p = ones(mP,1);
p(mp2) = -1;
p(mP-rk2+1:mP) =  -1;

% H = [Ak; -Ak; Bk{1}; -Bk{2}]*[Ak; -Ak; Bk{1}; -Bk{2}]';   %linear Kernel 
% Qk=[Ak; Ak; Lk; Rk];

% H= Kernel(ker, Qk',Qk',sigma);
% H=(H'+H)/2+1;
H0 = (H+1).*(p*p');

% % options = optimoptions('quadprog',...
% %     'Algorithm','interior-point-convex','Display','off');
options = optimoptions('quadprog',...
    'Algorithm','interior-point-convex','MaxIter',200,'Display','off');
% % x = quadprog(H,f,A,b,Aeq,beq,lb,ub,x0,options)
A = []; b = []; f = q; Aeq =[]; beq = []; lb = zeros(mP,1); ub = c(2:mP+1); x0 = []; 
 P = quadprog(H0,f,A,b,Aeq,beq,lb,ub,x0,options);

 % diagnostics, reporting, termination checks
 
 par.P= P;
 par.p= p;
 par.alpha = P(mp1);
 par.alphax = P(mp2);
 P3=P(mp3);
 par.SVs = [P3(1:rk1);abs(P(mp1)-P(mp2));P3(rk1+1:rk1+rk2)];
%  par.normw = sqrt(P'*(H0.*(p'*p))*P);
  par.normw =1;
 bk =(p'*P);
 par.b =bk;

%  switch ker 
%      case 'linear'
%   par.w = [Ak; -Ak; Bk{1}; -Bk{2}]'*P;
%   b1 = Ak(P(mp1)~=0,:)* par.w-epsilon;   
%   b2 = Ak(P(mp2)~=0,:)* par.w+epsilon;
%   par.b = mean([b1;b2]);
%  end

 
 if  ~QUIET
       par.time = toc(t_start);
 end

end

function [par, history]  = subSVOR(Ak,H, Lk,Rk, c1, c2, epsilon, rho)
%'traindata' is a training data matrix , each line is a sample vector
%'targets' is a label vector,should  start  from 1 to p

% rho is the augmented Lagrangian parameter.

% history is a structure that contains the objective value, the primal and
% dual residual norms, and the tolerances for the primal and dual residual
% norms at each iteration.

t_start = tic;

%Data preprocessing

%Global constants and defaults
QUIET    = 0;
MAX_ITER = 200;
% ABSTOL   = 1e-4;
% RELTOL   = 1e-2;
ABSTOL   = 1e-6;
RELTOL   = 1e-3;

lk = size(Ak,1);
rk1 = size(Lk,1); 
rk2 =  size(Rk,1);
rk = rk1+rk2;

%ADMM solver
mP=2*lk +rk;    %dimension of Phi
mG=4*lk + 2*rk;  %dimension of Gamma
mU=mG;                                %dimension of U
P = zeros(mP,1);                       %Phi={ w,b, xi, xi*}
G = zeros(mG,1);                       %Gamma={eta,eta*,delta, phi, phi*}
U = zeros(mU,1);                       %U- -update

mp1 = 1 : lk;        
mp2 = lk+1 : 2*lk;
mp3 = 2*lk+1: mP;

c= zeros(mU,1);
c([mp1 mp2]) = c1;
c(mp3) = c2;

q = ones(mP,1);
q(mp1) = epsilon;
q(mp2) = epsilon;
q(mp3) = -1;

p = ones(mP,1);
p(mp2) = -1;
p(mP-rk2+1:mP) =  -1;

% H = [Ak; -Ak; Bk{1}; -Bk{2}]*[Ak; -Ak; Bk{1}; -Bk{2}]';   %linear Kernel 

%  Qk=[Ak; Ak; Lk; Rk];

% H0= Kernel(ker, Qk',Qk',sigma);%Kernel Matrix of Bound SVM 
% H = (H0+1).*(p*p');
% H0= Kernel(ker, Qk',Qk',sigma);%Kernel Matrix of Bound SVM 
H = (H+1).*(p*p');

k=1; 
while k <=MAX_ITER
    %Phi={ w,b, xi, xi*}-update
    V = U + B(mP,G) - c;
    br = - q - rho * AtX(mP,V);
    [P, niters] = cgsolve(H, br, rho);
    
    %Gamma={eta,eta*,delta, phi, phi*}-update with relaxation
    Gold = G;
    G = pos(Bt(mP,c-AX(P)-U));
    
    %U- -update
    r = AX(P) + B(mP,G) - c;
    U = U + r;
    %     history.objval(k)  = objective(H,P,q);
    s = rho*AtX(mP,B(mP,G - Gold));
    history.r_norm(k) = norm(r);
    history.s_norm(k) = norm(s);

    history.eps_pri(k) = sqrt(mU)*ABSTOL + RELTOL*max([norm(AX(P)), norm(B(mP,G)), norm(c)]);
    history.eps_dual(k)= sqrt(mP)*ABSTOL + RELTOL*norm(rho*AtX(mP,U));
    
    if  history.r_norm(k) < history.eps_pri(k) && ...
         history.s_norm(k) < history.eps_dual(k);
         break
    end
    k = k+1;
end
 
 if  ~QUIET
       par.time = toc(t_start);
 end
 
 par.P= P;
 par.p= p;
 par.alpha = P(mp1);
 par.alphax = P(mp2);
% par.normw = sqrt(P'*(H0.*(p'*p))*P);
par.normw=1;
bk =(p'*P);

par.b =bk;
 
 if  ~QUIET
       par.time = toc(t_start);
 end
 
end

% function obj = objective(H,P,q)
%     obj = 1/2 * vHv(H,P) + q'*P;
% end

function [x, niters] = cgsolve(H, b,rho,tol, maxiters)
% cgsolve : Solve Ax=b by conjugate gradients
%
% Given symmetric positive definite sparse matrix A and vector b, 
% this runs conjugate gradient to solve for x in A*x=b.
% It iterates until the residual norm is reduced by 10^-6,
% or for at most max(100,sqrt(n)) iterations

n = length(b);

if (nargin < 4) 
    tol = 1e-6;
    maxiters = max(100,sqrt(n));
elseif(nargin < 5)
    maxiters = max(100,sqrt(n));
end

normb = norm(b);
x = zeros(n,1);
r = b;
rtr = r'*r;
d = r;
niters = 0;
while sqrt(rtr)/normb > tol  &&  niters < maxiters
    niters = niters+1;
%     Ad = A*d; 
    Ad = AtAX(H, d,rho);
    alpha = rtr / (d'*Ad);
    x = x + alpha * d;
    r = r - alpha * Ad;
    rtrold = rtr;
    rtr = r'*r;
    beta = rtr / rtrold;
    d = r + beta * d;
end
end

%Ad = A*d
function Ad = AtAX(H, d,rho)
Ad = H*d +2* rho*d;
end

function F = AtX(mP,V)
F = V(1:mP)+V(mP+1:end);
end

function h = AX(P)
h = [P;P];
end


function h = vHv(H,d)
h = d'*(H*d) ;
end

function Bv= B(mP,v)
Bv(:,1) = [v(1:mP);-v(mP+1:end)];
end

function Btd = Bt(mP,d)
Btd = [d(1:mP);-d(mP+1:end)];
end

function A = pos(A)
A(A<0)=0;
end