function [A,B,C,G,EV,fp,fptot]=SCA_T3(X,N,J,K,R1,R2,R3,conv,runs,rotation,direct)

% This function fits the multi-set Tucker3 model, i.e., Simultaneous
% Component Analysis by means of Tucker3. The following is minimized:
% sum_k ||Xk-Ak*(sum_r(ckr*Gr))*B'||^2 
%
% Input:
%   X=[X1;X2;..;XK] ((N1+..+NK)xJ);     multi-set data
%   Xk(NkxJ), k=1,..,K;                 Xk should be centered
%   N=[N1;N2;..;NK] (Kx1); N1,N2,...NK>=R1;
%        R1,R2,R3: size of core G, with J>=R2, K>=R3
%   conv: convergence criterion, e.g., 1e-6
%   runs=[run1 run2]: do run1 runs with random start, and first run with rational start if run2=1
%           best solution is kept
%   rotation=[opt1 opt2 opt3 opt4]: parameters for rotation of (A,B,C)*G:
%       opt1=0 : no rotation
%       opt1=1 : orthogonal rotation to simple core G and B
%       opt1=2 : orthogonal rotation to simple core G and B and C
%       opt1=3 : orthogonal rotation to simple core G only
%       opt2,opt3,opt4 : rotate A,B,C or not (yes=1, no=0)
%   direct=1 : Ak'*Ak=Nk*eye(R1)    use for centered data
%   direct=0 : Ak'*Ak=eye(R1)       use for covariance estimation
%        
% Output:
%   A=[A1;A2;..;AK]((N1+..+NK)xR1),      inv(Nk)*Ak'*Ak=eye(R1)
%   B (JxR2);                    
%   C (KxR3);   
%   G (R1xR2*R3);    
%   EV (R1xR2*R3);      explained variance due to each core entry
%   fp          fit percentage per k
%   fptot       total fit percentage
%
% References:
% A. Stegeman (2018). Simultaneous component analysis by means of Tucker3.
% Psychometrika, 83, 21-47.
%
% H.A.L. Kiers (1998). Joint orthomax rotation of the core and component 
% matrices resulting from three-mode principal components analysis.
% Journal of Classification, 15, 245-263.



rescale=1;

N=[0;N];

if runs(2)==1
    runtot=runs(1)+1;
else
    runtot=runs(1);
end

LLB=zeros(J,R2,runtot);LLC=zeros(K,R3,runtot);LLA=zeros(sum(N),R1,runtot);
LLG=zeros(R1,R2*R3,runtot);LLf=zeros(runtot,1);LLiter=zeros(runtot,1);

ssX=ssq(X);


% Do run with rational start if runs(2)=1, followed by runs(1) runs with random starts
for run=1:runtot
    fprintf('\n');
    fprintf(' RUN %i \n',run);
    if and(run==1,runs(2)==1)==1
        % rational start via SVDs
        A=[];
        for k=2:K+1
            [Uk,~,Vk]=svd(X(sum(N(1:k-1))+1:sum(N(1:k)),:),0);
            Ak=Uk(:,1:R1);
            A=[A;Ak];
        end
        Q=[];
        for k=2:K+1
            Q=[Q A(sum(N(1:k-1))+1:sum(N(1:k)),:)'*X(sum(N(1:k-1))+1:sum(N(1:k)),:)]; % size(Q)=(R1xJK)
        end;
        Z=permnew(Q,R1,J,K);		% yields JxKxR1 array
        [U,S,V]=svd(Z,0);
        B=U(:,1:R2);
        Z=permnew(Z,J,K,R1);		% yields KxR1xJ array
        [U,S,V]=svd(Z,0);		
        C=U(:,1:R3); 
    else
        % random start
        B=orth(rand(J,R2)-.5);
        C=orth(rand(K,R3)-.5);
        A=[];
        for k=1:K
            Ak=orth(rand(N(k+1),R1)-.5);
            A=[A;Ak];
        end
    end
        
    Q=[];
    for k=2:K+1
        Q=[Q A(sum(N(1:k-1))+1:sum(N(1:k)),:)'*X(sum(N(1:k-1))+1:sum(N(1:k)),:)]; % size(Q)=(R1xJK)
    end;
    % Update core G as in Tucker2 ALS
    Z=permnew(eye(R1)'*Q,R1,J,K);
    Z=permnew(B'*Z,R2,K,R1);
    G=permnew(C'*Z,R3,R1,R2);   
     
    D=[];
    for k=1:K
        Dk=zeros(R1,R2);
        for r=1:R3
            Dk=Dk+C(k,r)*G(:,(r-1)*R2+1:r*R2);
        end
        D=[D Dk];
    end;
        
    % Evaluate the starting function value
    f=0;
    for k=2:K+1
        f=f+ssq(X(sum(N(1:k-1))+1:sum(N(1:k)),:)-A(sum(N(1:k-1))+1:sum(N(1:k)),:)*D(:,(k-2)*R2+1:(k-1)*R2)*B');
    end;
    fprintf(' Function value is %12.8f at Start \n',f);
    
    fold=f+2*conv*f;
    iter=0;
    while fold-f>conv*f 
        fold=f;
        iter=iter+1;
           
        % Update Ak   
        A=[]; 
        for k=2:K+1
            X1k=D(:,(k-2)*R2+1:(k-1)*R2)*B'*X(sum(N(1:k-1))+1:sum(N(1:k)),:)';  % size(X1k)=(R1xIk)
            [Uk,~,Vk]=svd(X1k',0);            
            Ak=Uk*Vk';
            A=[A;Ak];  
        end;

        % Update B,C,G  
        Q=[];
        for k=2:K+1
            Q=[Q A(sum(N(1:k-1))+1:sum(N(1:k)),:)'*X(sum(N(1:k-1))+1:sum(N(1:k)),:)]; % size(Q)=(R1xJK)
        end;
        % Do one iteration of Tucker2 ALS (B,C,G) for R1xJxK array Q
        % update B
        Z=permnew(Q,R1,J,K);
        Z=permnew(Z,J,K,R1);
        Z=permnew(C'*Z,R3,R1,J);
        Z=permnew(eye(R1)'*Z,R1,J,R3);	% yields JxR3xR1 array
        [B,temp]=qr(Z*(Z'*B),0); 
        % update C
        Z=permnew(eye(R1)'*Q,R1,J,K);
        Z=permnew(B'*Z,R2,K,R1);        % yields KxR1xR2 array
        [C,temp]=qr(Z*(Z'*C),0); 
        % Update core G
        Z=permnew(eye(R1)'*Q,R1,J,K);
        Z=permnew(B'*Z,R2,K,R1);
        G=permnew(C'*Z,R3,R1,R2);   
                        
        % Evaluate error ssq f
        D=[];
        for k=1:K
            Dk=zeros(R1,R2);
                for r=1:R3
                    Dk=Dk+C(k,r)*G(:,(r-1)*R2+1:r*R2);
                end
            D=[D Dk];
        end;
    
        f=0;
        for k=2:K+1
            f=f+ssq(X(sum(N(1:k-1))+1:sum(N(1:k)),:)-A(sum(N(1:k-1))+1:sum(N(1:k)),:)*D(:,(k-2)*R2+1:(k-1)*R2)*B');
        end;
        if rem(iter,10)==0
            fprintf(' Function value is %12.8f at iteration %g \n',f,iter);
        end
    end;
    LLB(:,:,run)=B;
    LLC(:,:,run)=C;
    LLA(:,:,run)=A;
    LLG(:,:,run)=G;
    LLf(run)=f;LLiter(run)=iter;
end;


% Pick best solution
[f,index]=min(LLf);
B=LLB(:,:,index);
C=LLC(:,:,index);
A=LLA(:,:,index);
G=LLG(:,:,index);
iter=LLiter(index);
fptot=100-100*f/ssX;

% Compute fit percentage per k
D=[];
for k=1:K
    Dk=zeros(R1,R2);
    for r=1:R3
        Dk=Dk+C(k,r)*G(:,(r-1)*R2+1:r*R2);
    end
    D=[D Dk];
end;
fp=zeros(K,1);
fp2=zeros(K,1);
M=[];
for k=2:K+1
    Xk=X(sum(N(1:k-1))+1:sum(N(1:k)),:);
    modelk=A(sum(N(1:k-1))+1:sum(N(1:k)),:)*D(:,(k-2)*R2+1:(k-1)*R2)*B';
    fk=ssq(Xk-modelk);
    fp(k-1)=100-100*fk/ssq(Xk);
    fp2(k-1)=100*ssq(modelk)/ssq(Xk);
    M=[M;modelk];
end
% fptot=100*ssq(M)/ssX equal to 100-100*ssq(X-M)/ssX 
% Note: fp and fptot are not affected by rotating and rescaling 
% Note: fp and fp2 are equal 


% Rotate solution (eye(R1),B,C)*G 
fprintf('\n');
fprintf(' ROTATION PROCEDURE \n');
opt1=rotation(1);
opt2=rotation(2);
opt3=rotation(3);
opt4=rotation(4);
if opt1==1
    [S,B,C,G,~,~,~,~,~,~,~,~,~]=varimcoco(eye(R1),B,C,G,0,1,0,opt2,opt3,opt4,10);
    A=A*S;
end
if opt1==2
    [S,B,C,G,~,~,~,~,~,~,~,~,~]=varimcoco(eye(R1),B,C,G,0,1,1,opt2,opt3,opt4,10);
    A=A*S;
end
if opt1==3
    [S,B,C,G,~,~,~,~,~,~,~,~,~]=varimcoco(eye(R1),B,C,G,0,0,0,opt2,opt3,opt4,10);
    A=A*S;
end


% Rescale Ak and Ck 
if direct==1
    for k=2:K+1
        Ak=A(sum(N(1:k-1))+1:sum(N(1:k)),:);
        Ak=sqrt(N(k))*Ak;
        A(sum(N(1:k-1))+1:sum(N(1:k)),:)=Ak;
        C(k-1,:)=C(k-1,:)/sqrt(N(k));
    end
end
    
% Rescaling and sign changing solution (eye(R1),B,C)*G
% largest loading positive in each column of B, compensate in G
if rescale==1
    for r2=1:R2
        [temp,row]=max(abs(B(:,r2)));
        s=sign(B(row,r2));
        B(:,r2)=s*B(:,r2);
        for r3=1:R3
            G(:,r2+(r3-1)*R2)=s*G(:,r2+(r3-1)*R2);
        end
    end

    % rescale core slices and B such as in Timmerman & Kiers (2003)
    D=zeros(R2,R2);
    for k=1:K
        Dk=zeros(R1,R2);
        for r=1:R3
            Dk=Dk+C(k,r)*G(:,(r-1)*R2+1:r*R2);
        end
        D=D+N(k+1)*Dk'*Dk;
    end
    LD=inv(sqrt(diag(diag(D))))*sqrt(sum(N))*eye(R2);
    for r=1:R3
        G(:,(r-1)*R2+1:r*R2)=G(:,(r-1)*R2+1:r*R2)*LD;
    end
    B=B*inv(LD);
    
    % largest core entry normalized to 1 per core slice, compensate in C
    for r3=1:R3
        m=max(max(abs(G(:,1+(r3-1)*R2:r3*R2))));
        G(:,1+(r3-1)*R2:r3*R2)=G(:,1+(r3-1)*R2:r3*R2)/m;
        C(:,r3)=C(:,r3)*m;
    end
    
    % largest loading positive in each column of C, compensate in G
    for r3=1:R3
        [temp,row]=max(abs(C(:,r3)));
        s=sign(C(row,r3));
        C(:,r3)=s*C(:,r3);
        G(:,1+(r3-1)*R2:r3*R2)=s*G(:,1+(r3-1)*R2:r3*R2);
    end
    
    % largest weight positive in each row of C, compensate in A
    for k=1:K
        [temp,col]=max(abs(C(k,:)));
        s=sign(C(k,col));
        C(k,:)=s*C(k,:);
        A(sum(N(1:k))+1:sum(N(1:k+1)),:)=s*A(sum(N(1:k))+1:sum(N(1:k+1)),:);
    end
    
    % largest core entry (+1 or -1) positive per row of G, compensate in A
    for r1=1:R1
        [temp,col]=max(abs(G(r1,:)));
        s=sign(G(r1,col));
        G(r1,:)=s*G(r1,:);
        A(:,r1)=s*A(:,r1);
    end
    
    
end
    

% Compute explained variance due to each core entry
EV=zeros(R1,R2*R3);
EV2=zeros(R1,R2*R3);
for r1=1:R1
    for r2=1:R2
        for r3=1:R3
            M=[];
            for k=1:K
                modelk=A(sum(N(1:k))+1:sum(N(1:k+1)),r1)*C(k,r3)*G(r1,r2+(r3-1)*R2)*B(:,r2)';
                M=[M;modelk];
            end
            EV2(r1,r2+(r3-1)*R2)=100-100*ssq(X-M)/ssX;
            EV(r1,r2+(r3-1)*R2)=100*ssq(M)/ssX;
        end
    end
end
% Note: EV=EV2 and sum(sum(EV))=fptot



