function [SS,TT,UU,GG,X,Xblocks,iters,error_ssq]=tucker3_blocks(Z,A0,B0,C0,G0,conv,L,maxit,normalize)

% usage:    [SS,TT,UU,GG,X,Xblocks,iters,error_ssq]=tucker3_blocks(Z,A0,B0,C0,G0,conv,L,maxit,normalize);
%
% computes CP_limit Tucker3 decomposition (SS,TT,UU,GG) closest to Z using ALS
%       core GG is rxrxr and block diagonal
%       each block is boundary point with rank > size
%
% input:    unfolded array Z (n x m*p)
%           initial values (A0,B0,C0,G0):
%               A0 (n x r), B0 (m x r), C0 (p x r), G0 unfolded core (r x r*r)
%           conv convergence criterion: iterations stop as soon as the
%               decrease of the error ssq is no longer larger than conv
%               times the current error ssq
%           vector L contains sizes of the blocks (1,2,3,4 only)
%           maxit max number of iterations
%           normalize=1     for unit core entries (if possible)
%           normalize=2     for unit length columns in SS,TT,UU
%           any other value (or omitted, default 0): no normalization
%
% output:   CP_limit Tucker3 decomp (SS,TT,UU,GG) and corresponding array X
%           Xblocks contains the arrays corresponding to each block
%               (n x m*p x length(L))
%           iters number of ALS iterations performed
%           error_ssq error sum of squares at the end of the ALS loop
%               (evaluated before any normalization is applied)
%
% uses:     permnew.m     ssq.m    (by Henk Kiers)
%
% code by   Alwin Stegeman, a.w.stegeman@rug.nl   
%
% refs: H.A.L. Kiers & A.K. Smilde (1998) Constrained three-mode factor
%       analysis as a tool for parameter estimation with second-order
%       instrumental data. Journal of Chemometrics, 12, 125-147.
%       
%       A. Stegeman (2012). Candecomp/Parafac, from diverging components
%       to a decomposition in block terms. SIAM Journal on Matrix
%       Analysis and Applications, 33, 291-316.
%
%       A. Stegeman (2013). A three-way Jordan canonical form as limit 
%       of low-rank tensor approximations. SIAM Journal on Matrix 
%       Analysis and Applications, 34, 624-650.


% ---- argument handling -------------------------------------------------
if nargin<8
    error('tucker3_blocks:notEnoughInputs', ...
          'tucker3_blocks needs at least Z,A0,B0,C0,G0,conv,L,maxit.');
end
if nargin<9 || isempty(normalize)
    normalize=0;            % neither normalization branch is executed
end
% Only blocks of size 1..4 have a free-core pattern below; zero-size
% entries are tolerated because every loop over such a block is a no-op.
if any(L(:)~=round(L(:)) | L(:)<0 | L(:)>4)
    error('tucker3_blocks:badBlockSize', ...
          'block sizes in L must be 1, 2, 3 or 4.');
end

r=size(A0,2);

n=size(A0,1);
m=size(B0,1);
p=size(C0,1);

Zvec=reshape(Z,n*m*p,1);

% the other two unfoldings of the data, used in the B and C updates
Z1=permnew(Z,n,m,p);
Z2=permnew(Z1,m,p,n);


% ---- pattern of free core elements -------------------------------------
% freecore is built in the same r x r*r unfolding as G: block j occupies
% rows idx and, within each of its k slices, columns idx.
freecore=zeros(r,r*r);
s=0;
for j=1:length(L)
    k=L(j);
    s=s+k;
    switch k
        case 1
            Gj=1;
        case 2
            Gj=[eye(2) [0 1;0 0]];
        case 3
            Gj=[eye(3) [0 1 0;0 0 1;0 0 0] [0 0 1;0 0 0;0 0 0]];
        case 4
            Gj=[eye(4) [0 1 1 0;0 0 1 0;0 0 0 1;0 0 0 0] [0 0 1 0;0 0 0 1;zeros(2,4)] [0 0 0 1;zeros(3,4)]];
    end
    idx=s-k+1:s;
    for i=1:k
        freecore(idx,(s-k+i-1)*r+idx)=Gj(:,(i-1)*k+1:i*k);
    end
end

free=(reshape(freecore,r*r*r,1)==1);     % identifies free core elements


A=A0;
B=B0;
C=C0;
G=G0;

% compute error for starting values (A0,B0,C0,G0)
f=ssq(Z-A*G*kron(C',B'));

%fprintf(' Tucker3 function value at start is %12.8f \n',f);
iter=0;
fold=2*f;


% ---- ALS iterations ----------------------------------------------------
% Every update is a linear least squares problem. It is solved with the
% backslash operator instead of inv(F'*F)*F', which avoids forming the
% normal equations and the explicit inverse.
while (fold-f>f*conv) && (iter<maxit)

  iter=iter+1;
  fold=f;

  % update A

  F=kron(C,B)*G';
  A=(F\Z')';

  % update B

  H=permnew(G,r,r,r);
  F=kron(A,C)*H';
  B=(F\Z1')';

  % update C

  H=permnew(H,r,r,r);
  F=kron(B,A)*H';
  C=(F\Z2')';

  % Update Core: only the free elements are re-estimated, all other
  % elements keep the values they have in G

  Gvec=reshape(G,r*r*r,1);
  F=kron(kron(C,B),A);
  Gvec(free)=F(:,free)\Zvec;
  G=reshape(Gvec,r,r*r);

  % Evaluate f

  f=ssq(Z-A*G*kron(C',B'));
%  if rem(iter,10)==0,fprintf(' Tucker3 error ssq after iteration %g is %12.8f \n',iter,f);end;
end

%fprintf(' Final Tucker3 error ssq after iteration %g is %12.8f \n',iter,f);


% ---- normalize (most) free core elements to 1 --------------------------
if normalize==1
    s=0;
    for j=1:length(L)
        k=L(j);
        s=s+k;
        idx=s-k+1:s;
        A1=A(:,idx);
        B1=B(:,idx);
        C1=C(:,idx);
        % k x k*k unfolding of the j-th diagonal block of the core
        G1=zeros(k,k*k);
        for i=1:k
            G1(:,(i-1)*k+1:i*k)=G(idx,(s-k+i-1)*r+idx);
        end
        if k>=2
            % absorb the first slice of the block into A1, so that the
            % first slice of the block becomes the identity
            T=G1(:,1:k);
            A1=A1*T;
            G1=T\G1;
        end
        switch k
            case 1
                % unit length columns in A1 and B1; the lengths and the
                % core entry are absorbed into C1
                LA1=sqrt(A1'*A1);
                LB1=sqrt(B1'*B1);
                A1=A1/LA1;
                B1=B1/LB1;
                C1=C1*LA1*LB1*G1;
                G1=1;
            case 2
                u=G1(1,4);
                G1(1,4)=1;
                C1=C1*diag([1 u]);
            case 3
                u=G1(1,9);
                G1(1,9)=1;
                C1=C1*diag([1 1 u]);
                % the second slice is rescaled only when its (1,2) entry
                % is nonzero
                if abs(G1(1,5))>0
                    u=G1(1,5);
                    G1(1,5)=1;
                    G1(2,6)=G1(2,6)/u;
                    C1=C1*diag([1 u 1]);
                end
            case 4
                % slices 2,3,4 are divided by one of their own entries;
                % the scale factors are absorbed into the columns of C1
                u=G1(1,6);
                G1(:,5:8)=G1(:,5:8)/u;
                v=G1(1,11);
                G1(:,9:12)=G1(:,9:12)/v;
                w=G1(1,16);
                G1(:,13:16)=G1(:,13:16)/w;
                C1=C1*diag([1 u v w]);
        end
        % write the block back (B1 is only changed for k==1)
        A(:,idx)=A1;
        B(:,idx)=B1;
        C(:,idx)=C1;
        for i=1:k
            G(idx,(s-k+i-1)*r+idx)=G1(:,(i-1)*k+1:i*k);
        end
    end
end


% ---- ALTERNATIVE NORMALIZATION: lengths of columns A,B,C set to 1 ------
if normalize==2
    LA=sqrt(diag(A'*A));
    LB=sqrt(diag(B'*B));
    LC=sqrt(diag(C'*C));
    A=A*diag(1./LA);
    B=B*diag(1./LB);
    C=C*diag(1./LC);
    % the column lengths are absorbed into the core, slice by slice
    for i=1:r
        cidx=(i-1)*r+1:i*r;
        G(:,cidx)=LC(i)*diag(LA)*G(:,cidx)*diag(LB);
    end
end

X=A*G*kron(C',B');

SS=A;
TT=B;
UU=C;
GG=G;

iters=iter;
error_ssq=f;

% ---- array corresponding to each diagonal block ------------------------
Xblocks=zeros(n,m*p,length(L));
s=0;
for j=1:length(L)
    k=L(j);
    s=s+k;
    idx=s-k+1:s;
    Gj=zeros(k,k*k);
    for d=1:k
        Gj(:,(d-1)*k+1:d*k)=GG(idx,(s-k+d-1)*r+idx);
    end
    Xblocks(:,:,j)=SS(:,idx)*Gj*kron(UU(:,idx)',TT(:,idx)');
end
