function PCAres=MMPCA(Y,R) % performs PCA based on the multivariate MM estimate of shape, with % fast and robust bootstrap % % [calls multiMM.m and multiMMRobBoot.m] % % INPUT : % Y : (n x q) data % R : number of bootstrap samples % OUTPUT : % PCAres.MMest : (structure) result of multiMM % PCAres.MMboot : (structure) result of multiMMRobBoot % PCAres.vecMMest : (2*q+2*q*q x 1) MM(and S) estimates in vec-form % PCAres.eigG : (q x 1) eigenvalues of MM shape in ascending (!) order % PCAres.eigvec : (q*q x 1) eigenvectors of MM-shape (in vec-form) % PCAres.pvar : (q-1 x 1) percentages of variance for MM eigenvalues % PCAres.variance_MM : (2*q+2*q*q x 1) bootstrap variance for % MM-estimates % PCAres.variance_eigG : (q x 1) bootstrap variance for MM eigenvalues % PCAres.variance_eigvec : (q*q x 1) bootstrap variance for MM eigenvectors % PCAres.variance_pvar : (q-1 x 1) bootstrap variance for percentage of % variance for MM-eigenvalues % PCAres.avgangle : (q x 1) average angles between bootstrap eigenvectors % and original MM eigenvectors % PCAres.CI95_MM : (2*q+2*q*q x 2) 95% BCa intervals for MM-estimates % ([lower upper]) % PCAres.CI95_eigG : (q x 2) 95% BCa intervals for MM eigenvalues % PCAres.CI95_eigvec : (q*q x 2) 95% BCa intervals for MM eigenvectors % PCAres.CI95_pvar : (q-1 x 2) 95% BCa intervals for percentage of % variance for MM-eigenvalues % PCAres.CI95one_pvar : (q-1 x 1) 95% one-sided BCa intervals for % percentage of variance for MM-eigenvalues ([-infty upper]) % PCAres.failedsamples : number of discarded bootstrap samples due to % non-positive definiteness of shape % (results from 'discarded' bootstrap samples are omitted % such that actual number of recalculations can be lower than R) % REMARK: whenever matrices contain eigenvalue-related values, the ordering is "ASCENDING" % (e.g. first column of eigenvectors contains vector corresponding to smallest eigenvalue) % (or first row in case of the matrix of bootstrapped values) [n q] = size(Y); bdp = .5; % compute the MM-estimates MMests=multiMM(Y); MMloc = MMests.loc; MMSigma = MMests.covariance; MMGamma = MMests.shape; SSigma = MMests.Sshape*(MMests.auxscale)^2; Sloc = MMests.Sloc; % compute eigenvalues/vectors [VGamma DGamma] = eig(MMGamma); [VSigma DSigma] = eig(MMSigma); [MMeigvalsGamma IXGamma] = sort(diag(DGamma)); [MMeigvalsSigma IXSigma] = sort(diag(DSigma)); for k=1:q [dummy IX]=sort(abs(VSigma(:,IXSigma(k)))); VSigma(:,IXSigma(k))=sign(VSigma(IX(q),IXSigma(k)))*VSigma(:,IXSigma(k)); end MMeigvecs=vecop(VSigma(:,IXSigma)); MMvarperc=zeros(1,q-1); for k=1:(q-1) MMvarperc(k)=sum(MMeigvalsSigma((q-k+1):q))/sum(MMeigvalsSigma); end % perform fast and robust bootstrap on MM-result bootres = multiMMRobBoot(Y, ones(n,1), Sloc, SSigma, MMloc, MMSigma, R, bdp); avgangle = mean(bootres.angles,2)'; MMvariances = var(bootres.centered,0,2)'; eigGvariances = var(bootres.eigGcentered,0,2)'; pvarvariances = var(bootres.pvarcentered,0,2)'; eigvecvariances = var(bootres.eigveccentered,0,2)'; % sort bootstrap recalculations for constructing intervals sortedMMest = sort(bootres.centered,2); sortedeigG = sort(bootres.eigGcentered,2); sortedpvar = sort(bootres.pvarcentered,2); sortedeigvec = sort(bootres.eigveccentered,2); vecMMest = bootres.MMest; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % compute BCa confidence limits % empirical inlfuences for computing a in BCa intervals, based on IF(MM) EinfMM = einfsMM(Y, MMloc, MMSigma); inflE = [EinfMM.loc EinfMM.shape EinfMM.covS EinfMM.locS]; eigGinflE = EinfMM.eigs; eigvecinflE = EinfMM.eigvec; pvarinflE = EinfMM.varperc; % set R equal to the actual number of OK bootstrap samples failedsamples = R - size(bootres.centered,2); R = R - failedsamples; MMest_95CIbca = zeros(2*q+2*q*q,2); for i=1:(2*q+2*q*q) nofless=length(sortedMMest(i,sortedMMest(i,:)<=0)); w=norminv(nofless/(R+1)); a=1/6*sum(inflE(:,i).^3)/(sum(inflE(:,i).^2)^(3/2)); alphatildelow95=normcdf(w+(w-1.96)/(1-a*(w-1.96))); alphatildehigh95=normcdf(w+(w+1.96)/(1-a*(w+1.96))); indexlow95=max((R+1)*alphatildelow95,1); indexlow95=min(indexlow95,R); indexhigh95=min((R+1)*alphatildehigh95,R); indexhigh95=max(indexhigh95,1); MMest_95CIbca(i,1) = sortedMMest(i,round(indexlow95)) + vecMMest(i); MMest_95CIbca(i,2) = sortedMMest(i,round(indexhigh95)) + vecMMest(i); end eigG_95CIbca = zeros(q,2); for i=1:q nofless=length(sortedeigG(i,sortedeigG(i,:)<=0)); w=norminv(nofless/(R+1)); a=1/6*sum(eigGinflE(:,i).^3)/(sum(eigGinflE(:,i).^2)^(3/2)); alphatildelow95=normcdf(w+(w-1.96)/(1-a*(w-1.96))); alphatildehigh95=normcdf(w+(w+1.96)/(1-a*(w+1.96))); indexlow95=max((R+1)*alphatildelow95,1); indexlow95=min(indexlow95,R); indexhigh95=min((R+1)*alphatildehigh95,R); indexhigh95=max(indexhigh95,1); eigG_95CIbca(i,1) = sortedeigG(i,round(indexlow95)) + MMeigvalsGamma(i); eigG_95CIbca(i,2) = sortedeigG(i,round(indexhigh95)) + MMeigvalsGamma(i); end eigvec_95CIbca = zeros(q*q,2); for i=1:(q*q) nofless=length(sortedeigvec(i,sortedeigvec(i,:)<=0)); w=norminv(nofless/(R+1)); a=1/6*sum(eigvecinflE(:,i).^3)/(sum(eigvecinflE(:,i).^2)^(3/2)); alphatildelow95=normcdf(w+(w-1.96)/(1-a*(w-1.96))); alphatildehigh95=normcdf(w+(w+1.96)/(1-a*(w+1.96))); indexlow95=max((R+1)*alphatildelow95,1); indexlow95=min(indexlow95,R); indexhigh95=min((R+1)*alphatildehigh95,R); indexhigh95=max(indexhigh95,1); eigvec_95CIbca(i,1) = sortedeigvec(i,round(indexlow95)) + MMeigvecs(i); eigvec_95CIbca(i,2) = sortedeigvec(i,round(indexhigh95)) + MMeigvecs(i); end pvar_95CIbca = zeros(q-1,2); pvar_95CIbca_one = zeros(q-1,1); for i=1:(q-1) nofless=length(sortedpvar(i,sortedpvar(i,:)<=0)); w=norminv(nofless/(R+1)); a=1/6*sum(pvarinflE(:,i).^3)/(sum(pvarinflE(:,i).^2)^(3/2)); alphatildelow95=normcdf(w+(w-1.96)/(1-a*(w-1.96))); alphatildehigh95=normcdf(w+(w+1.96)/(1-a*(w+1.96))); indexlow95=max((R+1)*alphatildelow95,1); indexlow95=min(indexlow95,R); indexhigh95=min((R+1)*alphatildehigh95,R); indexhigh95=max(indexhigh95,1); pvar_95CIbca(i,1) = sortedpvar(i,round(indexlow95)) + MMvarperc(i); pvar_95CIbca(i,2) = sortedpvar(i,round(indexhigh95)) + MMvarperc(i); % one-sided interval alphatildehigh95=normcdf(w+(w+1.645)/(1-a*(w+1.645))); indexhigh95=min((R+1)*alphatildehigh95,R); indexhigh95=max(indexhigh95,1); pvar_95CIbca_one(i) = sortedpvar(i,round(indexhigh95)) + MMvarperc(i); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PCAres.MMest = MMests; PCAres.MMboot = bootres; PCAres.vecMMest = vecMMest; PCAres.eigG = MMeigvalsGamma'; PCAres.eigvec = MMeigvecs; PCAres.pvar = MMvarperc'; PCAres.variance_MM = MMvariances; PCAres.variance_eigG = eigGvariances; PCAres.variance_eigvec = eigvecvariances; PCAres.variance_pvar = pvarvariances; PCAres.avgangle = avgangle; PCAres.CI95_MM = MMest_95CIbca; PCAres.CI95_eigG = eigG_95CIbca; PCAres.CI95_eigvec = eigvec_95CIbca; PCAres.CI95_pvar = pvar_95CIbca; PCAres.CI95one_pvar = pvar_95CIbca_one; PCAres.failedsamples = failedsamples; % -------------------------------------------------------------------- function einfs = einfsMM(Y, MMloc, MMcov) % empirical influences for MM-estimates + PCA estimates [n p] = size(Y); c = biweight95eff(p); [c0 b0] = multipar(p,0.5); MMshape = det(MMcov)^(-1/p) * MMcov; % compute eigenvalues/vectors [VGamma DGamma] = eig(MMshape); [eigs IXGamma] = sort(diag(DGamma)); for k = 1:p [dummy IX] = sort(abs(VGamma(:,IXGamma(k)))); VGamma(:,IXGamma(k)) = sign(VGamma(IX(p),IXGamma(k)))*VGamma(:,IXGamma(k)); end eigvecs = VGamma(:,IXGamma); varperc = zeros(1,p-1); for k = 1:(p-1) varperc(k) = sum(eigs((p-k+1):p))/sum(eigs); end divec = sqrt(mahsq(Y, MMloc, MMcov, n)); psidervec = rhobiweightder2(divec, c); psidervecS = rhobiweightder2(divec, c0); uvec = rhobiweightder1(divec, c) ./ divec; uvecS = rhobiweightder1(divec, c0) ./ divec; vvec = rhobiweightder1(divec,c) .* divec; vvecS = rhobiweightder1(divec,c0) .* divec; rhovecS = rhobiweight(divec,c0); betaMM = (1-1/p) * mean(uvec) + 1/p * mean(psidervec); einfs.loc = 1 / betaMM * (uvec' * ones(1,p)) .* (Y - ones(n, 1) * MMloc); betaS = (1-1/p) * mean(uvecS) + 1/p * mean(psidervecS); einfs.locS = 1 / betaS * (uvecS' * ones(1,p)) .* (Y - ones(n, 1) * MMloc); gamma3 = mean(vvecS); gamma1MM = mean(psidervec.*(divec.^2) + (p+1)*vvec) / (p+2); gamma1S = mean(psidervecS.*(divec.^2) + (p+1)*vvecS) / (p+2); einfscov = zeros(n,p*p); einfscovS = zeros(n,p*p); einfsshape = zeros(n,p*p); einfseigvec = zeros(n,p*p); einfseigscov = zeros(n,p); einfseigs = zeros(n,p); einfsvarperc = zeros(n,p-1); for i = 1:n IFcov = 2/gamma3*(rhovecS(i) - b0) * MMcov + 1/gamma1MM*p*vvec(i)*((Y(i,:)-MMloc)'*(Y(i,:)-MMloc)/divec(i)^2-1/p*MMcov); einfscov(i,:) = vecop( IFcov )'; IFcovS = 2/gamma3*(rhovecS(i) - b0) * MMcov + 1/gamma1S*p*vvecS(i)*((Y(i,:)-MMloc)'*(Y(i,:)-MMloc)/divec(i)^2-1/p*MMcov); einfscovS(i,:) = vecop( IFcovS )'; IFshape = 1/gamma1MM*p*vvec(i)*det(MMcov)^(-1/p)*((Y(i,:)-MMloc)'*(Y(i,:)-MMloc)/divec(i)^2-1/p*MMcov); einfsshape(i,:) = vecop( IFshape )'; IFeigvecs = zeros(p,p); for k=1:p IFeigveck = zeros(p,1); for j=1:p if j~=k IFeigveck = IFeigveck + 1/(eigs(k)-eigs(j))*(eigvecs(:,j)'*IFshape*eigvecs(:,k))*eigvecs(:,j); end end IFeigvecs(:,k) = IFeigveck; einfseigs(i,k) = eigvecs(:,k)' * IFshape * eigvecs(:,k); einfseigscov(i,k) = eigvecs(:,k)' * IFcov * eigvecs(:,k); end einfseigvec(i,:) = vecop(IFeigvecs)'; for k=1:(p-1) einfsvarperc(i,k) = 1/sum(eigs) * ((1-varperc(k))*sum(einfseigs(i,(p-k+1):p)) - varperc(k)*sum(einfseigs(i,1:(p-k)))); end end einfs.shape = einfsshape; einfs.cov = einfscov; einfs.covS = einfscovS; einfs.eigvec = einfseigvec; einfs.eigs = einfseigs; einfs.eigscov = einfseigscov; einfs.varperc = einfsvarperc; % -------------------------------------------------------------------- function rho=rhobiweight(x,c) % Computes Tukey's biweight rho function met constante c voor alle waarden % in de vector x. hulp=x.^2/2-x.^4/(2*c^2)+x.^6/(6*c^4); rho=hulp.*(abs(x)=c); % -------------------------------------------------------------------- function psi=rhobiweightder1(x,c) % Computes Tukey's biweight psi function met constante c voor alle waarden % in de vector x. hulp=x-2.*x.^3/(c^2)+x.^5/(c^4); psi=hulp.*(abs(x)