% Simulated data %clear; %simsamplesize=300; % Sample size of simulation. % true w is [0.8301,0.0692,-0.5534,0] %w_sim=[12,1,-8,0]'; w_sim=w_sim/norm(w_sim); % true w is [.6369,.0531,-.4246,.3715,-.4777,0.2123,0,0,0,0] %w_sim=[12,1,-8,7,-9,4,0,0,0,0]'; w_sim=w_sim/norm(w_sim); % Generate data by the link function % f(si)=(si^2)exp(si), with single index "si" %Xsim=rand(simsamplesize,length(w_sim)); % Standardize predictors. %for j=1:length(w_sim); %if j==1; StandCovs=[]; end; %StandCovs=[StandCovs,(Xsim(:,j)-mean(Xsim(:,j)))/std(Xsim(:,j),1)]; %end; %si_sim=sum(Xsim.*repmat(w_sim,1,simsamplesize)',2); % % Error distribution. %errors=0; %errors=normrnd(0,.1,simsamplesize,1); % error distribution %errors=normrnd(0,.2,simsamplesize,1); % error distribution %errors=normrnd(0,.4,simsamplesize,1); % error distribution %Linksim=(si_sim.^2).*exp(si_sim); %DATA=[Linksim+errors,Xsim]; Outcome=1; Covariates=2:(length(w_sim)+1); %Labels={'X1','X2','X3','X4'}; %Labels={'X1','X2','X3','X4','X5','X6','X7','X8','X9','X10'}; %s=0; MaxKnots=15; % Maximum number of knots. %NumberSamples=75000; thin=5; % Number and thinning of MCMC samples. %Heteroscedastic=1; %Indicate whether you want heteroscedastic model (0 for homoscedastic) %delta1=1; delta2=.5; % gamma prior parameters for v % Example 1: UNESCO data for years 2000-2004. % Analysing relationship between % country educational spending and its GDP. %for j=1:4; %if j==1; StandCovs=[]; end; %StandCovs=[StandCovs,(data(:,j)-mean(data(:,j)))/std(data(:,j),1)]; %end; %DATA=[StandCovs,data(:,5)]; Covariates=1:4; Outcome=5; %Labels=textdata(1,2:5); %s=0; MaxKnots=15; % Maximum number of knots. %NumberSamples=75000; % Number and thinning of MCMC samples. %Heteroscedastic=0; %Indicate whether you want heteroscedastic model (0 for homoscedastic) %delta1=1; delta2=1.5; % gamma prior parameters for v % Example 2: Air pollution data. %for j=3:5; %if j==3; StandCovs=[]; end; %StandCovs=[StandCovs,(data(:,j)-mean(data(:,j)))/std(data(:,j),1)]; %end; %DATA=[nthroot(data(:,2),3),StandCovs]; %Outcome=1; Covariates=2:4; %Labels=textdata(1,3:5); %s=0; MaxKnots=15; % Maximum number of knots. %NumberSamples=75000; thin=5; % Number and thinning of MCMC samples. %Heteroscedastic=0; %Indicate whether you want heteroscedastic model (0 for homoscedastic) %delta1=1; delta2=.25; % gamma prior parameters for v % Example 3: Rock Data (image processing). %clear; %for j=1:3; %if j==1; StandCovs=[]; end; %StandCovs=[StandCovs,(data(:,j)-mean(data(:,j)))/std(data(:,j),1)]; %end; %DATA=[StandCovs,log(data(:,4))]; %clear StandCovs j; %Covariates=1:3; Outcome=4; % Outcome is log permeability. %Labels=[colheaders(:,1:3)]; %s=0; MaxKnots=15; % Maximum number of knots. %NumberSamples=75000; % Number and thinning of MCMC samples. %Heteroscedastic=1; %Indicate whether you want heteroscedastic model (0 for homoscedastic) %delta1=1; delta2=.5; % gamma prior parameters for v %plot((0:100)./10,gampdf((0:100)./10,delta1,delta2)) % CPS High school data 2006-07 % DV: Predomintely black school indicator for j=[7,8,10,13]; if j==7; StandCovs=[]; end; StandCovs=[StandCovs,(data(:,j)-mean(data(:,j)))/std(data(:,j),1)]; end; clear j ans DATA=[StandCovs,data(:,5)]; Covariates=1:4; Outcome=5; MaxCategories=repmat(1,length(DATA(:,5)),1); Labels=textdata(1,[7,8,10,13]); s=0; MaxKnots=25; % Maximum number of knots. NumberSamples=5000; % Number and thinning of MCMC samples. delta1=1; delta2=1/2; % gamma prior parameters for v % %======================================================================================================== % MCMC SAMPLING LOOP. %======================================================================================================== for s=1:NumberSamples %======================================================================================================== %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if s==1; % SET UP ALL INITIAL VALUES FOR THE MCMC CHAIN. format compact; warning off all; % Set up objects that describe basic characteristics of data. ntot=length(DATA(:,Outcome)); n=sum(sum(DATA(:,[Covariates,Outcome]),2)>-Inf); Y=DATA(:,Outcome); X=DATA(:,Covariates); p=length(Covariates); if (sum(strcmp(who,'MaxCategories'))==1); Ylow=NaN(length(Y),1); Ylow(Y==0)=-Inf; Ylow(Y>0 & Y0 & Y0 & Y0 & Yrho_min; Sk=(eye(p)+((rho_k^-2)*(betahat*betahat')))^.5; derivatives=zeros(p+1,1); for i=1:n; Term1=zeros(p+1,p+1); Term2=zeros(p+1,1); for j=1:n Xij=X(j,:)'-X(i,:)'; t=(norm(Sk*Xij)^2)/(h_k^2); K=max(0,(1-(t^2))^2); % Quartic kernel Term1=Term1+([1;Xij]*[1;Xij]'*K); Term2=Term2+(Y(j)*[1;Xij]*K); end; derivatives=derivatives+(inv(Term1)*Term2); end; betahat=(1/n)*derivatives(2:p+1); h_k=a_h*h_k; rho_k=a_rho*rho_k; if rho_k>rho_min; k=k+1; end; end; w_hat=(betahat/norm(betahat)); w0=w_hat; % Starting value for w is the HJS estimate (Hristache, Jutisky, & Spokoiny, 2001, AS) clear C0 h_k h_max a_rho a_h rho_min rho_k betahat k Sk derivatives i j Xij t K Term1 Term2 si0=X*w0; SI0=repmat(si0,1,MaxKnots); % Projected scores. KNOTS0=repmat(quantile(si0,[0:(1/(MaxKnots)):((MaxKnots-1)/MaxKnots)]),n,1); % Knot matrix B0=[ones(n,1),si0,(SI0>KNOTS0).*(SI0-KNOTS0)]; % Basis matrix if Heteroscedastic==1; sigma0=unifrnd(.4,.5,n,1); else sigma0=.5; end;% regression variances v0=.75; W0=v0*eye(MaxKnots+2); W0(1,1)=1; % starting value for variance of Betas. % % Starting value of Beta0, using a %Gibbs sampler (p. 330 of O'Hagan & Forster, 2004, Advanced Theory of Statistics, Second Ed. Winv=inv(W0); if (length(sigma0)>1); Cinv=inv(diag(sigma0)); else Cinv=inv(sigma0*eye(n)); end; BCinvB=(B0'*Cinv*B0); BCinvY=B0'*Cinv*Y; Wstar=inv(Winv+BCinvB); Wstar=tril(Wstar)+tril(Wstar,-1)'; m_star=Wstar*BCinvY; Beta0=m_star; clear Cinv BCinvB BCinvY Wstar m_star; % % Set starting DP baseline parameters with Gibbs sampler. if Heteroscedastic==1; Nsigclus=length(unique(sigma0)); % Number of sigma clusters. mu0=1/gamrnd(Nsigclus,1/sum(unique(sigma0))); %Starting value of exponential mean, using Gibbs sampler. alpha0=1; % Starting value of alpha (precision parameter). end; t = cputime; end; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % %=========================================================================================================================== % Gibbs sample latent variables for polychotomous model. %=========================================================================================================================== if (sum(strcmp(who,'MaxCategories'))==1); Yold=Y; Y=norminv(normcdf(Ylow,B0*Beta0,sigma0)+(rand(n,1).*(normcdf(Yhi,B0*Beta0,sigma0)-normcdf(Ylow,B0*Beta0,sigma0))),B0*Beta0,sigma0); Y(Y==-Inf)=Yold(Y==-Inf); Y(Y==Inf)=Yold(Y==Inf); Y(isnan(Y))=Yold(isnan(Y)); end; %=========================================================================================================================== % % Adaptive MH sample w, as vector; if s==1; lambdaJ=10; % parameter of J spherical (proposal) distribution for w. lambdaJ_samples=[lambdaJ;NaN*zeros(NumberSamples-1,1)]; w_accept_prob_samples=[NaN*zeros(NumberSamples,1)]; w_Samples=[NaN*zeros(NumberSamples,p)]; EYsamples=[NaN*zeros(NumberSamples,n)]; siSamples=EYsamples; end; % Propose w and new basis matrix. w1=normrnd(2*(lambdaJ^2)*w0,1,p,1); w1=w1/norm(w1); si1=X*w1; SI1=repmat(si1,1,MaxKnots); KNOTS1=repmat(quantile(si1,[0:(1/(MaxKnots)):((MaxKnots-1)/MaxKnots)]),n,1); B1=[ones(n,1),si1,(SI1>KNOTS1).*(SI1-KNOTS1)]; % Evaluate Likelihood x prior, for w0 and w1. LP1=sum(log(normpdf(Y,B1*Beta0,sigma0.^.5))); LP0=sum(log(normpdf(Y,B0*Beta0,sigma0.^.5))); % Compute and store accpetance probabilities. accept_prob_w=min(1,exp(LP1-LP0)); w_accept_prob_samples(s)=accept_prob_w; % Update w and basis matrix. if(rand1); Cinv=inv(diag(sigma0)); else Cinv=inv(sigma0*eye(n)); end; BCinvB=B0'*Cinv*B0; BCinvY=B0'*Cinv*Y; Wstar=inv(Winv+BCinvB); Wstar=tril(Wstar)+tril(Wstar,-1)'; m_star=Wstar*BCinvY; if isnan(sum(sum(Wstar)))==0; EIG=eig(Wstar); else EIG=eig(zeros(MaxKnots+2,MaxKnots+2)); end; if sum(EIG>0)==(MaxKnots+2); BetaProp=mvnrnd(m_star,Wstar)'; else BetaProp=Beta0; end; if min(normpdf(Y,B0*BetaProp,sigma0.^.5))>0; Beta0=BetaProp; end; BetaSamples(s,:)=Beta0'; Means=B0*Beta0; clear Winv Cinv BCinvB BCinvY BetaProp m_star W_star; %========================================================================================================= %======================================================================================================== % Adaptive MH sample hypervariance (v) parameter of Beta % Taken from Denison, Holmes, Mallick, & Smith, 2002, p.64, "Bayesian % Methods for Nonlinear Classification and Regression". %======================================================================================================== if s==1; % Initialization. pv_v=.2; vSamples=[NaN*zeros(NumberSamples,1)]; pv_vSamples=vSamples; AcceptRate_vSamples=vSamples; end; %v0=gamrnd(delta1+((MaxKnots+2)/2),inv(delta2+((Beta0'*Beta0)/2))); %vSamples(s)=v0; %W0=v0*eye(MaxKnots+2); W0(1,1)=10^10; % Log probability of v0 Like0=log(mvnpdf(Beta0,zeros(MaxKnots+2,1),W0))+log(gampdf(v0,delta1,delta2)); % Log probability of proposed v Proposal=exp(normrnd(log(v0),sqrt(pv_v))); W1=Proposal*eye(MaxKnots+2); W1(1,1)=10; Like1=log(mvnpdf(Beta0,zeros(MaxKnots+2,1),W1))+log(gampdf(Proposal,delta1,delta2)); if rand(Odds/(1+Odds))),b_alpha-log(eta)); alphaSamples(s)=alpha0; clear eta Odds; %======================================================================================================== % Gibbs Sample mean parameter (mu0) of Exponential baseline distribution, % with mu0 under a reference prior. %======================================================================================================== if s==1; muSamples=[NaN*zeros(NumberSamples,1)]; end; mu0=1/gamrnd(Nsigclus,1/sum(unique(sigma0))); muSamples(s)=mu0; %======================================================================================================== end; %======================================================================================================== % Sample the regression variances under the DP model, using Algorithm 8 of Neal(2000); %======================================================================================================== if s==1; %Initialize. if Heteroscedastic==1; NsigclusSamples=[NaN*zeros(NumberSamples,1)]; sigmaSamples=[NaN*zeros(NumberSamples,length(Y))]; m=1; end; if Heteroscedastic==0; sigmaSamples=[NaN*zeros(NumberSamples,1)]; end; AcceptRateSigSamples=[NaN*zeros(NumberSamples,1)]; pvSig=.2; pvSigSamples=AcceptRateSigSamples; end; % if Heteroscedastic==1; %======================================================================================================== % First, sample the clusters of the sigmas; tab=tabulate(sigma0);Counts=tab(:,2);COUNTS=repmat(Counts',ntot,1); clear tab Counts; [SIGMAS,SIGMA0]=meshgrid(unique(sigma0)',sigma0'); IndSIGMA=SIGMA0==SIGMAS;COUNTS=COUNTS-IndSIGMA; clear SIGMA0 IndSIGMA; % Revise counts to n_-i,c. uniq=sum((COUNTS==0)*1,2); uniq=sort(unique(uniq.*(1:ntot)')); uniq=uniq((2:length(uniq)),:); RATIO1=COUNTS./(n-1+alpha0); clear COUNTS; RATIO2=(alpha0/m)/(n-1+alpha0); RATIO2(1:ntot,1:m)=RATIO2; RATIO=[RATIO1,RATIO2]; clear RATIO1 RATIO2; SIGMAS=[SIGMAS,exprnd(mu0,ntot,m)]; SIGMAS(uniq,Nsigclus+m)=sigma0(uniq,:); clear uniq; % add unique sigmas as candidate sigmas. MEANS=repmat(Means',Nsigclus+m,1)'; OUTCOME=repmat(Y',Nsigclus+m,1)'; LIKE=RATIO.*normpdf(OUTCOME,MEANS,SIGMAS.^.5); clear RATIO OUTCOME MEANS; Norm=sum(LIKE,2); Norm=repmat(Norm',Nsigclus+m,1)'; LIKE=cumsum((LIKE./Norm),2); clear Norm; sample=unifrnd(zeros(ntot,1),LIKE(:,Nsigclus+m)); [sample,cols]=meshgrid(sample,(1:(Nsigclus+m))); sample=sample'; cols=cols'; sample=sample>LIKE; clear LIKE; sample=max(sample.*cols,[],2)+1; sample=repmat(sample',Nsigclus+m,1)'; sample=(cols==sample); clear cols; sigma0=sum(sample.*SIGMAS,2); clear sample SIGMAS; Nsigclus=length(unique(sigma0)); NsigclusSamples(s,:)=Nsigclus; % Given sample clusters, use adaptive MH step to generate new posterior sample of sigmas; uniq=unique(sigma0); ClusterIDs=sum((repmat(uniq,1,ntot)'==repmat(sigma0,1,Nsigclus)).*repmat(1:Nsigclus,ntot,1),2); LP0=log(normpdf(Y,Means,sigma0.^.5))+log(exppdf(sigma0,mu0)); LP0=accumarray(ClusterIDs,LP0); Proposal=exp(normrnd(log(uniq),pvSig)); Proposal_n=Proposal(ClusterIDs,:); LP1=log(normpdf(Y,Means,Proposal_n.^.5))+log(exppdf(Proposal_n,mu0)); LP1=accumarray(ClusterIDs,LP1); AcceptProbs=min(1,exp(LP1-LP0)); clear Proposal_n LP1 LP0 i; Accept=rand(Nsigclus,1).441 && s<=20000); pvSig=exp(log(pvSig)+min(.01,s^(-.5))); end; if (AcceptRate<.441 && s<=20000); pvSig=exp(log(pvSig)-min(.01,s^(-.5))); end; sigmaSamples(s,:)=sigma0'; AcceptRateSigSamples(s)=AcceptRate; pvSigSamples(s)=pvSig; clear AcceptRate; end; % Use adaptive metropolis step for homoscedastic model. if (Heteroscedastic==0 || Discrete==1); if (Discrete==0); Like0=sum(log(normpdf(Y,Means,sigma0^.5))); Proposal=exp(normrnd(log(sigma0),sqrt(pvSig))); Like1=sum(log(normpdf(Y,Means,Proposal^.5))); if rand