%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % NONRESPONSE.M demonstrates the application of % "An econometric method of correcting for unit nonresponse bias in % surveys" by Anton Korinek, Johan A. Mistiaen, Martin Ravallion % % required files: % - nonresponse.m: main Matlab program % - linL.m: Matlab function calculating and adding up the % squares all moment conditions for a given theta, % required for minimization routine % - cpsmar2004.mat: containing sample data from CPS March Supplement 2004 global addobs population Winv g explinspec P vars % define global variables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % READ DATA: y(76327,1): income/capita of household i % state(76327,1): state of household i % interview(51,1): # of households interviewed in state j % typeA(51,1): # of non-responding households in state j filename = 'cpsmar2004.mat'; load(filename); population = interview + typeA; % # of households sampled in state j states = size(interview, 1); % # of states obs = size(y, 1); % total # of households addobs = sparse(state,1:obs,1); % assigns household i to state j %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % DEFINE SPECIFICATION: spec_text = 'theta(1) + theta(2)*ln(y)'; theta0 = [20, -1]; % starting parameters for iteration vars = [ones(obs,1), log(y)]; % matrix of variables used, e.g. [constant, y] derivatives = vars; % derivatives of logit function % for linear specifications = vars parameters = size(theta0, 2); % number of parameters in specification disp( '.' ) disp( '===============================================================================') disp([' Specification: P = logit(', spec_text,')']); disp( '-------------------------------------------------------------------------------'); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % ESTIMATE GMM: Winv = diag(1./population); % inverse of weighting matrix W as in equation (10) options = optimset('Display','off','TolFun',1e-8,'TolX',1e-8,'MaxFunEval', 3000, 'MaxIter', 1000); [theta, fval, exitflag, output] = fminsearch(@linL, theta0, options); % run minimization problem with given options s = sum(g.^2)/sum(population); % calculate sigma as in equation (9) in paper G = addobs * (derivatives./kron(ones(1,parameters),explinspec)); % G is dPsi/dtheta in equation (11), i.e. % vector of dpsi/dtheta as in equation (12) sigma = s * inv(G' * Winv * G); % Var(theta) as defined in equation (11) se = sqrt(diag(sigma)); % standard error of theta AIC = states*log(fval/states) + 2*parameters; Schwarz = states*log(fval/states) + parameters*log(parameters); for i=1:parameters disp([' theta (', num2str(i,'%1.0f'), ') | ', num2str(theta(i),'%5.4f'), ' | s.e. ', num2str(se(i),'%5.4f') ]); end disp( '-------------------------------------------------------------------------------') disp([' Minimum fval = ',num2str(fval,'%30.2f'),', s = ', num2str(s,'%2.6f') ]) disp([' AIC = ',num2str(AIC,'%30.2f'), ', Schwarz = ', num2str(Schwarz,'%30.2f') ]) disp( '-------------------------------------------------------------------------------') %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % DRAW FIGURE 1: relationship between income and Prob(response) expxupper = exp(vars * theta' + 1.96*sum((vars * sigma).*vars,2)); expxlower = exp(vars * theta' - 1.96*sum((vars * sigma).*vars,2)); Pupper = expxupper./(1 + expxupper); % upper and lower bound of a Plower = expxlower./(1 + expxlower); % 95% confidence interval for P semilogx(y,P,'LineStyle','-','Marker','none','MarkerSize',1); hold on plot(y,Pupper,':'); plot(y,Plower,':'); hold off axis([10^3, 10^6, 0, 1.1]) xlabel('income/capita') ylabel('probability of response')