%% Simulates Galton father/son data (see Friedman 78). %% First set some graphical parameters scatterPointSize = 6; conditionalMeanMarkerSize = 10; % Set bivariate normal parameters (from Freedman) N = 100; xMu = 68; yMu = 69; xSD = 2.7; ySD = 2.7; rho = 0.5; covariance = rho * xSD * ySD; % Set up arrays for function inputs. mu = [xMu, yMu]; sigma = [xSD^2, covariance; covariance, ySD^2]; % Draw a random sample Nsample = 1078; sample = mvnrnd(mu, sigma, Nsample); % Fit a linear model. lmgalton = polyfit(sample(:, 1), sample(:, 2), 1); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%% First create the level sets plot %%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Set levels for the level set plot. levels = [0.8, 0.9, 0.95, 0.99]; inverseSigma = sigma^-1; cband_y = sqrt(chi2inv(levels, 2)/inverseSigma(1,1)); confidenceBandPoints = [68+cband_y; 69+zeros(1, 4)]'; confidenceBandValues = mvnpdf(confidenceBandPoints, mu, sigma); xgrid = (xMu-(3.5*xSD)):(7*xSD/N):(xMu+(3.5*xSD)); ygrid = (yMu-(3.5*ySD)):(7*ySD/N):(yMu+(3.5*ySD)); [x, y] = meshgrid(xgrid, ygrid); pdfValues = mvnpdf([x(:), y(:)], mu, sigma); pdfValues = reshape(pdfValues, size(x)); figure set(gcf, 'Position', [200, 200, 1700, 600]) subplot(1, 2, 1) [C,h] = contour(xgrid, ygrid, pdfValues, ... 'LevelList', confidenceBandValues, 'LineWidth', 2); % Add in text labels for level sets. v = {'80%', '90%', '95%', '99%'}; textHandles = clabel(C, h, 'FontSize', 15, 'LabelSpacing', 2000); m = length(textHandles); for i = 1:m set(textHandles(i), 'String', v(i)); end set(h,'ShowText','off') colormap gray(1) set(gca, 'FontSize', 20, 'xlim', [58, 78], 'ylim', [59, 79], 'TickDir', 'out') xlabel('Father''s Height (inches)', 'FontSize', 20) ylabel('Son''s Height (inches)', 'FontSize', 20) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%% Create the scatter plot %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% subplot(1, 2, 2) plot(X(:, 1), X(:, 2), '.', ... 'MarkerSize', scatterPointSize, 'MarkerEdgeColor', [0.6, 0.6, 0.6]) set(gca, 'FontSize', 20, 'xlim', [58, 78], 'ylim', [59, 79], 'TickDir', 'out') xlabel('Father''s Height (inches)', 'FontSize', 20) ylabel('Son''s Height (inches)', 'FontSize', 20) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%% Add lines to both %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % In the next section, we choose two points, at father's heights % of 64 and 72 inches, and plot the mean of the observations % falling within a small window of those points. The goal is to % show that the conditional means are close to the regression % line. % Set the points point1x = 64; point2x = 72; % Create small windows. epsilon = 0.5; window1 = point1x + epsilon * [-1, 1]; window2 = point2x + epsilon * [-1, 1]; % Find Xs close to the point's xs. inWindow1 = abs( sample(:,1) - point1x ) < epsilon; inWindow2 = abs( sample(:,1) - point2x ) < epsilon; % Find the mean Ys for Xs close to point's xs. point1y = mean( sample( inWindow1, 2 ) ); point2y = mean( sample( inWindow2, 2 ) ); % Now go through and add the lines and points. for i = 1:2 subplot(1,2,i) set(gca,'OuterPosition',get(gca,'OuterPosition')+[0, 0+0.05, 0, 0]) hold on; % Draw the vertical lines creating boundaries around the small % windows. % line([x1, x2], [y1, y2]) plots a line from x1,y1 to x2,y2, so % line([x, x], [yLow, yHigh]) plots a vertical line at x. lineLocations = [window1, window2]; yLimits = [50, 90]; for j = 1:4 line([lineLocations(j), lineLocations(j)], yLimits, ... 'Color', 'k', 'LineStyle', '--') end % The theoretical line has slope 1 and intercept 1 (means are 69 % and 68, so sons are, on average, 1 inch taller than fathers). theoreticalLine = refline(1, 1); regressionLine = refline(lmgalton(1, 1), lmgalton(1, 2)); set(theoreticalLine, 'Color', [0, 0, 0], 'LineStyle', '-.') set(regressionLine, 'Color', [0, 0, 0]) % Place xs ('xr' symbols) at the conditional means. plot([point1x, point2x], [point1y, point2y], 'xr', ... 'LineWidth', 2, 'MarkerSize', conditionalMeanMarkerSize) end