% ### EXregressionEX1.m ### 10.03.14
% Fit a straight line: y(x) = a + b*x (i.e., find optimal values of a and b
% for a linear function fit to a given set of data) using three methods:
% Method 0. Matlab's built-in blackbox function polyfit.m
% Method 1. exact analytic solution (via Bevington)
% Method 2. brute force: minimize chi-squared via a grid search (user specifies the grid)
% --> assumes sigma_i=1 for all points (i.e., unit variance, const. for all points)

clear; figure(1); clf;
% -------------------------------------
% specify range of x-values
xMin= 0;        % min.
xMax= 2;        % max.
xNum= 100;      % number of 'data' points
% choose parameters of linear function (y= aD+bD*x+noise)
aD= 0.15;       % intercept
bD= 0.44;       % slope
noiseF= 0.1;    % noise factor {0.1}
% define 'grid' ranges for Method 2 (i.e., one needs to 'guess' here!)
gridA= linspace(0.1,0.4,100);   % intercept
gridB= linspace(0.1,0.5,100);   % slope
% -------------------------------------
data.x= linspace(xMin,xMax,xNum);
data.y= aD+ bD*data.x + noiseF*randn(size(data.x));     % determine noisy (straight) line
disp('==================================');
disp(['Base function: y= a+b*x= ',num2str(aD),' + ',num2str(bD),'*x (+ Gaussian noise)']);
N= numel(data.x);   % total number of points
% ------------
% Method 0: built-in Matlab function polyfit
[p0,S]= polyfit(data.x,data.y,1);
% also calculate r^2 (coefficient of determination) via
% http://www.mathworks.com/help/matlab/data_analysis/linear-regression.html?refresh=true
yfit= polyval(p0,data.x);   % equivalent to: yfit= p0(1)*data.x + p0(2);
yresid= data.y - yfit;
SSresid= sum(yresid.^2);
SStotal= (length(data.y)-1) * var(data.y);
RS0= 1 - SSresid/SStotal;
disp(['Method 0 (via polyfit): y= ',num2str(p0(2)),' + ',num2str(p0(1)),'*x (r^2 = ',num2str(RS0),')']);
% ------------
% Method 1: exact linear regression (see ch.6 of Bevington, eqns. 6.13 and 6.23)
Delta= N*sum(data.x.^2) - sum(data.x)^2;
a1= 1/Delta*(sum(data.x.^2)*sum(data.y) - sum(data.x)*sum(data.x.*data.y));
b1= 1/Delta*(N*sum(data.x.*data.y) - sum(data.x)*sum(data.y));
sigmaA1= sqrt(1/Delta*sum(data.x.^2));  % uncertainty in intercept (for sigma_i=1)
sigmaB1= sqrt(N/Delta);                 % uncertainty in slope (for sigma_i=1)
% also calculate r^2 (coefficient of determination) via
% http://en.wikipedia.org/wiki/Coefficient_of_determination
meanY= sum(data.y)/N;
bottom1= sum((data.y-meanY).^2);
top1= sum((data.y-a1-b1*data.x).^2);
RS1= 1 - top1/bottom1;
disp(['Method 1 (exact): y= ',num2str(a1),' + ',num2str(b1),'*x (r^2 = ',num2str(RS1),')']);
disp(['Method 1 (exact) standard deviations: SD.a= ',num2str(sigmaA1),', SD.b= ',num2str(sigmaB1)]);
% ------------
% Method 2: brute-force minimization of chi-squared via a grid search
chiS= zeros(numel(gridA),numel(gridB));     % preallocate chi-squared array
for n=1:numel(gridA)
    for m=1:numel(gridB)
        aT= gridA(n);   % 'test' intercept
        bT= gridB(m);   % 'test' slope
        chiS(n,m)= sum((data.y-aT-bT*data.x).^2);   % chi-squared for this (aT,bT) pair; store away
    end
end
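% [Optional sketch] The double loop above can also be written without loops
% via implicit expansion (assumes MATLAB R2016b or later). The variables
% A3, B3, x3, y3, and chiS_vec are illustrative names, not part of the
% original script; chiS_vec should reproduce chiS up to round-off.
A3= reshape(gridA,[],1);        % intercept grid along dim 1
B3= reshape(gridB,1,[]);        % slope grid along dim 2
x3= reshape(data.x,1,1,[]);     % x-data along dim 3
y3= reshape(data.y,1,1,[]);     % y-data along dim 3
chiS_vec= sum((y3 - A3 - B3.*x3).^2,3);     % numel(gridA) x numel(gridB)
disp(['sanity check: max |chiS - chiS_vec| = ',num2str(max(abs(chiS(:)-chiS_vec(:))))]);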
% determine coords. of the global minimum in the chi-squared array (the two
% lines below are a quick/dirty way to find the desired coords.)
[~,indxT]= min(chiS(:));
[p,q]= ind2sub(size(chiS),indxT);
a2= gridA(p); b2= gridB(q);     % extract the associated parameter values
top2= sum((data.y-a2-b2*data.x).^2);
RS2= 1 - top2/bottom1;
disp(['Method 2 (grid-search): y= ',num2str(a2),' + ',num2str(b2),'*x (r^2 = ',num2str(RS2),')']);
% ------------
% visualize
plot(data.x,data.y,'r.');   % original 'data' points
hold on; grid on; xlabel('x'); ylabel('y');
axis([xMin xMax floor(min(data.y)) ceil(max(data.y))]);
plot(data.x,aD+bD*data.x,'g-','LineWidth',2);   % base function (sans noise)
plot(data.x,p0(2)+p0(1)*data.x,'k+');           % Method 0 fit
plot(data.x,a1+b1*data.x,'b-');                 % Method 1 fit
plot(data.x,a2+b2*data.x,'m*');                 % Method 2 fit
legend('noisy data','base function','Method 0 fit','Method 1 fit','Method 2 fit','Location','NorthWest');
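% ------------
% [Optional sketch] A grid-free alternative to Method 2: hand the same
% chi-squared to MATLAB's built-in simplex minimizer fminsearch (no toolbox
% required), which removes the need to 'guess' a grid. The handle chiSfun,
% the starting guess pStart, and the result p3 are illustrative names, not
% part of the original script; the answer should agree closely with Methods 0-2.
chiSfun= @(p) sum((data.y - p(1) - p(2)*data.x).^2);    % chi-squared, p= [intercept slope]
pStart= [0 0];                                          % crude starting guess
p3= fminsearch(chiSfun,pStart);
disp(['Grid-free check (fminsearch): y= ',num2str(p3(1)),' + ',num2str(p3(2)),'*x']);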