% ### EXregressionEX3.m ### 10.03.14
% Fit a quadratic: y(x) = a + b*x + c*x^2 (i.e., find optimal values for a, b,
% and c of a quadratic function for a given set of data) using four methods:
%   Method 0. Matlab's built-in blackbox function polyfit.m
%   Method 1. linear fit via exact solution
%   Method 2. polynomial (quadratic) fit via exact analytic solution (via Bevington)
%   Method 3. brute force minimizing chi-squared via a grid-search method (user specifies grid)
% --> assumes sigma_i=1 for all points (i.e., unit variance, const. for all points)
clear; figure(1); clf;
% -------------------------------------
% specify range of x-values
xMin= 0;        % min.
xMax= 2;        % max.
xNum= 100;      % number of 'data' points
% choose parameters of quadratic function (y=aD+bD*x+cD*x^2+noise)
aD= 0.4;        % intercept
bD= 0.6;        % linear term
cD= 0.9;        % quadratic term
noiseF= 0.1;    % noise factor {0.1}
% define 'grid' range for Method 3 (i.e., one needs to 'guess' here!)
% NOTE: grids span [0,1]; the true parameters (0.4, 0.6, 0.9) lie inside
gridA= linspace(0,1,100);   % intercept
gridB= linspace(0,1,100);   % linear term
gridC= linspace(0,1,100);   % quadratic term
% -------------------------------------
% generate noisy (quadratic) 'data'
data.x= linspace(xMin,xMax,xNum);
data.y= aD+ bD*data.x+ cD*data.x.^2+ noiseF*randn(1,numel(data.x));
disp('==================================');
disp(['Base function: y= a+b*x+c*x^2= ',num2str(aD),' + ',num2str(bD), ...
    '*x + ',num2str(cD),'*x^2 (+ Gaussian noise)']);
N= numel(data.x);   % total number of points
data.SD= ones(1,N); % dummy variable to serve as standard deviation (makes it easier to code later if this is included)
% ------------
% Method 0: Built-in Matlab function
p0= polyfit(data.x,data.y,2);   % p0 holds coeffs. in DESCENDING powers: [c b a]
% also calculate r^2 (coefficient of determination) via
% http://www.mathworks.com/help/matlab/data_analysis/linear-regression.html?refresh=true
yfit= polyval(p0,data.x);   % equivalent to yfit = p0(1)*x.^2 + p0(2)*x + p0(3)
yresid= data.y - yfit;
SSresid= sum(yresid.^2);
SStotal= (length(data.y)-1) * var(data.y);
RS0= 1 - SSresid/SStotal;
% [FIX] original printed only p0(2) and p0(1) in the form 'a + b*x', dropping
% the quadratic term and mislabeling the coefficients; polyfit's intercept is
% p0(3), linear term p0(2), quadratic term p0(1)
disp(['Method 0 (via polyfit): y= ',num2str(p0(3)),' + ',num2str(p0(2)), ...
    '*x + ',num2str(p0(1)),'*x^2 (r^2 = ',num2str(RS0),')']);
% ------------
% Method 1: Linear regression (see ch.6 from Bevington, eqns.6.13 and 6.23)
Delta= N*sum(data.x.^2)-sum(data.x)^2;
a1= 1/Delta*(sum(data.x.^2)*sum(data.y)- sum(data.x)*sum(data.x.*data.y));
b1= 1/Delta*(N*sum(data.x.*data.y)- sum(data.x)*sum(data.y));
sigmaA1= sqrt(1/Delta*sum(data.x.^2));  % parameter uncertainties (unit variance assumed)
sigmaB1= sqrt(N/Delta);
% also calculate r^2 (coefficient of determination) via
% http://en.wikipedia.org/wiki/Coefficient_of_determination
meanY= sum(data.y)/N;
bottom1= sum((data.y-meanY).^2);    % total sum of squares (reused by Methods 2 & 3)
top1= sum((data.y-a1-b1*data.x).^2);
RS1= 1- top1/bottom1;
disp(['Method 1 (linear): y= ',num2str(a1),' + ',num2str(b1),'*x (r^2 = ',num2str(RS1),')']);
% ------------
% Method 2: Polynomial regression for a quadratic (see ch.7 from Bevington, eqns.7.9)
% determine a set of dummy variables (weighted moment sums; weights = 1/sigma^2)
XX= data.SD.^2;
A0= sum(1./XX);
Ay1= sum(data.y./XX);
Ax1= sum(data.x./XX);
Ax2= sum((data.x.^2)./XX);
Ax3= sum((data.x.^3)./XX);
Ax4= sum((data.x.^4)./XX);
Axy1= sum((data.x.*data.y)./XX);
Axy2= sum((((data.x).^2).*data.y)./XX);
% now build up the relevant matrices and solve via Cramer's rule
Delta= det([A0 Ax1 Ax2; Ax1 Ax2 Ax3; Ax2 Ax3 Ax4]);
Amat= [Ay1 Ax1 Ax2; Axy1 Ax2 Ax3; Axy2 Ax3 Ax4];
Bmat= [A0 Ay1 Ax2; Ax1 Axy1 Ax3; Ax2 Axy2 Ax4];
Cmat= [A0 Ax1 Ay1; Ax1 Ax2 Axy1; Ax2 Ax3 Axy2];
a2= 1/Delta*det(Amat);  % now determine the associated parameters
b2= 1/Delta*det(Bmat);
c2= 1/Delta*det(Cmat);
top2= sum((data.y-a2-b2*data.x-c2*data.x.^2).^2);
RS2= 1- top2/bottom1;
disp(['Method 2 (exact, quadratic): y= ',num2str(a2),' + ',num2str(b2), ...
    '*x + ',num2str(c2),'*x^2 (r^2 = ',num2str(RS2),')']);
% ------------
% Method 3: Brute-force minimize chi-squared via grid search
chiS= zeros(numel(gridA),numel(gridB),numel(gridC));    % preallocate (avoids implicit growth inside the triple loop)
for n=1:numel(gridA)
    for m=1:numel(gridB)
        for z=1:numel(gridC)
            aT= gridA(n);   % extract 'test' parameters
            bT= gridB(m);
            cT= gridC(z);
            % determine chi-squared and store away (sigma_i = 1 assumed)
            chiS(n,m,z)= sum((data.y-aT-bT*data.x-cT*data.x.^2).^2);
        end
    end
end
% determine coords. of global minimum in chi-squared array (two lines
% below are a quick/dirty way to find the desired coords.)
[junk,indxT]= min(chiS(:));
[p,q,r]= ind2sub(size(chiS),indxT);
a3= gridA(p); b3= gridB(q); c3= gridC(r);   % extract the associated values
top3= sum((data.y-a3-b3*data.x-c3*data.x.^2).^2);
RS3= 1- top3/bottom1;
disp(['Method 3 (grid-search): y= ',num2str(a3),' + ',num2str(b3), ...
    '*x + ',num2str(c3),'*x^2 (r^2 = ',num2str(RS3),')']);
% ------------
% visualize
plot(data.x,data.y,'r.');   % original 'data' points
hold on; grid on;
xlabel('x'); ylabel('y');
axis([xMin xMax floor(min(data.y)) ceil(max(data.y))]);
plot(data.x,aD+bD*data.x+cD*data.x.^2,'g-','LineWidth',2);  % base function (sans noise)
plot(data.x,p0(3)+p0(2)*data.x+p0(1)*data.x.^2,'k+');       % Method 0 fit
plot(data.x,a1+b1*data.x,'b-');                             % Method 1 fit
plot(data.x,a2+b2*data.x+c2*data.x.^2,'rs');                % Method 2 fit
plot(data.x,a3+b3*data.x+c3*data.x.^2,'m*');                % Method 3 fit
legend('noisy data','base function','Method 0 fit','Method 1 fit', ...
    'Method 2 fit','Method 3 fit','Location','NorthWest');