% ### EXregressionEX3.m ### 10.03.14
% Fit a quadratic: y(x) = a + b*x + c*x^2 (i.e., find optimal values for a, b,
% and c of a quadratic function for a given set of data) using four methods:
%   Method 0. Matlab's built-in blackbox function polyfit.m
%   Method 1. linear fit via exact solution
%   Method 2. polynomial (quadratic) fit via exact analytic solution (via Bevington)
%   Method 3. brute force minimizing chi-squared via a grid-search method (user specifies grid)
% --> assumes sigma_i=1 for all points (i.e., unit variance, const. for all points)
clear; figure(1); clf;
% -------------------------------------
% specify range of x-values
xMin= 0;        % min.
xMax= 2;        % max.
xNum= 100;      % number of 'data' points
% choose parameters of quadratic function (y=aD+bD*x+cD*x^2+noise)
aD= 0.4;        % intercept
bD= 0.6;        % linear term
cD= 0.9;        % quadratic term
noiseF= 0.1;    % noise factor {0.1}
% define 'grid' range for Method 3 (i.e., one needs to 'guess' here!)
% NOTE: grids span [0,1]; the true parameters (0.4, 0.6, 0.9) lie inside
gridA= linspace(0,1,100);   % intercept
gridB= linspace(0,1,100);   % linear term
gridC= linspace(0,1,100);   % quadratic term
% -------------------------------------
% generate noisy (quadratic) 'data'
data.x= linspace(xMin,xMax,xNum);
data.y= aD+ bD*data.x+ cD*data.x.^2+ noiseF*randn(1,numel(data.x));
disp('==================================');
disp(['Base function: y= a+b*x+c*x^2= ',num2str(aD),' + ',num2str(bD), ...
    '*x + ',num2str(cD),'*x^2 (+ Gaussian noise)']);
N= numel(data.x);   % total number of points
data.SD= ones(1,N); % dummy variable to serve as standard deviation (makes it easier to code later if this is included)
% ------------
% Method 0: Built-in Matlab function
p0= polyfit(data.x,data.y,2);   % p0 holds coeffs. in DESCENDING powers: [c b a]
% also calculate r^2 (coefficient of determination) via
% http://www.mathworks.com/help/matlab/data_analysis/linear-regression.html?refresh=true
yfit= polyval(p0,data.x);   % equivalent to yfit = p0(1)*x.^2 + p0(2)*x + p0(3)
yresid= data.y - yfit;
SSresid= sum(yresid.^2);
SStotal= (length(data.y)-1) * var(data.y);
RS0= 1 - SSresid/SStotal;
% [FIX] original printed only p0(2) and p0(1) in the form 'a + b*x', dropping
% the quadratic term and mislabeling the coefficients; polyfit's intercept is
% p0(3), linear term p0(2), quadratic term p0(1)
disp(['Method 0 (via polyfit): y= ',num2str(p0(3)),' + ',num2str(p0(2)), ...
    '*x + ',num2str(p0(1)),'*x^2 (r^2 = ',num2str(RS0),')']);
% ------------
% Method 1: Linear regression (see ch.6 from Bevington, eqns.6.13 and 6.23)
Delta= N*sum(data.x.^2)-sum(data.x)^2;
a1= 1/Delta*(sum(data.x.^2)*sum(data.y)- sum(data.x)*sum(data.x.*data.y));
b1= 1/Delta*(N*sum(data.x.*data.y)- sum(data.x)*sum(data.y));
sigmaA1= sqrt(1/Delta*sum(data.x.^2));  % parameter uncertainties (unit variance assumed)
sigmaB1= sqrt(N/Delta);
% also calculate r^2 (coefficient of determination) via
% http://en.wikipedia.org/wiki/Coefficient_of_determination
meanY= sum(data.y)/N;
bottom1= sum((data.y-meanY).^2);    % total sum of squares (reused by Methods 2 & 3)
top1= sum((data.y-a1-b1*data.x).^2);
RS1= 1- top1/bottom1;
disp(['Method 1 (linear): y= ',num2str(a1),' + ',num2str(b1),'*x (r^2 = ',num2str(RS1),')']);
% ------------
% Method 2: Polynomial regression for a quadratic (see ch.7 from Bevington, eqns.7.9)
% determine a set of dummy variables (weighted moment sums; weights = 1/sigma^2)
XX= data.SD.^2;
A0= sum(1./XX);
Ay1= sum(data.y./XX);
Ax1= sum(data.x./XX);
Ax2= sum((data.x.^2)./XX);
Ax3= sum((data.x.^3)./XX);
Ax4= sum((data.x.^4)./XX);
Axy1= sum((data.x.*data.y)./XX);
Axy2= sum((((data.x).^2).*data.y)./XX);
% now build up the relevant matrices and solve via Cramer's rule
Delta= det([A0 Ax1 Ax2; Ax1 Ax2 Ax3; Ax2 Ax3 Ax4]);
Amat= [Ay1 Ax1 Ax2; Axy1 Ax2 Ax3; Axy2 Ax3 Ax4];
Bmat= [A0 Ay1 Ax2; Ax1 Axy1 Ax3; Ax2 Axy2 Ax4];
Cmat= [A0 Ax1 Ay1; Ax1 Ax2 Axy1; Ax2 Ax3 Axy2];
a2= 1/Delta*det(Amat);  % now determine the associated parameters
b2= 1/Delta*det(Bmat);
c2= 1/Delta*det(Cmat);
top2= sum((data.y-a2-b2*data.x-c2*data.x.^2).^2);
RS2= 1- top2/bottom1;
disp(['Method 2 (exact, quadratic): y= ',num2str(a2),' + ',num2str(b2), ...
    '*x + ',num2str(c2),'*x^2 (r^2 = ',num2str(RS2),')']);
% ------------
% Method 3: Brute-force minimize chi-squared via grid search
chiS= zeros(numel(gridA),numel(gridB),numel(gridC));    % preallocate (avoids implicit growth inside the triple loop)
for n=1:numel(gridA)
    for m=1:numel(gridB)
        for z=1:numel(gridC)
            aT= gridA(n);   % extract 'test' parameters
            bT= gridB(m);
            cT= gridC(z);
            % determine chi-squared and store away (sigma_i = 1 assumed)
            chiS(n,m,z)= sum((data.y-aT-bT*data.x-cT*data.x.^2).^2);
        end
    end
end
% determine coords. of global minimum in chi-squared array (two lines
% below are a quick/dirty way to find the desired coords.)
[junk,indxT]= min(chiS(:));
[p,q,r]= ind2sub(size(chiS),indxT);
a3= gridA(p); b3= gridB(q); c3= gridC(r);   % extract the associated values
top3= sum((data.y-a3-b3*data.x-c3*data.x.^2).^2);
RS3= 1- top3/bottom1;
disp(['Method 3 (grid-search): y= ',num2str(a3),' + ',num2str(b3), ...
    '*x + ',num2str(c3),'*x^2 (r^2 = ',num2str(RS3),')']);
% ------------
% visualize
plot(data.x,data.y,'r.');   % original 'data' points
hold on; grid on;
xlabel('x'); ylabel('y');
axis([xMin xMax floor(min(data.y)) ceil(max(data.y))]);
plot(data.x,aD+bD*data.x+cD*data.x.^2,'g-','LineWidth',2);  % base function (sans noise)
plot(data.x,p0(3)+p0(2)*data.x+p0(1)*data.x.^2,'k+');       % Method 0 fit
plot(data.x,a1+b1*data.x,'b-');                             % Method 1 fit
plot(data.x,a2+b2*data.x+c2*data.x.^2,'rs');                % Method 2 fit
plot(data.x,a3+b3*data.x+c3*data.x.^2,'m*');                % Method 3 fit
legend('noisy data','base function','Method 0 fit','Method 1 fit', ...
    'Method 2 fit','Method 3 fit','Location','NorthWest');