CheeseZH: Stanford University: Machine Learning Ex1:Linear Regression

(1) How to compute the cost function in univariate/multivariate linear regression;

(2) How to compute the batch gradient descent update in univariate/multivariate linear regression;

(3) How to scale features by mean value and standard deviation;

(4) How to calculate theta using the normal equation;

Data1

6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705

View Code

1. ex1.m

  %% Machine Learning Online Class - Exercise 1: Linear Regression

  %  Instructions
  %  ------------
  %
  %  This file contains code that helps you get started on the
  %  linear exercise. You will need to complete the following functions
  %  in this exercise:
  %
  %     warmUpExercise.m
  %     plotData.m
  %     gradientDescent.m
  %     computeCost.m
  %     gradientDescentMulti.m
  %     computeCostMulti.m
  %     featureNormalize.m
  %     normalEqn.m
  %
  %  For this exercise, you will not need to change any code in this file,
  %  or any other files other than those mentioned above.
  %
  % x refers to the population size in 10,000s
  % y refers to the profit in $10,000s
  %

  %% Initialization
  clear ; close all; clc

  %% ==================== Part 1: Basic Function ====================
  % Complete warmUpExercise.m
  fprintf('Running warmUpExercise ... \n');
  fprintf('5x5 Identity Matrix: \n');
  % No trailing semicolon: the returned matrix is echoed to the console.
  warmUpExercise()

  fprintf('Program paused. Press enter to continue.\n');
  pause;


  %% ======================= Part 2: Plotting =======================
  fprintf('Plotting Data ...\n')
  data = load('ex1data1.txt');
  X = data(:, 1); y = data(:, 2);
  m = length(y); % number of training examples

  % Plot Data
  % Note: You have to complete the code in plotData.m
  plotData(X, y);

  fprintf('Program paused. Press enter to continue.\n');
  pause;

  %% =================== Part 3: Gradient descent ===================
  fprintf('Running Gradient Descent ...\n')

  X = [ones(m, 1), data(:,1)]; % Add a column of ones to x
  theta = zeros(2, 1); % initialize fitting parameters

  % Some gradient descent settings
  iterations = 1500;
  alpha = 0.01;

  % compute and display initial cost
  % (no trailing semicolon, so the value is echoed to the console)
  computeCost(X, y, theta)

  % run gradient descent
  theta = gradientDescent(X, y, theta, alpha, iterations);

  % print theta to screen
  fprintf('Theta found by gradient descent: ');
  fprintf('%f %f \n', theta(1), theta(2));

  % Plot the linear fit
  hold on; % keep previous plot visible
  plot(X(:,2), X*theta, '-')
  legend('Training data', 'Linear regression')
  hold off % don't overlay any more plots on this figure

  % Predict values for population sizes of 35,000 and 70,000.
  % Inputs are expressed in units of 10,000 people (3.5 and 7); the
  % prediction is in units of $10,000, hence the scaling when printing.
  predict1 = [1, 3.5] *theta;
  fprintf('For population = 35,000, we predict a profit of %f\n',...
      predict1*10000);
  predict2 = [1, 7] * theta;
  fprintf('For population = 70,000, we predict a profit of %f\n',...
      predict2*10000);

  fprintf('Program paused. Press enter to continue.\n');
  pause;

  %% ============= Part 4: Visualizing J(theta_0, theta_1) =============
  fprintf('Visualizing J(theta_0, theta_1) ...\n')

  % Grid over which we will calculate J
  theta0_vals = linspace(-10, 10, 100);
  theta1_vals = linspace(-1, 4, 100);

  % initialize J_vals to a matrix of 0's
  J_vals = zeros(length(theta0_vals), length(theta1_vals));

  % Fill out J_vals
  for i = 1:length(theta0_vals)
      for j = 1:length(theta1_vals)
        t = [theta0_vals(i); theta1_vals(j)];
        J_vals(i,j) = computeCost(X, y, t);
      end
  end


  % Because of the way meshgrids work in the surf command, we need to
  % transpose J_vals before calling surf, or else the axes will be flipped
  J_vals = J_vals';
  % Surface plot
  figure;
  surf(theta0_vals, theta1_vals, J_vals)
  xlabel('\theta_0'); ylabel('\theta_1');

  % Contour plot
  figure;
  % Plot J_vals as 20 contours spaced logarithmically between 0.01 and 1000
  contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
  xlabel('\theta_0'); ylabel('\theta_1');
  hold on;
  % Mark the parameters found by gradient descent on the contour plot.
  plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2);

View Code

2.warmUpExercise.m

 function A = warmUpExercise()
 %WARMUPEXERCISE Example function in octave
 %   A = WARMUPEXERCISE() is an example function that returns the 5x5
 %   identity matrix.
 %
 %   In Octave, a function returns whatever value its declared output
 %   variable (here A) holds when the function ends.

 A = eye(5);

 end

View Code

3. computeCost.m

 function J = computeCost(X, y, theta)
 %COMPUTECOST Compute cost for linear regression
 %   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
 %   parameter for linear regression to fit the data points in X and y:
 %
 %       J = 1/(2*m) * sum((X*theta - y).^2)
 %
 %   X      design matrix, one row per training example
 %   y      column vector of targets, one entry per row of X
 %   theta  column vector of parameters, one entry per column of X
 %   J      scalar cost

 m = length(y); % number of training examples

 % Difference between the model's predictions and the observed targets.
 residual = X * theta - y;

 J = sum(residual .^ 2) / (2 * m);

 end

View Code

4.gradientDescent.m

 function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
 %GRADIENTDESCENT Performs gradient descent to learn theta
 %   [theta, J_history] = GRADIENTDESCENT(X, y, theta, alpha, num_iters)
 %   updates theta by taking num_iters gradient steps with learning rate
 %   alpha.
 %
 %   X          design matrix, one row per training example
 %   y          column vector of targets
 %   theta      initial parameter column vector
 %   alpha      learning rate
 %   num_iters  number of gradient steps to take
 %
 %   theta      parameters after the final step
 %   J_history  num_iters x 1 vector; entry k is the cost after step k

 m = length(y); % number of training examples
 J_history = zeros(num_iters, 1);

 for iter = 1:num_iters

     % X' * (X*theta - y) updates every component of theta from the same
     % (old) theta, i.e. a simultaneous update.
     gradient = (X' * (X * theta - y)) / m;
     theta = theta - alpha * gradient;

     % Save the cost J in every iteration
     J_history(iter) = computeCost(X, y, theta);

 end

 end

View Code

Data2

2104,3,399900
1600,3,329900
2400,3,369000
1416,2,232000
3000,4,539900
1985,4,299900
1534,3,314900
1427,3,198999
1380,3,212000
1494,3,242500
1940,4,239999
2000,3,347000
1890,3,329999
4478,5,699900
1268,3,259900
2300,4,449900
1320,2,299900
1236,3,199900
2609,4,499998
3031,4,599000
1767,3,252900
1888,2,255000
1604,3,242900
1962,4,259900
3890,3,573900
1100,3,249900
1458,3,464500
2526,3,469000
2200,3,475000
2637,3,299900
1839,2,349900
1000,1,169900
2040,4,314900
3137,3,579900
1811,4,285900
1437,3,249900
1239,3,229900
2132,4,345000
4215,4,549000
2162,4,287000
1664,2,368500
2238,3,329900
2567,4,314000
1200,3,299000
852,2,179900
1852,4,299900
1203,3,239500

View Code

0.ex1_multi.m

  %% Machine Learning Online Class
  %  Exercise 1: Linear regression with multiple variables
  %
  %  Instructions
  %  ------------
  %
  %  This file contains code that helps you get started on the
  %  linear regression exercise.
  %
  %  You will need to complete the following functions in this
  %  exercise:
  %
  %     warmUpExercise.m
  %     plotData.m
  %     gradientDescent.m
  %     computeCost.m
  %     gradientDescentMulti.m
  %     computeCostMulti.m
  %     featureNormalize.m
  %     normalEqn.m
  %
  %  For this part of the exercise, you will need to change some
  %  parts of the code below for various experiments (e.g., changing
  %  learning rates).
  %

  %% Initialization

  %% ================ Part 1: Feature Normalization ================

  %% Clear and Close Figures
  clear ; close all; clc

  fprintf('Loading data ...\n');

  %% Load Data
  data = load('ex1data2.txt');
  X = data(:, 1:2);
  y = data(:, 3);
  m = length(y);

  % Print out some data points
  fprintf('First 10 examples from the dataset: \n');
  % The transpose makes fprintf consume one example (x1, x2, y) per pass
  % through the format string.
  fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');

  fprintf('Program paused. Press enter to continue.\n');
  pause;

  % Scale features and set them to zero mean
  fprintf('Normalizing Features ...\n');

  % mu and sigma are kept: the same transform must be applied to any new
  % input before predicting with the theta learned below.
  [X mu sigma] = featureNormalize(X);

  % Add intercept term to X
  X = [ones(m, 1) X];


  %% ================ Part 2: Gradient Descent ================

  % ====================== YOUR CODE HERE ======================
  % Instructions: We have provided you with the following starter
  %               code that runs gradient descent with a particular
  %               learning rate (alpha).
  %
  %               Your task is to first make sure that your functions -
  %               computeCost and gradientDescent already work with
  %               this starter code and support multiple variables.
  %
  %               After that, try running gradient descent with
  %               different values of alpha and see which one gives
  %               you the best result.
  %
  %               Finally, you should complete the code at the end
  %               to predict the price of a 1650 sq-ft, 3 br house.
  %
  % Hint: By using the 'hold on' command, you can plot multiple
  %       graphs on the same figure.
  %
  % Hint: At prediction, make sure you do the same feature normalization.
  %

  fprintf('Running gradient descent ...\n');

  % Choose some alpha value
  % NOTE(review): whether 400 steps at this rate are enough to converge is
  % best judged from the convergence plot below and by comparing the
  % predicted price with the normal-equation result in Part 3.
  alpha = 0.01;
  num_iters = 400;

  % Init Theta and Run Gradient Descent
  theta = zeros(3, 1);
  [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters);

  % Plot the convergence graph
  figure;
  plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);
  xlabel('Number of iterations');
  ylabel('Cost J');

  % Display gradient descent's result
  fprintf('Theta computed from gradient descent: \n');
  fprintf(' %f \n', theta);
  fprintf('\n');

  % Estimate the price of a 1650 sq-ft, 3 br house
  % ====================== YOUR CODE HERE ======================
  % Recall that the first column of X is all-ones. Thus, it does
  % not need to be normalized.
  % theta was learned on normalized features, so the query point is
  % normalized with the training mu/sigma before the intercept is prepended.
  price = [1, ([1650 3] - mu) ./ sigma] * theta;


  % ============================================================

  fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
           '(using gradient descent):\n $%f\n'], price);

  fprintf('Program paused. Press enter to continue.\n');
  pause;

  %% ================ Part 3: Normal Equations ================

  fprintf('Solving with normal equations...\n');

  % ====================== YOUR CODE HERE ======================
  % Instructions: The following code computes the closed form
  %               solution for linear regression using the normal
  %               equations. You should complete the code in
  %               normalEqn.m
  %
  %               After doing so, you should complete this code
  %               to predict the price of a 1650 sq-ft, 3 br house.
  %

  %% Load Data
  % Reloaded so that X holds the raw (un-normalized) features again.
  data = csvread('ex1data2.txt');
  X = data(:, 1:2);
  y = data(:, 3);
  m = length(y);

  % Add intercept term to X
  X = [ones(m, 1) X];

  % Calculate the parameters from the normal equation
  theta = normalEqn(X, y);

  % Display normal equation's result
  fprintf('Theta computed from the normal equations: \n');
  fprintf(' %f \n', theta);
  fprintf('\n');


  % Estimate the price of a 1650 sq-ft, 3 br house
  % ====================== YOUR CODE HERE ======================
  % This theta was fitted on raw features, so the query is used as-is.
  price = [1 1650 3] * theta;


  % ============================================================

  fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
           '(using normal equations):\n $%f\n'], price);

View Code

1.featureNormalize.m

 function [X_norm, mu, sigma] = featureNormalize(X)
 %FEATURENORMALIZE Normalizes the features in X
 %   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a normalized version
 %   of X where the mean value of each feature is 0 and the standard
 %   deviation is 1. This is often a good preprocessing step to do when
 %   working with learning algorithms.
 %
 %   X       matrix where each column is a feature and each row an example
 %   X_norm  (X - mu) ./ sigma, computed separately for each column
 %   mu      1 x n row vector of column means
 %   sigma   1 x n row vector of column standard deviations; a column with
 %           zero spread is reported as 1 so that it normalizes to 0
 %           instead of NaN

 % The explicit dimension argument keeps a single-example (1 x n) input
 % from being reduced across its features instead of down its columns.
 mu = mean(X, 1);
 sigma = std(X, 0, 1);

 % A constant feature would otherwise produce 0/0 = NaN below.
 sigma(sigma == 0) = 1;

 % bsxfun expands the 1 x n row vectors over every row of X. It replaces
 % the '.-' operator, which is not valid MATLAB and has been removed from
 % recent Octave releases.
 X_norm = bsxfun(@rdivide, bsxfun(@minus, X, mu), sigma);

 end

View Code

2.computeCostMulti.m

 function J = computeCostMulti(X, y, theta)
 %COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
 %   J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as
 %   the parameter for linear regression to fit the data points in X and y:
 %
 %       J = 1/(2*m) * sum((X*theta - y).^2)
 %
 %   X      design matrix, one row per training example
 %   y      column vector of targets, one entry per row of X
 %   theta  column vector of parameters, one entry per column of X
 %   J      scalar cost

 m = length(y); % number of training examples

 % Difference between the model's predictions and the observed targets.
 residual = X * theta - y;

 J = sum(residual .^ 2) / (2 * m);

 end

View Code

3.gradientDescentMulti.m

 function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
 %GRADIENTDESCENTMULTI Performs gradient descent to learn theta
 %   [theta, J_history] = GRADIENTDESCENTMULTI(X, y, theta, alpha, num_iters)
 %   updates theta by taking num_iters gradient steps with learning rate
 %   alpha.
 %
 %   X          design matrix, one row per training example
 %   y          column vector of targets
 %   theta      initial parameter column vector
 %   alpha      learning rate
 %   num_iters  number of gradient steps to take
 %
 %   theta      parameters after the final step
 %   J_history  num_iters x 1 vector; entry k is the cost after step k

 m = length(y); % number of training examples
 J_history = zeros(num_iters, 1);

 for iter = 1:num_iters

     % X' * (X*theta - y) updates every component of theta from the same
     % (old) theta, i.e. a simultaneous update.
     gradient = (X' * (X * theta - y)) / m;
     theta = theta - alpha * gradient;

     % Save the cost J in every iteration
     J_history(iter) = computeCostMulti(X, y, theta);

 end

 end

View Code

4.normalEqn.m

 function [theta] = normalEqn(X, y)
 %NORMALEQN Computes the closed-form solution to linear regression
 %   theta = NORMALEQN(X, y) computes the closed-form solution to linear
 %   regression using the normal equations:
 %
 %       theta = pinv(X' * X) * X' * y
 %
 %   X      design matrix, one row per training example
 %   y      column vector of targets
 %   theta  column vector with one entry per column of X

 % X' * y is evaluated first so that the n x m product pinv(X'*X) * X' is
 % never formed. pinv is used so that a singular X' * X (e.g. duplicated
 % features) still yields a solution.
 theta = pinv(X' * X) * (X' * y);

 end

View Code