%% Machine Learning Online Class - Exercise 2: Logistic Regression
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the logistic
% regression exercise. You will need to complete the following functions
% in this exercise:
%
% sigmoid.m
% costFunction.m
% predict.m
% costFunctionReg.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% Load Data
% Build the dataset from two image folders: disease-free (label 1) and
% diseased (label 0). Each image is resized, flattened to a 1x400 row
% vector, and augmented with a flipped copy, then the pooled samples are
% shuffled and split 90%/10% into training and test sets.
file_path1 = 'E:\picture\no\';  % folder of disease-free images
file_path2 = 'E:\picture\yep\'; % folder of diseased images
img_path_list1 = dir(strcat(file_path1,'*.jpg')); % all .jpg files in folder 1
img_path_list2 = dir(strcat(file_path2,'*.jpg')); % all .jpg files in folder 2
num1 = length(img_path_list1);
num2 = length(img_path_list2);
img_num = 2*(num1+num2); % total sample count (originals + flipped copies)
X = zeros(img_num,400);
y = zeros(img_num,1);
if img_num > 0 % at least one matching image was found
    for j = 1:num1 % read disease-free images one by one
        image_name = img_path_list1(j).name; % image file name
        image_1 = imread(strcat(file_path1,image_name));
        % NOTE(review): assumes a grayscale source of ~26x26 so the resized
        % image holds exactly 400 pixels -- confirm input image size/channels.
        image = imresize(image_1,0.769);
        temp1 = reshape(image,1,400);
        X(j,:) = temp1;
        y(j,:) = 1;
        % Augmentation: store the reversed row vector as an extra sample.
        % BUG FIX: original code used fliplr(X(j)), which "flips" a single
        % scalar element; the whole row X(j,:) must be flipped.
        X(j+num1,:) = fliplr(X(j,:));
        y(j+num1,:) = 1;
    end
    for j = 1:num2 % read diseased images one by one
        image_name = img_path_list2(j).name; % image file name
        image_1 = imread(strcat(file_path2,image_name));
        image = imresize(image_1,0.769);
        temp1 = reshape(image,1,400);
        X(j+2*num1,:) = temp1;
        y(j+2*num1,:) = 0;
        % BUG FIX: original code wrote fliplr(X(j)+2*num1), i.e. it added
        % the row offset to a scalar pixel value instead of indexing the
        % intended row; flip the stored row itself.
        X(j+2*num1+num2,:) = fliplr(X(j+2*num1,:));
        y(j+2*num1+num2,:) = 0;
    end
end
Xy = [X,y];
[M, n] = size(X);
m = round(M*0.9); % 90% training set, 10% test set
% BUG FIX: randperm must run over the ROW count. length(Xy) returns
% max(size(Xy)) = n+1 columns whenever there are fewer rows than columns,
% yielding an out-of-bounds / wrong-length permutation.
RandIndex = randperm(size(Xy,1));
Xy = Xy(RandIndex,:); % shuffle samples
X = Xy(1:m,1:n);
y = Xy(1:m,n+1);      % training set
X2 = Xy(m+1:M,1:n);
y2 = Xy(m+1:M,n+1);   % test set
%% ==================== Part 1: Plotting ====================
% (No plotting is performed for this image dataset.)
%% ============ Part 2: Compute Cost and Gradient ============
% Evaluate the logistic-regression cost and gradient at the all-zero
% parameter vector. Requires costFunction.m (completed separately).
% Setup the data matrix appropriately, and add ones for the intercept term
% Add intercept (bias) column to the training set X and the test set X2
X = [ones(m, 1) X];
X2 = [ones(M-m, 1) X2];
% Initialize fitting parameters: n features + 1 intercept term
initial_theta = zeros(n + 1, 1);
% Compute and display initial cost and gradient
[cost, grad] = costFunction(initial_theta, X, y);
fprintf('Cost at initial theta (zeros): %f\n', cost);
% At theta = 0 the logistic cost is log(2) ~= 0.693 for any dataset, so
% this expectation still holds here.
fprintf('Expected cost (approx): 0.693\n');
fprintf('Gradient at initial theta (zeros): \n');
fprintf(' %f \n', grad);
% NOTE(review): the gradient values below are leftovers from the original
% Coursera exam-score dataset; they do not apply to this image dataset.
fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n');
% Compute and display cost and gradient with non-zero theta
% Compute and display cost and gradient with non-zero theta
%% ============= Part 3: Optimizing using fminunc =============
% Use the built-in optimizer fminunc to find the parameter vector theta
% minimizing the logistic-regression cost on the training set.
% Set options for fminunc: 'GradObj','on' tells fminunc that the objective
% returns its own gradient as a second output; cap iterations at 10000.
options = optimset('GradObj', 'on', 'MaxIter', 10000);
% Run fminunc to obtain the optimal theta
% This function will return theta and the cost
[theta, cost] = ...
fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
% Print theta to screen
fprintf('Cost at theta found by fminunc: %f\n', cost);
% NOTE(review): 0.203 is the expected cost for the original Coursera
% exam-score dataset, not for this image dataset.
fprintf('Expected cost (approx): 0.203\n');
fprintf('theta: \n');
fprintf(' %f \n', theta);
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ============== Part 4: Predict and Accuracies ==============
% Evaluate the trained model on the held-out TEST set (X2, y2) using the
% completed predict.m, and report the classification accuracy as a
% percentage of correctly labeled test samples.
p = predict(theta, X2);
% BUG FIX: p is computed on X2 and compared against the test labels y2,
% so this is the TEST accuracy; the original message said "Train Accuracy".
fprintf('Test Accuracy: %f\n', mean(double(p == y2 )) * 100);
fprintf('\n');