softmax_regression_vec.m
function [f,g] = softmax_regression_vec(theta, X, y, lambda)
%
% Arguments:
%   theta  - A vector containing the parameter values to optimize.
%            minFunc passes theta in as a long vector, so we reshape it
%            below to an n-by-(num_classes-1) matrix.
%            Recall that we assume theta(:,num_classes) = 0.
%   X      - The examples stored in a matrix.
%            X(i,j) is the i'th coordinate of the j'th example.
%   y      - The label for each example. y(j) is the j'th example's label.
%   lambda - The weight-decay (L2 regularization) strength.
%
% Returns:
%   f - the objective value at theta.
%   g - the gradient at theta, flattened to a vector for minFunc.
%
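% For reference, the model assumed here (with theta(:,num_classes) = 0) is
%
%   P(y=k | x) = exp(theta(:,k)'*x) / (1 + sum_j exp(theta(:,j)'*x)),  k < num_classes
%   P(y=K | x) =                  1 / (1 + sum_j exp(theta(:,j)'*x)),  K = num_classes
%
% so the regularized objective computed below is
%
%   f(theta) = -sum_i log P(y(i) | X(:,i)) + lambda/2 * ||theta(2:end,:)||^2
%
% where the first row of theta is assumed to hold an unregularized intercept.
%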
m=size(X,2);   % number of examples
n=size(X,1);   % number of input features (rows of X)
% theta arrives as a vector; reshape it to n x (num_classes-1).
theta=reshape(theta, n, []);
num_classes=size(theta,2)+1;
% initialize objective value and gradient.
f = 0;
g = zeros(size(theta));
% Accumulate the negative log-likelihood and its gradient over the
% num_classes-1 classes that have free parameters.
for idx = 1:num_classes-1
  f = f - (y==idx)*log(computeProb(theta,idx,X))';
  g(:,idx) = -X*((y==idx) - computeProb(theta,idx,X))';
end
% Examples of the last class (theta(:,num_classes) = 0) still contribute to
% the objective; their class probability is 1/(1 + sum_j exp(theta(:,j)'*x)).
% They need no gradient column of their own, since those parameters are fixed.
f = f - (y==num_classes)*log(1 ./ (1 + sum(exp(theta'*X),1)))';
% Regularization (weight decay): penalize every row of theta except the
% first, which is assumed to hold the intercept term.
for idx = 2:n
  f = f + lambda/2*sum(theta(idx,:).^2);
  g(idx,:) = g(idx,:) + lambda*theta(idx,:);
end
g=g(:); % make gradient a vector for minFunc
function p=computeProb(theta,idx,X)
% P(y=idx | x) for every column of X, under the convention that the last
% class' parameters are zero (hence the +1 in the denominator).
p = exp(theta(:,idx)'*X) ./ (1 + sum(exp(theta'*X),1));
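
% A minimal usage sketch, assuming Mark Schmidt's minFunc is on the path and
% X already includes an intercept row (the names below are illustrative):
%
%   theta0  = 0.001 * randn(n*(num_classes-1), 1);   % small random init
%   options = struct('MaxIter', 200);
%   theta   = minFunc(@softmax_regression_vec, theta0, options, X, y, lambda);
%   theta   = reshape(theta, n, num_classes-1);      % back to matrix form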