-
Notifications
You must be signed in to change notification settings - Fork 2
/
svmlread.m
85 lines (78 loc) · 2.41 KB
/
svmlread.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
function [Y, X] = svmlread(fname)
% SVMLREAD - Read a data file generated by SVM light
%
%   Y = SVMLREAD(FNAME)
%   FNAME gives the name of an output file generated by SVM light. It
%   may contain predicted labels, coefficients alpha, or an input
%   (example) file with class values and features. From this file the
%   data in the first column (class labels or alphas) is extracted and
%   returned in Y as a column vector.
%   [Y, X] = SVMLREAD(FNAME), where FNAME is the name of an input file
%   with class values and features, returns both the vector of class
%   labels Y and the matrix of examples X. Each row of X corresponds to
%   a line in the file that starts with a number. X is returned as a
%   sparse matrix if fewer than half of its entries are nonzero, and as
%   a full matrix otherwise. X is empty if no line contains valid
%   feature:value pairs.
%   Lines that do not start with a number are skipped. Feature indices
%   are read as decimal integers. An error is raised if the file cannot
%   be opened.
%   Attention: this may take a while...
%
%   See also SVML, SVM_LEARN, SVM_CLASSIFY, SVMLOPT, SVMLWRITE
%

%
% Copyright (c) by Anton Schwaighofer (2001)
% $Revision: 1.6 $ $Date: 2002/02/19 12:26:07 $
% mailto:anton.schwaighofer@gmx.net
%
% This program is released under the GNU General Public License.
%

if nargin<1,
  error('Not enough input arguments.');
end

X = [];
Y = [];
f = fopen(fname, 'rt');
if (f<0),
  error(sprintf('Unable to open file %s', fname));
end

i = 0;
fprintf('Scanning ');
try
  while ~feof(f),
    s = fgetl(f);
    if ~ischar(s),
      % fgetl returns -1 (not a string) once the end of file is hit, e.g.
      % for an empty file where feof is still false before the first read
      break;
    end
    [Yi, count, errmsg, nextind] = sscanf(s, '%f', 1);
    % read the class label resp. anything else that is in the first column
    if (count==1),
      i = i+1;
      Y(i,1) = Yi;
      % scan for the feature:value pairs. Use %d rather than %i so that
      % zero-padded indices are not mistaken for octal/hex numbers
      [Xi, count] = sscanf(s(nextind:end), ' %d:%f');
      if (rem(count,2)==0) & (count~=0),
        % if they really come in pairs, then accept
        ind = 2:2:count;
        if isempty(X),
          maxCol = max(Xi(ind-1));
          approxSparsity = (count/2)/maxCol;
          % a rough estimate of the sparsity, based on the first line of
          % data
          if approxSparsity>0.5,
            X = zeros(maxCol, 1000);
          else
            X = spalloc(maxCol, 1000, round(1000*maxCol*approxSparsity));
          end
          % allocate for 1000 data points (lines) beforehand
          % We store everything *columnwise* and transpose afterwards,
          % this greatly improves performance
        end
        if i>size(X,2),
          % out of preallocated columns: double the capacity instead of
          % growing by one column per line
          X(1, max(i, 2*size(X,2))) = 0;
        end
        X(Xi(ind-1),i) = Xi(ind);
        if (rem(i,100)==0),
          fprintf(' %i', i);
        end
      end
    end
  end
catch
  % do not leak the file handle if anything goes wrong while parsing
  fclose(f);
  error(lasterr);
end
fclose(f);
fprintf(' done.\n');

if ~isempty(X),
  if size(X,2)<i,
    % trailing examples without any features never touched X; pad with
    % zero columns so that every label in Y has a matching row in X
    X(1,i) = 0;
  end
  % remove any surplus columns and transpose to one example per row
  X = X(:,1:i)';
  sparsity = nnz(X)/prod(size(X));
  if sparsity<0.5,
    % convert to sparse a second time for optimal memory usage
    X = sparse(X);
  end
end