Skip to content

Commit 57cd5b5

Browse files
author
Johann Paratte
committed
Added dependency
1 parent 18a4abe commit 57cd5b5

File tree

6 files changed

+701
-0
lines changed

6 files changed

+701
-0
lines changed
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
function data = create_synthetic_dataset(data)
2+
% create_synthetic_dataset creates test data for running nldr algorithms.
3+
%
4+
% inputs:
5+
% data a struct describing the test data
6+
% .dataset the number of the example, see code for more infos
7+
% .n the number of data points (default=400)
8+
% .state the initial state for the random numbers (default=0)
9+
% .noise the variance of Gaussian noise to add (default=0)
10+
% other options for some of the data sets (see code)
11+
% alternatively, data = 1 chooses the dataset directly,
12+
% the number of points defaults to 1000
13+
%
14+
% outputs:
15+
% data a struct containing .x the generated data, each column is
16+
% a data point, and other stuff:
17+
% .z the "correct" embedding
18+
% .e some random noise of same dimensionality
19+
% .x_noisefree the noisefree version of .x, i.e.
20+
% .x = .xnoise_free + sqrt(.noise) .e
21+
%
22+
% Adapted from create.m, originally written by
23+
% (c) Stefan Harmeling, 2006
24+
% using the examples of the original LLE and ISOMAP code.
25+
%
26+
% Url: http://lts2research.epfl.ch/gsp/doc/sgwt_require/create_synthetic_dataset.php
27+
28+
% Copyright (C) 2013-2014 Nathanael Perraudin, Johan Paratte, David I Shuman.
29+
% This file is part of GSPbox version 0.3.0
30+
%
31+
% This program is free software: you can redistribute it and/or modify
32+
% it under the terms of the GNU General Public License as published by
33+
% the Free Software Foundation, either version 3 of the License, or
34+
% (at your option) any later version.
35+
%
36+
% This program is distributed in the hope that it will be useful,
37+
% but WITHOUT ANY WARRANTY; without even the implied warranty of
38+
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39+
% GNU General Public License for more details.
40+
%
41+
% You should have received a copy of the GNU General Public License
42+
% along with this program. If not, see <http://www.gnu.org/licenses/>.
43+
44+
% If you use this toolbox please kindly cite
45+
% N. Perraudin, J. Paratte, D. Shuman, V. Kalofolias, P. Vandergheynst,
46+
% and D. K. Hammond. GSPBOX: A toolbox for signal processing on graphs.
47+
% ArXiv e-prints, Aug. 2014.
48+
% http://arxiv.org/abs/1408.5781
49+
50+
if ~isfield(data, 'dataset'),
51+
number = data;
52+
clear data
53+
data.dataset = number;
54+
end
55+
if ~isfield(data, 'n'), data.n = 400; end
56+
if ~isfield(data, 'noise'), data.noise = 0.0; end
57+
if ~isfield(data, 'state'), data.state = 0; end
58+
59+
% set the randomness
60+
rand('state', data.state);
61+
randn('state', data.state);
62+
63+
data.typ = 'data';
64+
switch data.dataset
65+
case 0 % "swiss roll with hole"
66+
data.name = 'swiss roll with hole';
67+
n = data.n;
68+
a = 1; % swiss roll goes from a*pi to b*pi
69+
b = 4;
70+
y = rand(2,n);
71+
% punch a rectangular hole at the center
72+
l1 = 0.05; l2 = 0.15;
73+
y = y - 0.5;
74+
ok = find((abs(y(1,:))>l1) | (abs(y(2,:))>l2));
75+
i = length(ok);
76+
y(:, 1:i) = y(:, ok);
77+
while (i<n)
78+
p = rand(2,1) - 0.5;
79+
if (abs(p(1))>l1) || (abs(p(2))>l2)
80+
i = i + 1;
81+
y(:,i) = p;
82+
end
83+
end
84+
y = y + 0.5;
85+
tt = (b-a)*y(1,:) + a;
86+
tt = pi*tt;
87+
height = 21*y(2,:);
88+
data.col = tt;
89+
data.x = [tt.*cos(tt); height; tt.*sin(tt)];
90+
data.z = [tt; height]; % the ground truth
91+
data.az = -4;
92+
data.el = 13;
93+
94+
case -1 % "swiss roll" dataset extracted from LLE's swissroll.m
95+
data.name = 'uniform swiss roll';
96+
n = data.n;
97+
a = 1; % swiss roll goes from a*pi to b*pi
98+
b = 4;
99+
y = rand(2,n);
100+
data.z = y; % the ground truth
101+
switch 1
102+
case 1
103+
% uniform distribution along the manifold (in data space)
104+
tt = sqrt((b*b-a*a)*y(1,:)+a*a);
105+
case 2
106+
% error('do not use this case')
107+
% nonuniform distribution along the manifold (in data space)
108+
tt = (b-a)*y(1,:) + a;
109+
end
110+
tt = pi*tt;
111+
% now tt should go from a*pi to b*pi
112+
height = 21*y(2,:);
113+
data.col = tt;
114+
data.x = [tt.*cos(tt); height; tt.*sin(tt)];
115+
data.az = -4;
116+
data.el = 13;
117+
118+
case 1 % "swiss roll (uniform in embedding space)"
119+
% dataset extracted from LLE's swissroll.m
120+
data.name = 'classic swiss roll';
121+
n = data.n;
122+
a = 1; % swiss roll goes from a*pi to b*pi
123+
b = 4;
124+
y = rand(2,n);
125+
tt = (b-a)*y(1,:) + a;
126+
tt = pi*tt;
127+
height = 21*y(2,:);
128+
data.col = tt;
129+
data.x = [tt.*cos(tt); height; tt.*sin(tt)];
130+
data.z = [tt; height]; % the ground truth
131+
data.az = -4;
132+
data.el = 13;
133+
134+
case 11 % "undersampled swiss roll"
135+
% dataset extracted from LLE's swissroll.m
136+
data.name = 'undersampled swiss roll';
137+
data.n = 100;
138+
n = data.n;
139+
a = 1; % swiss roll goes from a*pi to b*pi
140+
b = 4;
141+
y = rand(2,n);
142+
tt = (b-a)*y(1,:) + a;
143+
tt = pi*tt;
144+
height = 21*y(2,:);
145+
data.col = tt;
146+
data.x = [tt.*cos(tt); height; tt.*sin(tt)];
147+
data.z = [tt; height]; % the ground truth
148+
data.az = -4;
149+
data.el = 13;
150+
151+
case 12 % "swiss roll"
152+
% dataset extracted from LLE's swissroll.m
153+
data.name = 'classic swiss roll';
154+
data.n = 400;
155+
n = data.n;
156+
a = 1; % swiss roll goes from a*pi to b*pi
157+
b = 4;
158+
y = rand(2,n);
159+
tt = (b-a)*y(1,:) + a;
160+
tt = pi*tt;
161+
height = 21*y(2,:);
162+
data.col = tt;
163+
data.x = [tt.*cos(tt); height; tt.*sin(tt)];
164+
data.z = [tt; height]; % the ground truth
165+
data.az = -4;
166+
data.el = 13;
167+
168+
case 2 % "scurve" dataset extracted from LLE's scurve.m
169+
data.name = 'scurve';
170+
n = data.n;
171+
% I added 'ceil' and 'floor' to account for the case that n is odd
172+
angle = pi*(1.5*rand(1,ceil(n/2))-1); height = 5*rand(1,n);
173+
data.x = [[cos(angle), -cos(angle(1:floor(n/2)))]; height;[ sin(angle), 2-sin(angle)]];
174+
data.col = [angle, 1.5*pi + angle];
175+
data.z = [angle, 1.5*pi+angle; height]; % the ground truth
176+
177+
case 3 % "square" dataset, a uniformly sampled 2D square randomly
178+
% rotated into higher dimensions
179+
data.name = 'square';
180+
n = data.n;
181+
d = 2; % intrinsic dimension
182+
% optional parameter for dataset==3
183+
% data.D dimension of the data
184+
if ~isfield(data, 'D'), data.D = 3; end
185+
% generate random rotation matrix
186+
D = data.D;
187+
A = randn(D, D);
188+
options.disp = 0;
189+
[R, dummy] = eigs(A*A', d, 'LM', options);
190+
tt = rand(d, n);
191+
data.col = tt(1,:);
192+
data.x = R*tt;
193+
data.z = tt; % the ground truth
194+
data.az = 7;
195+
data.el = 40;
196+
197+
case 4 % spiral: two dimensional "swiss roll"
198+
data.name = 'spiral';
199+
n = data.n;
200+
tt = (3*pi/2)*(1+2*rand(1, n));
201+
data.col = tt;
202+
data.x = [tt.*cos(tt); tt.*sin(tt)];
203+
data.z = tt; % the ground truth
204+
205+
case -4 % spiral: two dimensional "swiss roll"
206+
data.name = 'noisy spiral';
207+
n = data.n;
208+
tt = (3*pi/2)*(1+2*rand(1, n));
209+
data.col = tt;
210+
data.x = [tt.*cos(tt); tt.*sin(tt)];
211+
data.x = data.x + randn(size(data.x));
212+
data.z = tt; % the ground truth
213+
214+
case 5 % hole: a dataset with a hole
215+
data.name = 'hole';
216+
n = data.n;
217+
data.x = rand(2,n) - 0.5;
218+
% punch a rectangular hole at the center
219+
l1 = 0.2; l2 = 0.2;
220+
ok = find((abs(data.x(1,:))>l1) | (abs(data.x(2,:))>l2));
221+
i = length(ok);
222+
data.x(:, 1:i) = data.x(:, ok);
223+
while (i<n)
224+
p = rand(2,1) - 0.5;
225+
if (abs(p(1))>l1) || (abs(p(2))>l2)
226+
i = i + 1;
227+
data.x(:,i) = p;
228+
end
229+
end
230+
data.col = data.x(2,:);
231+
data.z = data.x;
232+
233+
case 6 % P : taken from Saul's slides
234+
% note that for k=20, isomap and lle work fine which is very different
235+
% from the plots that Saul showed in his slides.
236+
data.name = 'P';
237+
load x
238+
x(2,:) = 500-x(2,:);
239+
data.x = x;
240+
data.z = x;
241+
data.col = data.z(2,:);
242+
data.n = size(x, 2);
243+
244+
case 7 % fishbowl: uniform in data space
245+
gamma = 0.8;
246+
data.name = 'fishbowl (uniform in data space)';
247+
n = data.n;
248+
data.x = rand(3,n)-0.5;
249+
%project all data onto the surface of the unit sphere
250+
data.x = data.x ./ repmat(sqrt(sum(data.x.*data.x, 1)), [3 1]);
251+
ok = find(data.x(3,:) < gamma);
252+
i = length(ok);
253+
data.x(:, 1:i) = data.x(:, ok);
254+
while (i < n)
255+
p = rand(3,1)-0.5;
256+
p = p / sqrt(p'*p);
257+
if (p(3) < gamma)
258+
i = i+1;
259+
data.x(:, i) = p;
260+
end
261+
end
262+
% the projection on the plane works as follows:
263+
% start a beam from (0,0,1) through each surface point on the sphere
264+
% and look where it hits the xy plane.
265+
data.z = data.x(1:2,:) ./ repmat(1-data.x(3,:), [2 1]);
266+
data.col = data.x(3,:);
267+
data.az = -18;
268+
data.el = 16;
269+
case 8 % fishbowl: uniform in embedding space
270+
data.name = 'fishbowl (uniform in embedding space)';
271+
n = data.n;
272+
data.z = rand(2, n) - 0.5;
273+
% keep the disc
274+
ok = find(sum(data.z .* data.z) <= 0.25);
275+
i = length(ok);
276+
data.z(:, 1:i) = data.z(:, ok);
277+
while (i < n)
278+
p = rand(2,1) - 0.5;
279+
if (p'*p <= 0.25)
280+
i = i + 1;
281+
data.z(:, i) = p;
282+
end
283+
end
284+
gamma = 0.8; % same role/parameter as in case 7
285+
data.z = 2*sqrt((1+gamma)/(1-gamma))*data.z;
286+
% project the disc onto the sphere
287+
alpha = 2 ./ (1 + sum(data.z .* data.z, 1));
288+
data.x = [repmat(alpha, [2 1]).*data.z; zeros(1, n)];
289+
data.x(3,:) = 1-alpha;
290+
data.col = data.x(3,:);
291+
data.az = -18;
292+
data.el = 16;
293+
294+
case 9 % a gaussian blob
295+
data.name = 'gaussian blob';
296+
n = data.n;
297+
data.x = randn(3,n);
298+
data.z = data.x(2:3,:);
299+
data.col = data.x(3,:);
300+
301+
end
302+
303+
304+
data.D = size(data.x, 1); % dimensionality of the data
305+
% finally generate noise
306+
data.e = randn(size(data.x));
307+
data.x_noisefree = data.x; % the noise free data
308+
data.x = data.x_noisefree + sqrt(data.noise)*data.e;
309+
310+
% precalculate the distanzmatrix
311+
data.distances = gsp_distanz(data.x);
312+
313+
314+

0 commit comments

Comments
 (0)