9
9
using Meta . Numerics . Statistics ;
10
10
using Meta . Numerics . Statistics . Distributions ;
11
11
12
+ using Newtonsoft . Json ;
13
+
12
14
namespace Examples {
13
15
14
16
public static class Data {
15
17
18
/// <summary>
/// Writes a small sample CSV file named "test.csv" (one header line and
/// three data rows) using a relative path.
/// </summary>
/// <remarks>
/// Any existing "test.csv" is replaced in full.
/// </remarks>
public static void ConstructTestCsv() {

    // Open with append: false so an existing file is truncated first.
    // File.OpenWrite does not truncate, so a previously longer file would
    // keep stale bytes after the new rows and corrupt the CSV.
    using (TextWriter writer = new StreamWriter("test.csv", false)) {
        writer.WriteLine("Id, Name, Sex, Birthdate, Height, Weight, Result");
        writer.WriteLine("1, John, M, 1970-01-02, 190.0, 75.0, True");
        writer.WriteLine("2, Mary, F, 1980-02-03, 155.0, 40.0, True");
        writer.WriteLine("3, Luke, M, 1990-03-04, 180.0, 60.0, False");
    }

}
28
+
29
/// <summary>
/// Demonstrates several ways of getting data into a <c>FrameTable</c>:
/// from a local CSV file, from a CSV file fetched over HTTP, from JSON
/// deserialized into dictionaries, and by defining columns and adding
/// rows in code.
/// </summary>
/// <remarks>
/// Reads "test.csv" via a relative path and performs two network requests.
/// </remarks>
[ExampleMethod]
public static void ImportingData() {

    // Import from a local CSV file.
    FrameTable data;
    using (TextReader reader = File.OpenText("test.csv")) {
        data = FrameTable.FromCsv(reader);
    }

    Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
    Console.WriteLine("The names and types of the columns are:");
    foreach (FrameColumn column in data.Columns) {
        Console.WriteLine($" {column.Name} of type {column.StorageType} ");
    }

    // Import from a CSV file fetched over HTTP.
    FrameTable titanic;
    Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse()) {
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            titanic = FrameTable.FromCsv(reader);
        }
    }

    // Import from JSON: download the text, deserialize it into a list of
    // dictionaries, and build the table from those dictionaries.
    Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
    string input;
    // WebClient is IDisposable, so release it as soon as the download is done.
    using (WebClient client = new WebClient()) {
        input = client.DownloadString(jsonUrl);
    }
    List<Dictionary<string, object>> output = JsonConvert.DeserializeObject<List<Dictionary<string, object>>>(input);
    FrameTable jsonExample = FrameTable.FromDictionaries(output);

    // Define the schema.
    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Add rows as arrays of objects.
    table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
    table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

    // Add a row using a dictionary. This is more verbose, but very clear.
    table.AddRow(new Dictionary<string, object>() {
        { "Id", 3 },
        { "Name", null },
        { "Sex", "M" },
        { "Birthdate", DateTime.Parse("1990-03-04") },
        { "Height", 180.0 },
        { "Weight", 60.0 },
        { "Result", false }
    });

}
84
+
16
85
[ ExampleMethod ]
17
86
public static void ManipulatingData ( ) {
18
87
@@ -82,8 +151,18 @@ public static void AnalyzingData () {
82
151
table = FrameTable . FromCsv ( reader ) ;
83
152
}
84
153
}
154
+ FrameView view = table . WhereNotNull ( ) ;
155
+
156
+ // Get the column with (zero-based) index 4.
157
+ FrameColumn column4 = view . Columns [ 4 ] ;
158
+ // Get the column named "Height".
159
+ FrameColumn heightsColumn = view . Columns [ "Height" ] ;
160
+ // Even easier way to get the column named "Height".
161
+ FrameColumn alsoHeightsColumn = view [ "Height" ] ;
85
162
86
- SummaryStatistics summary = new SummaryStatistics ( table [ "Height" ] . As < double > ( ) ) ;
163
+ IReadOnlyList < double > heights = view [ "Height" ] . As < double > ( ) ;
164
+
165
+ SummaryStatistics summary = new SummaryStatistics ( view [ "Height" ] . As < double > ( ) ) ;
87
166
Console . WriteLine ( $ "Count = { summary . Count } ") ;
88
167
Console . WriteLine ( $ "Mean = { summary . Mean } ") ;
89
168
Console . WriteLine ( $ "Standard Deviation = { summary . StandardDeviation } ") ;
@@ -92,74 +171,77 @@ public static void AnalyzingData () {
92
171
Console . WriteLine ( $ "Estimated population standard deviation = { summary . PopulationStandardDeviation } ") ;
93
172
94
173
IReadOnlyList < double > maleHeights =
95
- table . Where < string > ( "Sex" , s => s == "M" ) . Columns [ "Height" ] . As < double > ( ) ;
174
+ view . Where < string > ( "Sex" , s => s == "M" ) . Columns [ "Height" ] . As < double > ( ) ;
96
175
IReadOnlyList < double > femaleHeights =
97
- table . Where < string > ( "Sex" , s => s == "F" ) . Columns [ "Height" ] . As < double > ( ) ;
176
+ view . Where < string > ( "Sex" , s => s == "F" ) . Columns [ "Height" ] . As < double > ( ) ;
98
177
TestResult test = Univariate . StudentTTest ( maleHeights , femaleHeights ) ;
99
- Console . WriteLine ( $ "{ test . Statistic . Name } = { test . Statistic . Value } , P = { test . Probability } ") ;
178
+ Console . WriteLine ( $ "{ test . Statistic . Name } = { test . Statistic . Value } ") ;
179
+ Console . WriteLine ( $ "P = { test . Probability } ") ;
100
180
101
181
TestResult maleHeightNormality = maleHeights . ShapiroFranciaTest ( ) ;
102
- TestResult totalHeightNormality = table [ "Height" ] . As < double > ( ) . ShapiroFranciaTest ( ) ;
182
+ TestResult totalHeightNormality = view [ "Height" ] . As < double > ( ) . ShapiroFranciaTest ( ) ;
103
183
TestResult heightCompatibility = Univariate . KolmogorovSmirnovTest ( maleHeights , femaleHeights ) ;
104
184
105
185
LinearRegressionResult fit =
106
- table [ "Weight" ] . As < double > ( ) . LinearRegression ( table [ "Height" ] . As < double > ( ) ) ;
186
+ view [ "Weight" ] . As < double > ( ) . LinearRegression ( view [ "Height" ] . As < double > ( ) ) ;
107
187
Console . WriteLine ( $ "Model weight = ({ fit . Slope } ) * height + ({ fit . Intercept } ).") ;
108
188
Console . WriteLine ( $ "Model explains { fit . RSquared * 100.0 } % of variation.") ;
109
189
110
190
ContingencyTable < string , bool > contingency =
111
- Bivariate . Crosstabs ( table [ "Sex" ] . As < string > ( ) , table [ "Result" ] . As < bool > ( ) ) ;
191
+ Bivariate . Crosstabs ( view [ "Sex" ] . As < string > ( ) , view [ "Result" ] . As < bool > ( ) ) ;
112
192
Console . WriteLine ( $ "Male incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( true , "M" ) } ") ;
113
- Console . WriteLine ( $ "Female incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( false , "F" ) } ") ;
193
+ Console . WriteLine ( $ "Female incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( true , "F" ) } ") ;
114
194
Console . WriteLine ( $ "Log odds ratio = { contingency . Binary . LogOddsRatio } ") ;
115
195
116
- table . AddComputedColumn ( "Bmi" , r => ( ( double ) r [ "Weight" ] ) / MoreMath . Sqr ( ( double ) r [ "Height" ] / 100.0 ) ) ;
117
- table . AddComputedColumn ( "Age" , r=> ( DateTime . Now - ( DateTime ) r [ "Birthdate" ] ) . TotalDays / 365.24 ) ;
196
+ view . AddComputedColumn ( "Bmi" , r => ( ( double ) r [ "Weight" ] ) / MoreMath . Sqr ( ( double ) r [ "Height" ] / 100.0 ) ) ;
197
+ view . AddComputedColumn ( "Age" , r=> ( DateTime . Now - ( DateTime ) r [ "Birthdate" ] ) . TotalDays / 365.24 ) ;
118
198
119
199
MultiLinearLogisticRegressionResult result =
120
- table [ "Result" ] . As < bool > ( ) . MultiLinearLogisticRegression (
121
- table [ "Bmi" ] . As < double > ( ) ,
122
- table [ "Sex" ] . As < string , double > ( s => s == "M" ? 1.0 : 0.0 )
200
+ view [ "Result" ] . As < bool > ( ) . MultiLinearLogisticRegression (
201
+ view [ "Bmi" ] . As < double > ( ) ,
202
+ view [ "Sex" ] . As < string , double > ( s => s == "M" ? 1.0 : 0.0 )
123
203
) ;
124
204
foreach ( Parameter parameter in result . Parameters ) {
125
205
Console . WriteLine ( $ "{ parameter . Name } = { parameter . Estimate } ") ;
126
206
}
127
207
128
- //TestResult ageResultPearson = Bivariate.PearsonRTest(table["Age"].As<double>(), table["Result"].As<double>());
129
- TestResult spearman = Bivariate . SpearmanRhoTest ( table [ "Age" ] . As < double > ( ) , table [ "Result" ] . As < double > ( ) ) ;
208
+ TestResult spearman = Bivariate . SpearmanRhoTest ( view [ "Age" ] . As < double > ( ) , view [ "Result" ] . As < double > ( ) ) ;
130
209
Console . WriteLine ( $ "{ spearman . Statistic . Name } = { spearman . Statistic . Value } P = { spearman . Probability } ") ;
131
210
132
211
}
133
212
134
- public static void ConstructData ( ) {
213
/// <summary>
/// Builds the example data set and writes it to "example.csv" and
/// "example.json" using relative paths.
/// </summary>
/// <remarks>
/// The random generator is created with a fixed seed. Existing output
/// files are replaced in full.
/// </remarks>
public static void ConstructExampleData() {

    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    // Weight is nullable so that a row with a missing weight can be added below.
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    Random rng = new Random(1000001);

    // The numeric arguments are, in order: meanHeight, stddevHeight,
    // meanBmi, stddevBmi, flag (see the AddRows signature).
    string[] maleNames = new string[] { "Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
    AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

    string[] femaleNames = new string[] { "Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
    AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

    // Add rows with nulls: one with a missing name, one with a missing weight.
    table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
    table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

    string path = @"example.csv";
    // Open with append: false so an existing file is truncated first.
    // File.OpenWrite does not truncate, so a previously longer file would
    // keep stale bytes after the newly written CSV.
    using (StreamWriter writer = new StreamWriter(path, false)) {
        table.ToCsv(writer);
    }
    Console.WriteLine(File.Exists(path));

    string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);
    File.WriteAllText("example.json", json);

}
164
246
165
247
private static void AddRows ( FrameTable table , IReadOnlyList < string > names , string sex , double meanHeight , double stddevHeight , double meanBmi , double stddevBmi , int flag , Random rng ) {
0 commit comments