Skip to content

Commit 6d9cd7f

Browse files
authored
Merge pull request #1149 from apache/dev-postgresql
merge development branch to master branch for V4.2
2 parents ba51f34 + f0d23a9 commit 6d9cd7f

File tree

258 files changed

+12306
-7713
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

258 files changed

+12306
-7713
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ test/samples/
2929
# Sphinx and Doxygen Doc-Site
3030
doc/_build/*
3131
doc/en/docs/model_zoo/
32+
cmake-build-debug/*

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
2929
#string(REGEX REPLACE "^[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" VERSION_PATCH "${VERSION}")
3030

3131

32-
SET(PACKAGE_VERSION 4.1.0) # ${VERSION})
33-
SET(VERSION 4.1.0)
32+
SET(PACKAGE_VERSION 4.2.0) # ${VERSION})
33+
SET(VERSION 4.2.0)
3434
SET(SINGA_MAJOR_VERSION 4)
35-
SET(SINGA_MINOR_VERSION 1)
35+
SET(SINGA_MINOR_VERSION 2)
3636
SET(SINGA_PATCH_VERSION 0)
3737
#SET(SINGA_MAJOR_VERSION ${VERSION_MAJOR}) # 0 -
3838
#SET(SINGA_MINOR_VERSION ${VERSION_MINOR}) # 0 - 9

NOTICE

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,4 @@ developers of Apache SINGA under Apache License, Version 2.0.
3030
./doc/_static/images/sgd.png
3131
./doc/_static/images/singa.png
3232
./doc/_static/images/singav1-sw.png
33-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174425377.png
34-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/documents/image-20231020174945226.png
35-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421214835152.png
36-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220338391.png
37-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230421220443231.png
38-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035554579.png
39-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035622198.png
40-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035639502.png
41-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230702035806963.png
42-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722202555763.png
43-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230722205244718.png
44-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111325368.png
45-
./examples/model_selection/TRAILS-Database-Native-Model-Selection/internal/ml/model_selection/documents/imgs/image-20230724111659545.png
33+
./examples/model_selection/Trails/documents/ai_db.001.jpeg

RELEASE_NOTES

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,33 @@
1+
Release Notes - SINGA - Version singa-4.2.0
2+
3+
SINGA is a distributed deep learning library.
4+
5+
This release includes the following changes:
6+
7+
* Add support for deep learning models running on top of PolarDB
8+
* Implement efficient model selection for a given dataset stored in the database.
9+
* Add support for dynamic model creation.
10+
* Add support for flexible setting of model training configurations.
11+
* Optimize the in-database analytics modules for scalability, efficiency and memory consumption.
12+
13+
* New example
14+
* Add a horizontal federated learning example using the Bank dataset.
15+
16+
* Enhance examples
17+
* Add sample training data for testing the model selection application.
18+
19+
* Update the website
20+
* Update the star button in the main page.
21+
* Refine the display of star statistics.
22+
23+
* Update the python versions for wheel files
24+
25+
* Fix bugs
26+
* Fix the rat check files.
27+
* Update the license files.
28+
29+
----------------------------------------------------------------------------------------------
30+
131
Release Notes - SINGA - Version singa-4.1.0
232

333
SINGA is a distributed deep learning library.

examples/hfl/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
21+
# Horizontal Federated Learning Example
22+
23+
This is an example of federated learning (FL) using the Singa framework. In FL, there is a server and a set of clients. Each client has a local dataset.
24+
In each iteration, each client trains the model using its local dataset and uploads the model gradient to the server, which aggregates the uploaded gradients to get the global
25+
gradient using the Federated Average algorithm. The server sends the global gradient to all clients for iterative model training.
26+
This example uses the Bank dataset and an MLP model in FL.
27+
28+
## Preparation
29+
30+
Go to the Conda environment that contains the Singa library, and run
31+
32+
```bash
33+
pip install -r requirements.txt
34+
```
35+
36+
Download the bank dataset and split it into 3 partitions.
37+
38+
```bash
39+
# 1. download the data from https://archive.ics.uci.edu/ml/datasets/bank+marketing
40+
# 2. put it under the /data folder
41+
# 3. run the following command which:
42+
# (1) splits the dataset into N subsets
43+
# (2) splits each subset into a train set and a test set (8:2)
44+
python -m bank N
45+
```
46+
47+
## Run the example
48+
49+
Run the server first (set the number of epochs to 3)
50+
51+
```bash
52+
python -m src.server -m 3 --num_clients 3
53+
```
54+
55+
Then, start 3 clients in different terminals
56+
57+
```bash
58+
python -m src.client --model mlp --data bank -m 3 -i 0 -d non-iid
59+
python -m src.client --model mlp --data bank -m 3 -i 1 -d non-iid
60+
python -m src.client --model mlp --data bank -m 3 -i 2 -d non-iid
61+
```
62+
63+
Finally, the server and clients finish the FL training.

examples/hfl/config/.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Default ignored files
2+
/shelf/
3+
/workspace.xml
4+
# Datasource local storage ignored files
5+
/dataSources/
6+
/dataSources.local.xml
7+
# Editor-based HTTP Client requests
8+
/httpRequests/

examples/hfl/config/Singa-HFL.iml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
<?xml version="1.0" encoding="UTF-8"?>
21+
<module type="PYTHON_MODULE" version="4">
22+
<component name="NewModuleRootManager">
23+
<content url="file://$MODULE_DIR$">
24+
<excludeFolder url="file://$MODULE_DIR$/venv" />
25+
</content>
26+
<orderEntry type="inheritedJdk" />
27+
<orderEntry type="sourceFolder" forTests="false" />
28+
</component>
29+
</module>

examples/hfl/data/.gitkeep

Whitespace-only changes.

examples/hfl/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pandas
2+
scikit-learn
3+
protobuf

examples/hfl/src/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+

examples/hfl/src/bank.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
# https://github.com/zhengzangw/Fed-SINGA/blob/main/src/client/data/bank.py
21+
22+
import pandas as pd
23+
import numpy as np
24+
import sys
25+
from pandas.api.types import is_numeric_dtype
26+
from sklearn.model_selection import train_test_split
27+
from sklearn.utils import shuffle
28+
29+
30+
def encode(df):
    """One-hot encode the non-numeric columns of ``df``.

    Numeric columns are passed through unchanged. Every other column is
    expanded with ``pd.get_dummies``, using the column name as the prefix
    of the generated indicator columns. The output keeps the column order
    of ``df``.

    Args:
        df: the ``pandas.DataFrame`` to encode.

    Returns:
        A new ``pandas.DataFrame`` holding the encoded columns; an empty
        frame when ``df`` has no columns.
    """
    parts = [
        df[col] if is_numeric_dtype(df[col])
        else pd.get_dummies(df[col], prefix=col)
        for col in df.columns.values
    ]
    # pd.concat rejects an empty list, so return an empty frame directly
    # when there is nothing to encode.
    if not parts:
        return pd.DataFrame()
    # Concatenate once: growing the frame inside the loop would re-copy
    # every previously encoded column on each iteration.
    return pd.concat(parts, axis=1)
39+
40+
41+
def load(device_id):
    """Load one client's partition of the Bank dataset as NumPy arrays.

    Reads ``data/bank_train_<device_id>.csv`` and
    ``data/bank_test_<device_id>.csv`` (paths are relative to the current
    working directory), separates the ``y`` label column from the feature
    columns, and rescales the features with ``normalize``.

    Args:
        device_id: identifier of the partition; it is converted with
            ``str`` and embedded in both file names.

    Returns:
        A tuple ``(train_x, train_y, val_x, val_y, num_classes)`` where the
        feature arrays were created as ``float32`` before normalization,
        the label arrays are ``int32`` and ``num_classes`` is ``2``.
    """
    train_frame = pd.read_csv("data/bank_train_" + str(device_id) + ".csv",
                              sep=',')
    test_frame = pd.read_csv("data/bank_test_" + str(device_id) + ".csv",
                             sep=',')

    # Everything except the 'y' column is a feature.
    train_features, train_labels = train_frame.drop(['y'], axis=1), train_frame['y']
    val_features, val_labels = test_frame.drop(['y'], axis=1), test_frame['y']

    train_x = np.array(train_features, dtype=np.float32)
    val_x = np.array(val_features, dtype=np.float32)
    train_y = np.array(train_labels, dtype=np.int32)
    val_y = np.array(val_labels, dtype=np.int32)

    # The scaler is fitted on the training features only (see normalize).
    train_x, val_x = normalize(train_x, val_x)

    # The label column is binary.
    num_classes = 2
    return train_x, train_y, val_x, val_y, num_classes
62+
63+
64+
def normalize(X_train, X_test):
    """Rescale both feature sets with a scaler fitted on the training set.

    A scikit-learn ``MinMaxScaler`` with default settings is fitted on
    ``X_train`` only and then applied to ``X_train`` and ``X_test``, so the
    test features are transformed with the training statistics.

    Args:
        X_train: training features.
        X_test: test/validation features.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled)``.
    """
    from sklearn.preprocessing import MinMaxScaler

    fitted = MinMaxScaler().fit(X_train)
    return fitted.transform(X_train), fitted.transform(X_test)
70+
71+
72+
def split(num):
    """Split the Bank Marketing dataset into ``num`` client partitions.

    Reads ``bank-additional-full.csv`` from the example's ``data``
    directory, converts the ``y`` column to 0/1, one-hot encodes the
    non-numeric columns with ``encode``, shuffles the rows and makes an
    80/20 train/test split. The full train and test sets are written to
    ``bank_train_.csv`` and ``bank_test_.csv``; partition ``i`` is written
    to ``bank_train_<i>.csv`` and ``bank_test_<i>.csv``. All files are
    written to the same ``data`` directory the input is read from.

    Each partition receives ``len(train) // num`` training rows and
    ``len(test) // num`` test rows; rows left over by the integer division
    are not assigned to any partition.

    Args:
        num: number of partitions (clients) to create; must be >= 1.

    Raises:
        ValueError: if ``num`` is smaller than 1.
    """
    from pathlib import Path

    if num < 1:
        raise ValueError("num must be a positive integer, got " + str(num))

    # Anchor on this file (<example>/src/bank.py) rather than on the
    # working directory, so the input and the outputs always share
    # <example>/data. The original read from "../data" but wrote to "data",
    # which cannot both be right for a single working directory.
    data_dir = Path(__file__).resolve().parent.parent / "data"

    # The source file is ';'-separated and labels rows with 'yes'/'no'.
    df = pd.read_csv(data_dir / "bank-additional-full.csv", sep=';')
    df['y'] = (df['y'] == 'yes').astype(int)
    data = encode(df)
    data = shuffle(data)
    train, test = train_test_split(data, test_size=0.2)

    # Keep the unpartitioned train/test sets next to the partitions.
    train.to_csv(data_dir / "bank_train_.csv", index=False)
    test.to_csv(data_dir / "bank_test_.csv", index=False)

    train_per_client = len(train) // num
    test_per_client = len(test) // num

    print("train_per_client:", train_per_client)
    print("test_per_client:", test_per_client)
    for i in range(num):
        # Positional slices: the row labels are no longer ordered after
        # shuffling, so select by position explicitly.
        sub_train = train.iloc[i * train_per_client:(i + 1) * train_per_client]
        sub_test = test.iloc[i * test_per_client:(i + 1) * test_per_client]
        sub_train.to_csv(data_dir / ("bank_train_" + str(i) + ".csv"),
                         index=False)
        sub_test.to_csv(data_dir / ("bank_test_" + str(i) + ".csv"),
                        index=False)
93+
94+
95+
if __name__ == "__main__":
    # The single required argument is the number of client partitions.
    # Exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python -m bank N")
    split(int(sys.argv[1]))
97+

0 commit comments

Comments
 (0)