Skip to content

Commit 978f398

Browse files
authored
Datalabeling beta samples (#1365)
* Datalabeling beta samples * Fixing tests * Fixing tests * Ignoring tests for now due to time duration. * Ignoring tests for now due to time duration. * Ignoring tests for now due to time duration. * debugging * debugging
1 parent d42ffac commit 978f398

18 files changed

+1532
-0
lines changed
+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# DataLabeling API Java examples
2+
3+
These samples demonstrate the use of the [DataLabeling API](https://cloud.google.com/datalabeling/).
4+
5+
These samples show how to perform the following actions:
6+
* create / import a dataset and annotation spec sheet.
7+
* create instructions for the labelers.
8+
* start a labeling task for audio, images, text and video.
9+
* export an annotated dataset.
10+
11+
12+
## Prerequisites
13+
14+
This sample requires you to have Java [set up](https://cloud.google.com/java/docs/setup).
15+
16+
17+
## Setup
18+
19+
* Create a project with the [Google Cloud Console][cloud-console], and enable
20+
the [DataLabeling API][datalabeling-api].
21+
* [Set up][auth] authentication. For
22+
example, from the Cloud Console, create a service account,
23+
download its JSON credentials file, then set the appropriate environment
24+
variables:
25+
26+
```bash
27+
export GOOGLE_CLOUD_PROJECT=PROJECT_ID
28+
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-project-credentials.json
29+
```
30+
31+
[cloud-console]: https://console.cloud.google.com
32+
[datalabeling-api]: https://console.cloud.google.com/apis/library/datalabeling.googleapis.com
33+
[auth]: https://cloud.google.com/docs/authentication/getting-started
34+
35+
## Run the Tests
36+
37+
To verify that the APIs are enabled, run the unit tests with:
38+
39+
```bash
40+
mvn clean verify
41+
```
+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<!--
2+
Copyright 2019 Google LLC
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
-->
13+
<project>
14+
<modelVersion>4.0.0</modelVersion>
15+
<groupId>com.example.datalabeling</groupId>
16+
<artifactId>datalabeling-samples</artifactId>
17+
<packaging>jar</packaging>
18+
19+
<!--
20+
The parent pom defines common style checks and testing strategies for our samples.
21+
Removing or replacing it should not affect the execution of the samples in any way.
22+
-->
23+
<parent>
24+
<groupId>com.google.cloud.samples</groupId>
25+
<artifactId>shared-configuration</artifactId>
26+
<version>1.0.10</version>
27+
</parent>
28+
29+
<properties>
30+
<maven.compiler.target>1.8</maven.compiler.target>
31+
<maven.compiler.source>1.8</maven.compiler.source>
32+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
33+
</properties>
34+
35+
<dependencies>
36+
<dependency>
37+
<groupId>com.google.cloud</groupId>
38+
<artifactId>google-cloud-datalabeling</artifactId>
39+
<version>0.86.0-alpha</version>
40+
</dependency>
41+
42+
<!-- Test dependencies -->
43+
<dependency>
44+
<groupId>junit</groupId>
45+
<artifactId>junit</artifactId>
46+
<version>4.12</version>
47+
<scope>test</scope>
48+
</dependency>
49+
<dependency>
50+
<groupId>com.google.truth</groupId>
51+
<artifactId>truth</artifactId>
52+
<version>0.42</version>
53+
<scope>test</scope>
54+
</dependency>
55+
</dependencies>
56+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.datalabeling;
18+
19+
// [START datalabeling_create_annotation_spec_set_beta]
20+
import com.google.cloud.datalabeling.v1beta1.AnnotationSpec;
21+
import com.google.cloud.datalabeling.v1beta1.AnnotationSpecSet;
22+
import com.google.cloud.datalabeling.v1beta1.CreateAnnotationSpecSetRequest;
23+
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceClient;
24+
import com.google.cloud.datalabeling.v1beta1.ProjectName;
25+
import java.io.IOException;
26+
import java.util.ArrayList;
27+
import java.util.HashMap;
28+
import java.util.List;
29+
import java.util.Map;
30+
import java.util.Map.Entry;
31+
32+
class CreateAnnotationSpecSet {
33+
34+
// Create an annotation spec set.
35+
static void createAnnotationSpecSet(String projectId) {
36+
// String projectId = "YOUR_PROJECT_ID";
37+
38+
Map<String, String> annotationLabels = new HashMap<>();
39+
annotationLabels.put("label_1", "label_1_description");
40+
annotationLabels.put("label_2", "label_2_description");
41+
42+
try (DataLabelingServiceClient dataLabelingServiceClient = DataLabelingServiceClient.create()) {
43+
44+
ProjectName projectName = ProjectName.of(projectId);
45+
46+
List<AnnotationSpec> annotationSpecs = new ArrayList<>();
47+
for (Entry<String, String> entry : annotationLabels.entrySet()) {
48+
AnnotationSpec annotationSpec = AnnotationSpec.newBuilder()
49+
.setDisplayName(entry.getKey())
50+
.setDescription(entry.getValue())
51+
.build();
52+
annotationSpecs.add(annotationSpec);
53+
}
54+
55+
AnnotationSpecSet annotationSpecSet = AnnotationSpecSet.newBuilder()
56+
.setDisplayName("YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME")
57+
.setDescription("YOUR_DESCRIPTION")
58+
.addAllAnnotationSpecs(annotationSpecs)
59+
.build();
60+
61+
CreateAnnotationSpecSetRequest request = CreateAnnotationSpecSetRequest.newBuilder()
62+
.setAnnotationSpecSet(annotationSpecSet)
63+
.setParent(projectName.toString())
64+
.build();
65+
66+
AnnotationSpecSet result = dataLabelingServiceClient.createAnnotationSpecSet(request);
67+
68+
System.out.format("Name: %s\n", result.getName());
69+
System.out.format("DisplayName: %s\n", result.getDisplayName());
70+
System.out.format("Description: %s\n", result.getDescription());
71+
System.out.format("Annotation Count: %d\n", result.getAnnotationSpecsCount());
72+
73+
for (AnnotationSpec annotationSpec : result.getAnnotationSpecsList()) {
74+
System.out.format("\tDisplayName: %s\n", annotationSpec.getDisplayName());
75+
System.out.format("\tDescription: %s\n\n", annotationSpec.getDescription());
76+
}
77+
} catch (IOException e) {
78+
e.printStackTrace();
79+
}
80+
}
81+
}
82+
// [END datalabeling_create_annotation_spec_set_beta]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.datalabeling;
18+
19+
// [START datalabeling_create_dataset_beta]
20+
import com.google.cloud.datalabeling.v1beta1.CreateDatasetRequest;
21+
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceClient;
22+
import com.google.cloud.datalabeling.v1beta1.Dataset;
23+
import com.google.cloud.datalabeling.v1beta1.ProjectName;
24+
import java.io.IOException;
25+
26+
class CreateDataset {
27+
28+
// Create a dataset that is initially empty.
29+
static void createDataset(String projectId, String datasetName) {
30+
// String projectId = "YOUR_PROJECT_ID";
31+
// String datasetName = "YOUR_DATASET_DISPLAY_NAME";
32+
33+
try (DataLabelingServiceClient dataLabelingServiceClient = DataLabelingServiceClient.create()) {
34+
ProjectName projectName = ProjectName.of(projectId);
35+
36+
Dataset dataset = Dataset.newBuilder()
37+
.setDisplayName(datasetName)
38+
.setDescription("YOUR_DESCRIPTION")
39+
.build();
40+
41+
CreateDatasetRequest createDatasetRequest = CreateDatasetRequest.newBuilder()
42+
.setParent(projectName.toString())
43+
.setDataset(dataset)
44+
.build();
45+
46+
Dataset createdDataset = dataLabelingServiceClient.createDataset(createDatasetRequest);
47+
48+
System.out.format("Name: %s\n", createdDataset.getName());
49+
System.out.format("DisplayName: %s\n", createdDataset.getDisplayName());
50+
System.out.format("Description: %s\n", createdDataset.getDescription());
51+
} catch (IOException e) {
52+
e.printStackTrace();
53+
}
54+
}
55+
}
56+
// [END datalabeling_create_dataset_beta]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.datalabeling;
18+
19+
// [START datalabeling_create_instruction_beta]
20+
import com.google.api.gax.longrunning.OperationFuture;
21+
import com.google.cloud.datalabeling.v1beta1.CreateInstructionMetadata;
22+
import com.google.cloud.datalabeling.v1beta1.CreateInstructionRequest;
23+
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceClient;
24+
import com.google.cloud.datalabeling.v1beta1.DataType;
25+
import com.google.cloud.datalabeling.v1beta1.Instruction;
26+
import com.google.cloud.datalabeling.v1beta1.PdfInstruction;
27+
import com.google.cloud.datalabeling.v1beta1.ProjectName;
28+
import java.io.IOException;
29+
import java.util.concurrent.ExecutionException;
30+
31+
class CreateInstruction {
32+
33+
// Create a instruction for a dataset.
34+
static void createInstruction(String projectId, String pdfUri) {
35+
// String projectId = "YOUR_PROJECT_ID";
36+
// String pdfUri = "gs://YOUR_BUCKET_ID/path_to_pdf_or_csv";
37+
38+
try (DataLabelingServiceClient dataLabelingServiceClient = DataLabelingServiceClient.create()) {
39+
40+
ProjectName projectName = ProjectName.of(projectId);
41+
42+
// There are two types of instructions: CSV (CsvInstruction) or PDF (PdfInstruction)
43+
PdfInstruction pdfInstruction = PdfInstruction.newBuilder()
44+
.setGcsFileUri(pdfUri)
45+
.build();
46+
47+
Instruction instruction = Instruction.newBuilder()
48+
.setDisplayName("YOUR_INSTRUCTION_DISPLAY_NAME")
49+
.setDescription("YOUR_DESCRIPTION")
50+
.setDataType(DataType.IMAGE) // DataTypes: AUDIO, IMAGE, VIDEO, TEXT
51+
.setPdfInstruction(pdfInstruction) // .setCsvInstruction() or .setPdfInstruction()
52+
.build();
53+
54+
CreateInstructionRequest createInstructionRequest = CreateInstructionRequest.newBuilder()
55+
.setInstruction(instruction)
56+
.setParent(projectName.toString())
57+
.build();
58+
59+
OperationFuture<Instruction, CreateInstructionMetadata> operation =
60+
dataLabelingServiceClient.createInstructionAsync(createInstructionRequest);
61+
62+
Instruction result = operation.get();
63+
64+
System.out.format("Name: %s\n", result.getName());
65+
System.out.format("DisplayName: %s\n", result.getDisplayName());
66+
System.out.format("Description: %s\n", result.getDescription());
67+
System.out.format("GCS SOURCE URI: %s\n", result.getPdfInstruction().getGcsFileUri());
68+
} catch (IOException | InterruptedException | ExecutionException e) {
69+
e.printStackTrace();
70+
}
71+
}
72+
}
73+
// [END datalabeling_create_instruction_beta]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.datalabeling;
18+
19+
// [START datalabeling_export_data_beta]
20+
import com.google.api.gax.longrunning.OperationFuture;
21+
import com.google.cloud.datalabeling.v1beta1.DataLabelingServiceClient;
22+
import com.google.cloud.datalabeling.v1beta1.ExportDataOperationMetadata;
23+
import com.google.cloud.datalabeling.v1beta1.ExportDataOperationResponse;
24+
import com.google.cloud.datalabeling.v1beta1.ExportDataRequest;
25+
import com.google.cloud.datalabeling.v1beta1.GcsDestination;
26+
import com.google.cloud.datalabeling.v1beta1.LabelStats;
27+
import com.google.cloud.datalabeling.v1beta1.OutputConfig;
28+
import java.io.IOException;
29+
import java.util.Map.Entry;
30+
import java.util.Set;
31+
import java.util.concurrent.ExecutionException;
32+
33+
class ExportData {
34+
35+
// Export data from an annotated dataset.
36+
static void exportData(String datasetName, String annotatedDatasetName, String gcsOutputUri) {
37+
// String datasetName = DataLabelingServiceClient.formatDatasetName(
38+
// "YOUR_PROJECT_ID", "YOUR_DATASETS_UUID");
39+
// String annotatedDatasetName = DataLabelingServiceClient.formatAnnotatedDatasetName(
40+
// "YOUR_PROJECT_ID",
41+
// "YOUR_DATASET_UUID",
42+
// "YOUR_ANNOTATED_DATASET_UUID");
43+
// String gcsOutputUri = "gs://YOUR_BUCKET_ID/export_path";
44+
45+
try (DataLabelingServiceClient dataLabelingServiceClient = DataLabelingServiceClient.create()) {
46+
GcsDestination gcsDestination = GcsDestination.newBuilder()
47+
.setOutputUri(gcsOutputUri)
48+
.setMimeType("text/csv")
49+
.build();
50+
51+
OutputConfig outputConfig = OutputConfig.newBuilder()
52+
.setGcsDestination(gcsDestination)
53+
.build();
54+
55+
ExportDataRequest exportDataRequest = ExportDataRequest.newBuilder()
56+
.setName(datasetName)
57+
.setOutputConfig(outputConfig)
58+
.setAnnotatedDataset(annotatedDatasetName)
59+
.build();
60+
61+
OperationFuture<ExportDataOperationResponse, ExportDataOperationMetadata> operation =
62+
dataLabelingServiceClient.exportDataAsync(exportDataRequest);
63+
64+
ExportDataOperationResponse response = operation.get();
65+
66+
System.out.format("Exported item count: %d\n", response.getExportCount());
67+
LabelStats labelStats = response.getLabelStats();
68+
Set<Entry<String, Long>> entries = labelStats.getExampleCountMap().entrySet();
69+
for (Entry<String, Long> entry : entries) {
70+
System.out.format("\tLabel: %s\n", entry.getKey());
71+
System.out.format("\tCount: %d\n\n", entry.getValue());
72+
}
73+
} catch (IOException | InterruptedException | ExecutionException e) {
74+
e.printStackTrace();
75+
}
76+
}
77+
}
78+
// [END datalabeling_export_data_beta]

0 commit comments

Comments
 (0)