18
18
from controllers .service_api .dataset .error import (
19
19
ArchivedDocumentImmutableError ,
20
20
DocumentIndexingError ,
21
- InvalidMetadataError ,
22
21
)
23
22
from controllers .service_api .wraps import DatasetApiResource , cloud_edition_billing_resource_check
24
23
from core .errors .error import ProviderTokenNotInitError
@@ -51,8 +50,6 @@ def post(self, tenant_id, dataset_id):
51
50
"indexing_technique" , type = str , choices = Dataset .INDEXING_TECHNIQUE_LIST , nullable = False , location = "json"
52
51
)
53
52
parser .add_argument ("retrieval_model" , type = dict , required = False , nullable = False , location = "json" )
54
- parser .add_argument ("doc_type" , type = str , required = False , nullable = True , location = "json" )
55
- parser .add_argument ("doc_metadata" , type = dict , required = False , nullable = True , location = "json" )
56
53
57
54
args = parser .parse_args ()
58
55
dataset_id = str (dataset_id )
@@ -65,28 +62,6 @@ def post(self, tenant_id, dataset_id):
65
62
if not dataset .indexing_technique and not args ["indexing_technique" ]:
66
63
raise ValueError ("indexing_technique is required." )
67
64
68
- # Validate metadata if provided
69
- if args .get ("doc_type" ) or args .get ("doc_metadata" ):
70
- if not args .get ("doc_type" ) or not args .get ("doc_metadata" ):
71
- raise InvalidMetadataError ("Both doc_type and doc_metadata must be provided when adding metadata" )
72
-
73
- if args ["doc_type" ] not in DocumentService .DOCUMENT_METADATA_SCHEMA :
74
- raise InvalidMetadataError (
75
- "Invalid doc_type. Must be one of: " + ", " .join (DocumentService .DOCUMENT_METADATA_SCHEMA .keys ())
76
- )
77
-
78
- if not isinstance (args ["doc_metadata" ], dict ):
79
- raise InvalidMetadataError ("doc_metadata must be a dictionary" )
80
-
81
- # Validate metadata schema based on doc_type
82
- if args ["doc_type" ] != "others" :
83
- metadata_schema = DocumentService .DOCUMENT_METADATA_SCHEMA [args ["doc_type" ]]
84
- for key , value in args ["doc_metadata" ].items ():
85
- if key in metadata_schema and not isinstance (value , metadata_schema [key ]):
86
- raise InvalidMetadataError (f"Invalid type for metadata field { key } " )
87
- # set to MetaDataConfig
88
- args ["metadata" ] = {"doc_type" : args ["doc_type" ], "doc_metadata" : args ["doc_metadata" ]}
89
-
90
65
text = args .get ("text" )
91
66
name = args .get ("name" )
92
67
if text is None or name is None :
@@ -133,8 +108,6 @@ def post(self, tenant_id, dataset_id, document_id):
133
108
"doc_language" , type = str , default = "English" , required = False , nullable = False , location = "json"
134
109
)
135
110
parser .add_argument ("retrieval_model" , type = dict , required = False , nullable = False , location = "json" )
136
- parser .add_argument ("doc_type" , type = str , required = False , nullable = True , location = "json" )
137
- parser .add_argument ("doc_metadata" , type = dict , required = False , nullable = True , location = "json" )
138
111
args = parser .parse_args ()
139
112
dataset_id = str (dataset_id )
140
113
tenant_id = str (tenant_id )
@@ -146,29 +119,6 @@ def post(self, tenant_id, dataset_id, document_id):
146
119
# indexing_technique is already set in dataset since this is an update
147
120
args ["indexing_technique" ] = dataset .indexing_technique
148
121
149
- # Validate metadata if provided
150
- if args .get ("doc_type" ) or args .get ("doc_metadata" ):
151
- if not args .get ("doc_type" ) or not args .get ("doc_metadata" ):
152
- raise InvalidMetadataError ("Both doc_type and doc_metadata must be provided when adding metadata" )
153
-
154
- if args ["doc_type" ] not in DocumentService .DOCUMENT_METADATA_SCHEMA :
155
- raise InvalidMetadataError (
156
- "Invalid doc_type. Must be one of: " + ", " .join (DocumentService .DOCUMENT_METADATA_SCHEMA .keys ())
157
- )
158
-
159
- if not isinstance (args ["doc_metadata" ], dict ):
160
- raise InvalidMetadataError ("doc_metadata must be a dictionary" )
161
-
162
- # Validate metadata schema based on doc_type
163
- if args ["doc_type" ] != "others" :
164
- metadata_schema = DocumentService .DOCUMENT_METADATA_SCHEMA [args ["doc_type" ]]
165
- for key , value in args ["doc_metadata" ].items ():
166
- if key in metadata_schema and not isinstance (value , metadata_schema [key ]):
167
- raise InvalidMetadataError (f"Invalid type for metadata field { key } " )
168
-
169
- # set to MetaDataConfig
170
- args ["metadata" ] = {"doc_type" : args ["doc_type" ], "doc_metadata" : args ["doc_metadata" ]}
171
-
172
122
if args ["text" ]:
173
123
text = args .get ("text" )
174
124
name = args .get ("name" )
@@ -216,29 +166,6 @@ def post(self, tenant_id, dataset_id):
216
166
if "doc_language" not in args :
217
167
args ["doc_language" ] = "English"
218
168
219
- # Validate metadata if provided
220
- if args .get ("doc_type" ) or args .get ("doc_metadata" ):
221
- if not args .get ("doc_type" ) or not args .get ("doc_metadata" ):
222
- raise InvalidMetadataError ("Both doc_type and doc_metadata must be provided when adding metadata" )
223
-
224
- if args ["doc_type" ] not in DocumentService .DOCUMENT_METADATA_SCHEMA :
225
- raise InvalidMetadataError (
226
- "Invalid doc_type. Must be one of: " + ", " .join (DocumentService .DOCUMENT_METADATA_SCHEMA .keys ())
227
- )
228
-
229
- if not isinstance (args ["doc_metadata" ], dict ):
230
- raise InvalidMetadataError ("doc_metadata must be a dictionary" )
231
-
232
- # Validate metadata schema based on doc_type
233
- if args ["doc_type" ] != "others" :
234
- metadata_schema = DocumentService .DOCUMENT_METADATA_SCHEMA [args ["doc_type" ]]
235
- for key , value in args ["doc_metadata" ].items ():
236
- if key in metadata_schema and not isinstance (value , metadata_schema [key ]):
237
- raise InvalidMetadataError (f"Invalid type for metadata field { key } " )
238
-
239
- # set to MetaDataConfig
240
- args ["metadata" ] = {"doc_type" : args ["doc_type" ], "doc_metadata" : args ["doc_metadata" ]}
241
-
242
169
# get dataset info
243
170
dataset_id = str (dataset_id )
244
171
tenant_id = str (tenant_id )
@@ -306,29 +233,6 @@ def post(self, tenant_id, dataset_id, document_id):
306
233
if "doc_language" not in args :
307
234
args ["doc_language" ] = "English"
308
235
309
- # Validate metadata if provided
310
- if args .get ("doc_type" ) or args .get ("doc_metadata" ):
311
- if not args .get ("doc_type" ) or not args .get ("doc_metadata" ):
312
- raise InvalidMetadataError ("Both doc_type and doc_metadata must be provided when adding metadata" )
313
-
314
- if args ["doc_type" ] not in DocumentService .DOCUMENT_METADATA_SCHEMA :
315
- raise InvalidMetadataError (
316
- "Invalid doc_type. Must be one of: " + ", " .join (DocumentService .DOCUMENT_METADATA_SCHEMA .keys ())
317
- )
318
-
319
- if not isinstance (args ["doc_metadata" ], dict ):
320
- raise InvalidMetadataError ("doc_metadata must be a dictionary" )
321
-
322
- # Validate metadata schema based on doc_type
323
- if args ["doc_type" ] != "others" :
324
- metadata_schema = DocumentService .DOCUMENT_METADATA_SCHEMA [args ["doc_type" ]]
325
- for key , value in args ["doc_metadata" ].items ():
326
- if key in metadata_schema and not isinstance (value , metadata_schema [key ]):
327
- raise InvalidMetadataError (f"Invalid type for metadata field { key } " )
328
-
329
- # set to MetaDataConfig
330
- args ["metadata" ] = {"doc_type" : args ["doc_type" ], "doc_metadata" : args ["doc_metadata" ]}
331
-
332
236
# get dataset info
333
237
dataset_id = str (dataset_id )
334
238
tenant_id = str (tenant_id )
0 commit comments