|
21 | 21 | from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
22 | 22 | from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
|
23 | 23 | from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
| 24 | +from pydantic import Field, validator |
24 | 25 |
|
25 | 26 | DIALECT_NAME = "_config_dialect"
|
26 | 27 |
|
@@ -164,9 +165,11 @@ async def infer_schema(
|
164 | 165 | # sources will likely require one. Rather than modify the interface now we can wait until the real use case
|
165 | 166 | config_format = _extract_format(config)
|
166 | 167 | type_inferrer_by_field: Dict[str, _TypeInferrer] = defaultdict(
|
167 |
| - lambda: _JsonTypeInferrer(config_format.true_values, config_format.false_values, config_format.null_values) |
168 |
| - if config_format.inference_type != InferenceType.NONE |
169 |
| - else _DisabledTypeInferrer() |
| 168 | + lambda: ( |
| 169 | + _JsonTypeInferrer(config_format.true_values, config_format.false_values, config_format.null_values) |
| 170 | + if config_format.inference_type != InferenceType.NONE |
| 171 | + else _DisabledTypeInferrer() |
| 172 | + ) |
170 | 173 | )
|
171 | 174 | data_generator = self._csv_reader.read_data(config, file, stream_reader, logger, self.file_read_mode)
|
172 | 175 | read_bytes = 0
|
@@ -293,50 +296,35 @@ def _cast_types(
|
293 | 296 |
|
294 | 297 | for key, value in row.items():
|
295 | 298 | prop_type = deduped_property_types.get(key)
|
296 |
| - cast_value: Any = value |
297 |
| - |
298 |
| - if prop_type in TYPE_PYTHON_MAPPING and prop_type is not None: |
| 299 | + if prop_type in TYPE_PYTHON_MAPPING and prop_type: |
299 | 300 | _, python_type = TYPE_PYTHON_MAPPING[prop_type]
|
300 | 301 |
|
301 |
| - if python_type is None: |
302 |
| - if value == "": |
303 |
| - cast_value = None |
304 |
| - else: |
305 |
| - warnings.append(_format_warning(key, value, prop_type)) |
306 |
| - |
307 |
| - elif python_type == bool: |
308 |
| - try: |
| 302 | + try: |
| 303 | + if python_type is None: |
| 304 | + cast_value = None if value == "" else value |
| 305 | + elif python_type == bool: |
309 | 306 | cast_value = _value_to_bool(value, config_format.true_values, config_format.false_values)
|
310 |
| - except ValueError: |
311 |
| - warnings.append(_format_warning(key, value, prop_type)) |
312 |
| - |
313 |
| - elif python_type == dict: |
314 |
| - try: |
315 |
| - # we don't re-use _value_to_object here because we type the column as object as long as there is only one object |
| 307 | + elif python_type == dict: |
316 | 308 | cast_value = json.loads(value)
|
317 |
| - except json.JSONDecodeError: |
318 |
| - warnings.append(_format_warning(key, value, prop_type)) |
319 |
| - |
320 |
| - elif python_type == list: |
321 |
| - try: |
| 309 | + elif python_type == list: |
322 | 310 | cast_value = _value_to_list(value)
|
323 |
| - except (ValueError, json.JSONDecodeError): |
324 |
| - warnings.append(_format_warning(key, value, prop_type)) |
325 |
| - |
326 |
| - elif python_type: |
327 |
| - try: |
| 311 | + else: |
328 | 312 | cast_value = _value_to_python_type(value, python_type)
|
329 |
| - except ValueError: |
330 |
| - warnings.append(_format_warning(key, value, prop_type)) |
331 |
| - |
332 |
| - result[key] = cast_value |
| 313 | + result[key] = cast_value |
| 314 | + except (ValueError, json.JSONDecodeError): |
| 315 | + warnings.append(_format_warning(key, value, prop_type)) |
| 316 | + result[key] = value # fallback to the original value |
333 | 317 |
|
334 | 318 | if warnings:
|
335 |
| - logger.warning( |
336 |
| - f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join([w for w in warnings])}", |
337 |
| - ) |
| 319 | + logger.warning(f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join(warnings)}") |
338 | 320 | return result
|
339 | 321 |
|
| 322 | + @validator("quote_char", "escape_char") |
| 323 | + def validate_single_character(cls, v: str, field: Field) -> str: |
| 324 | + if v is not None and len(v) != 1: |
| 325 | + raise ValueError(f"{field.name} should only be one character") |
| 326 | + return v |
| 327 | + |
340 | 328 |
|
341 | 329 | class _TypeInferrer(ABC):
|
342 | 330 | @abstractmethod
|
|
0 commit comments