@@ -634,6 +634,100 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
634
634
sorted (row_tuples , key = by_wavelength ), sorted (ROWS , key = by_wavelength )
635
635
)
636
636
637
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_automatic_schema(self):
    """Load a DataFrame with no explicit schema and verify the result.

    Each column uses a pandas dtype that maps well to a BigQuery type, so
    the load is issued without specifying a schema. The schema and row
    count of the resulting table are then checked.

    https://github.com/googleapis/google-cloud-python/issues/9044
    """
    # The same three instants back both the timezone-aware column and the
    # naive column.
    instants = [
        datetime.datetime(2010, 1, 2, 3, 44, 50),
        datetime.datetime(2011, 2, 3, 14, 50, 59),
        datetime.datetime(2012, 3, 14, 15, 16),
    ]
    aware_series = pandas.Series(instants, dtype="datetime64[ns]").dt.tz_localize(
        pytz.utc
    )
    naive_series = pandas.Series(instants, dtype="datetime64[ns]")

    # One entry per column, in column order:
    # (column name, data to upload, field type expected on the loaded table).
    column_plan = [
        ("bool_col", pandas.Series([True, False, True], dtype="bool"), "BOOLEAN"),
        ("ts_col", aware_series, "TIMESTAMP"),
        ("dt_col", naive_series, "DATETIME"),
        ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32"), "FLOAT"),
        ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64"), "FLOAT"),
        ("int8_col", pandas.Series([-12, -11, -10], dtype="int8"), "INTEGER"),
        ("int16_col", pandas.Series([-9, -8, -7], dtype="int16"), "INTEGER"),
        ("int32_col", pandas.Series([-6, -5, -4], dtype="int32"), "INTEGER"),
        ("int64_col", pandas.Series([-3, -2, -1], dtype="int64"), "INTEGER"),
        ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8"), "INTEGER"),
        ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16"), "INTEGER"),
        ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32"), "INTEGER"),
    ]

    # Passing ``columns`` explicitly pins the column order of the frame.
    column_names = [name for name, _, _ in column_plan]
    dataframe = pandas.DataFrame(
        {name: series for name, series, _ in column_plan},
        columns=column_names,
    )

    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    project = Config.CLIENT.project
    table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format(
        project, dataset_id
    )

    # Wait for the load job to finish before inspecting the table.
    Config.CLIENT.load_table_from_dataframe(dataframe, table_id).result()

    loaded_table = Config.CLIENT.get_table(table_id)
    expected_schema = tuple(
        bigquery.SchemaField(name, field_type)
        for name, _, field_type in column_plan
    )
    self.assertEqual(tuple(loaded_table.schema), expected_schema)
    self.assertEqual(loaded_table.num_rows, 3)
730
+
637
731
@unittest .skipIf (pandas is None , "Requires `pandas`" )
638
732
@unittest .skipIf (pyarrow is None , "Requires `pyarrow`" )
639
733
def test_load_table_from_dataframe_w_nulls (self ):
0 commit comments