File tree 4 files changed +56
-8
lines changed
4 files changed +56
-8
lines changed Original file line number Diff line number Diff line change @@ -269,6 +269,20 @@ The :mod:`csv` module defines the following classes:
269
269
270
270
Analyze the sample text (presumed to be in CSV format) and return
271
271
:const:`True` if the first row appears to be a series of column headers.
272
+ Inspecting each column, one of two key criteria will be considered to
273
+ estimate if the sample contains a header:
274
+
275
+ - the second through n-th rows contain numeric values
276
+ - the second through n-th rows contain strings where at least one value's
277
+ length differs from that of the putative header of that column.
278
+
279
+ Twenty rows after the first row are sampled; if more than half of columns +
280
+ rows meet the criteria, :const:`True` is returned.
281
+
282
+ .. note::
283
+
284
+ This method is a rough heuristic and may produce both false positives and
285
+ negatives.
272
286
273
287
An example for :class:`Sniffer` use::
274
288
Original file line number Diff line number Diff line change @@ -409,14 +409,10 @@ def has_header(self, sample):
409
409
continue # skip rows that have irregular number of columns
410
410
411
411
for col in list (columnTypes .keys ()):
412
-
413
- for thisType in [int , float , complex ]:
414
- try :
415
- thisType (row [col ])
416
- break
417
- except (ValueError , OverflowError ):
418
- pass
419
- else :
412
+ thisType = complex
413
+ try :
414
+ thisType (row [col ])
415
+ except (ValueError , OverflowError ):
420
416
# fallback to length of string
421
417
thisType = len (row [col ])
422
418
Original file line number Diff line number Diff line change @@ -1020,6 +1020,42 @@ class TestSniffer(unittest.TestCase):
1020
1020
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
1021
1021
"""
1022
1022
1023
+ sample10 = dedent ("""
1024
+ abc,def
1025
+ ghijkl,mno
1026
+ ghi,jkl
1027
+ """ )
1028
+
1029
+ sample11 = dedent ("""
1030
+ abc,def
1031
+ ghijkl,mnop
1032
+ ghi,jkl
1033
+ """ )
1034
+
1035
+ sample12 = dedent (""""time","forces"
1036
+ 1,1.5
1037
+ 0.5,5+0j
1038
+ 0,0
1039
+ 1+1j,6
1040
+ """ )
1041
+
1042
+ sample13 = dedent (""""time","forces"
1043
+ 0,0
1044
+ 1,2
1045
+ a,b
1046
+ """ )
1047
+
1048
+ def test_issue43625 (self ):
1049
+ sniffer = csv .Sniffer ()
1050
+ self .assertTrue (sniffer .has_header (self .sample12 ))
1051
+ self .assertFalse (sniffer .has_header (self .sample13 ))
1052
+
1053
+ def test_has_header_strings (self ):
1054
+ "More to document existing (unexpected?) behavior than anything else."
1055
+ sniffer = csv .Sniffer ()
1056
+ self .assertFalse (sniffer .has_header (self .sample10 ))
1057
+ self .assertFalse (sniffer .has_header (self .sample11 ))
1058
+
1023
1059
def test_has_header (self ):
1024
1060
sniffer = csv .Sniffer ()
1025
1061
self .assertIs (sniffer .has_header (self .sample1 ), False )
Original file line number Diff line number Diff line change
1
+ Fix a bug in the detection of CSV file headers by
2
+ :meth:`csv.Sniffer.has_header` and improve documentation of the same.
You can’t perform that action at this time.
0 commit comments