Skip to content

Commit 61b2c08

Browse files
authored
Merge pull request #14 from wanglei-ok/skip_utf8bom
ReadFileInto to skip a single leading UTF8 BOM sequence if it exists.
2 parents f21d161 + 99a6611 commit 61b2c08

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

read.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gcfg
22

33
import (
4+
"bytes"
45
"fmt"
56
"io"
67
"io/ioutil"
@@ -13,6 +14,7 @@ import (
1314
)
1415

1516
// unescape maps an escape character to the rune it stands for: backslash and
// double quote map to themselves, 'n' to newline and 't' to tab.
// NOTE(review): presumably looked up for the character following a backslash
// when unquoting values; the use site is not visible here, so confirm.
var unescape = map[rune]rune{'\\': '\\', '"': '"', 'n': '\n', 't': '\t'}
17+
// utf8Bom holds the byte order mark U+FEFF as a UTF-8 encoded byte slice; it
// is the prefix that skipLeadingUtf8Bom removes from file contents.
var utf8Bom = []byte("\ufeff")
1618

1719
// no error: invalid literals should be caught by scanner
1820
func unquote(s string) string {
@@ -221,6 +223,9 @@ func ReadStringInto(config interface{}, str string) error {
221223

222224
// ReadFileInto reads gcfg formatted data from the file filename and sets the
223225
// values into the corresponding fields in config.
226+
//
227+
// For compatibility with files created on Windows, ReadFileInto skips a
228+
// single leading UTF8 BOM sequence if it exists.
224229
func ReadFileInto(config interface{}, filename string) error {
225230
f, err := os.Open(filename)
226231
if err != nil {
@@ -231,7 +236,22 @@ func ReadFileInto(config interface{}, filename string) error {
231236
if err != nil {
232237
return err
233238
}
239+
240+
// Skips a single leading UTF8 BOM sequence if it exists.
241+
src = skipLeadingUtf8Bom(src)
242+
234243
fset := token.NewFileSet()
235244
file := fset.AddFile(filename, fset.Base(), len(src))
236245
return readInto(config, fset, file, src)
237246
}
247+
248+
func skipLeadingUtf8Bom(src []byte) []byte {
249+
lengthUtf8Bom := len(utf8Bom)
250+
251+
if len(src) >= lengthUtf8Bom {
252+
if bytes.Equal(src[:lengthUtf8Bom], utf8Bom) {
253+
return src[lengthUtf8Bom:]
254+
}
255+
}
256+
return src
257+
}

read_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gcfg
22

33
import (
4+
"bytes"
45
"encoding"
56
"fmt"
67
"math/big"
@@ -339,6 +340,17 @@ func TestReadFileIntoUnicode(t *testing.T) {
339340
}
340341
}
341342

343+
func TestReadFileIntoNotepad(t *testing.T) {
344+
res := &struct{ X甲 struct{ X乙 string } }{}
345+
err := ReadFileInto(res, "testdata/notepad.ini")
346+
if err != nil {
347+
t.Error(err)
348+
}
349+
if "丁" != res.X甲.X乙 {
350+
t.Errorf("got %q, wanted %q", res.X甲.X乙, "丁")
351+
}
352+
}
353+
342354
func TestReadStringIntoSubsectDefaults(t *testing.T) {
343355
type subsect struct {
344356
Color string
@@ -404,3 +416,28 @@ func TestPanics(t *testing.T) {
404416
testPanic(t, tt.id, tt.config, tt.gcfg)
405417
}
406418
}
419+
420+
// utf8bomtests is the table of cases for skipLeadingUtf8Bom. Besides the
// ordinary with/without-BOM inputs it covers inputs shorter than the three
// byte BOM, a BOM that is not at the start, and a doubled BOM, of which only
// the first occurrence may be removed.
var utf8bomtests = []struct {
	id  string // case description, reported when the case fails
	in  []byte // bytes passed to skipLeadingUtf8Bom
	out []byte // bytes expected back
}{
	{"0 bytes input", []byte{}, []byte{}},
	{"1 byte input (shorter than BOM)", []byte(";"), []byte(";")},
	{"2 bytes input (truncated BOM)", []byte("\xef\xbb"), []byte("\xef\xbb")},
	{"3 bytes input (BOM only)", []byte("\ufeff"), []byte{}},
	{"3 bytes input (comment only, without BOM)", []byte(";c\n"), []byte(";c\n")},
	{"normal input with BOM", []byte("\ufeff[section]\nname=value"), []byte("[section]\nname=value")},
	{"normal input without BOM", []byte("[section]\nname=value"), []byte("[section]\nname=value")},
	{"doubled BOM (only the first is skipped)", []byte("\ufeff\ufeff[section]"), []byte("\ufeff[section]")},
	{"BOM not at the start", []byte(";\ufeff"), []byte(";\ufeff")},
}
431+
432+
func testUtf8Bom(t *testing.T, id string, in, out []byte) {
433+
got := skipLeadingUtf8Bom([]byte(in))
434+
if !bytes.Equal(got, out) {
435+
t.Errorf("%s.", id)
436+
}
437+
}
438+
439+
func TestUtf8Boms(t *testing.T) {
440+
for _, tt := range utf8bomtests {
441+
testUtf8Bom(t, tt.id, tt.in, tt.out)
442+
}
443+
}

testdata/notepad.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
; Comment line
2+
[甲]
3+
乙=丁 # Update 乙 to 丁 by notepad on windows

0 commit comments

Comments
 (0)