Skip to content

Commit 133f42c

Browse files
committed
Generally faster parsing
This commit includes an optimization that increases overall performance. The gains are roughly between 20% and 300%, depending on the size of the JSON document. Larger documents will see the greatest gains, particularly when searching for keys that are deeply embedded, or near the end of the document.
1 parent 92dff34 commit 133f42c

File tree

2 files changed

+130
-34
lines changed

2 files changed

+130
-34
lines changed

README.md

+1-3
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ Benchmarks of GJSON alongside [encoding/json](https://golang.org/pkg/encoding/js
438438
and [json-iterator](https://github.com/json-iterator/go)
439439

440440
```
441-
BenchmarkGJSONGet-10 14919366 240.9 ns/op 0 B/op 0 allocs/op
441+
BenchmarkGJSONGet-10 17893731 202.1 ns/op 0 B/op 0 allocs/op
442442
BenchmarkGJSONUnmarshalMap-10 1663548 2157 ns/op 1920 B/op 26 allocs/op
443443
BenchmarkJSONUnmarshalMap-10 832236 4279 ns/op 2920 B/op 68 allocs/op
444444
BenchmarkJSONUnmarshalStruct-10 1076475 3219 ns/op 920 B/op 12 allocs/op
@@ -489,6 +489,4 @@ widget.text.onMouseUp
489489

490490
**
491491

492-
Last run: Oct 1, 2024
493-
494492
*These benchmarks were run on a MacBook Pro M1 Max using Go 1.22 and can be found [here](https://github.com/tidwall/gjson-benchmarks).*

gjson.go

+129-31
Original file line numberDiff line numberDiff line change
@@ -1040,50 +1040,148 @@ func parseObjectPath(path string) (r objectPathResult) {
10401040
return
10411041
}
10421042

1043+
var vchars = [256]byte{
1044+
'"': 2, '{': 3, '(': 3, '[': 3, '}': 1, ')': 1, ']': 1,
1045+
}
1046+
10431047
func parseSquash(json string, i int) (int, string) {
10441048
// expects that the lead character is a '[' or '{' or '('
10451049
// squash the value, ignoring all nested arrays and objects.
10461050
// the first '[' or '{' or '(' has already been read
10471051
s := i
10481052
i++
10491053
depth := 1
1050-
for ; i < len(json); i++ {
1051-
if json[i] >= '"' && json[i] <= '}' {
1052-
switch json[i] {
1053-
case '"':
1054+
var c byte
1055+
for i < len(json) {
1056+
for i < len(json)-8 {
1057+
jslice := json[i : i+8]
1058+
c = vchars[jslice[0]]
1059+
if c != 0 {
1060+
i += 0
1061+
goto token
1062+
}
1063+
c = vchars[jslice[1]]
1064+
if c != 0 {
1065+
i += 1
1066+
goto token
1067+
}
1068+
c = vchars[jslice[2]]
1069+
if c != 0 {
1070+
i += 2
1071+
goto token
1072+
}
1073+
c = vchars[jslice[3]]
1074+
if c != 0 {
1075+
i += 3
1076+
goto token
1077+
}
1078+
c = vchars[jslice[4]]
1079+
if c != 0 {
1080+
i += 4
1081+
goto token
1082+
}
1083+
c = vchars[jslice[5]]
1084+
if c != 0 {
1085+
i += 5
1086+
goto token
1087+
}
1088+
c = vchars[jslice[6]]
1089+
if c != 0 {
1090+
i += 6
1091+
goto token
1092+
}
1093+
c = vchars[jslice[7]]
1094+
if c != 0 {
1095+
i += 7
1096+
goto token
1097+
}
1098+
i += 8
1099+
}
1100+
c = vchars[json[i]]
1101+
if c == 0 {
1102+
i++
1103+
continue
1104+
}
1105+
token:
1106+
if c == 2 {
1107+
// '"' string
1108+
i++
1109+
s2 := i
1110+
nextquote:
1111+
for i < len(json)-8 {
1112+
jslice := json[i : i+8]
1113+
if jslice[0] == '"' {
1114+
i += 0
1115+
goto strchkesc
1116+
}
1117+
if jslice[1] == '"' {
1118+
i += 1
1119+
goto strchkesc
1120+
}
1121+
if jslice[2] == '"' {
1122+
i += 2
1123+
goto strchkesc
1124+
}
1125+
if jslice[3] == '"' {
1126+
i += 3
1127+
goto strchkesc
1128+
}
1129+
if jslice[4] == '"' {
1130+
i += 4
1131+
goto strchkesc
1132+
}
1133+
if jslice[5] == '"' {
1134+
i += 5
1135+
goto strchkesc
1136+
}
1137+
if jslice[6] == '"' {
1138+
i += 6
1139+
goto strchkesc
1140+
}
1141+
if jslice[7] == '"' {
1142+
i += 7
1143+
goto strchkesc
1144+
}
1145+
i += 8
1146+
}
1147+
goto strchkstd
1148+
strchkesc:
1149+
if json[i-1] != '\\' {
10541150
i++
1055-
s2 := i
1056-
for ; i < len(json); i++ {
1057-
if json[i] > '\\' {
1058-
continue
1059-
}
1060-
if json[i] == '"' {
1061-
// look for an escaped slash
1062-
if json[i-1] == '\\' {
1063-
n := 0
1064-
for j := i - 2; j > s2-1; j-- {
1065-
if json[j] != '\\' {
1066-
break
1067-
}
1068-
n++
1069-
}
1070-
if n%2 == 0 {
1071-
continue
1072-
}
1151+
continue
1152+
}
1153+
strchkstd:
1154+
for i < len(json) {
1155+
if json[i] > '\\' || json[i] != '"' {
1156+
i++
1157+
continue
1158+
}
1159+
// look for an escaped slash
1160+
if json[i-1] == '\\' {
1161+
n := 0
1162+
for j := i - 2; j > s2-1; j-- {
1163+
if json[j] != '\\' {
1164+
break
10731165
}
1074-
break
1166+
n++
1167+
}
1168+
if n%2 == 0 {
1169+
i++
1170+
goto nextquote
10751171
}
10761172
}
1077-
case '{', '[', '(':
1078-
depth++
1079-
case '}', ']', ')':
1080-
depth--
1081-
if depth == 0 {
1082-
i++
1083-
return i, json[s:i]
1084-
}
1173+
break
1174+
}
1175+
} else {
1176+
// '{', '[', '(', '}', ']', ')'
1177+
// open close tokens
1178+
depth += int(c) - 2
1179+
if depth == 0 {
1180+
i++
1181+
return i, json[s:i]
10851182
}
10861183
}
1184+
i++
10871185
}
10881186
return i, json[s:]
10891187
}

0 commit comments

Comments
 (0)