@@ -24,64 +24,78 @@ SOFTWARE.
24
24
*/
25
25
import Foundation
26
26
27
- import SwiftLibXML2
27
+ import libxmlKanna
28
28
29
29
// Regular-expression engine used by the selector matchers in this file.
typealias AKRegularExpression = NSRegularExpression

// Match-result type produced by `AKRegularExpression`.
// Only the Swift 3.x toolchain on Linux spells this type `TextCheckingResult`;
// every other platform/toolchain combination uses `NSTextCheckingResult`.
#if os(Linux) && swift(>=3) && !swift(>=4)
typealias AKTextCheckingResult = TextCheckingResult
#else
typealias AKTextCheckingResult = NSTextCheckingResult
#endif
37
+
38
/// Errors thrown while converting a CSS selector to XPath.
///
/// Conforms to `CustomStringConvertible` so that logging or interpolating a
/// thrown error yields the same diagnostic text that used to be printed when
/// conversion failed, instead of an opaque case dump.
public enum CSSError: Error, CustomStringConvertible {
    /// The selector contains syntax the converter could not consume.
    /// The associated value is the offending selector text.
    case UnsupportSyntax(String)

    /// Human-readable diagnostic for the error.
    public var description: String {
        switch self {
        case .UnsupportSyntax(let selector):
            return "CSS Syntax Error: Unsupport syntax '\(selector)'"
        }
    }
}
31
41
32
42
/**
33
43
CSS
34
44
*/
35
- public struct CSS {
45
+ public enum CSS {
36
46
/**
 Converts a CSS3 selector list to an XPath expression.

 The input is split into selector groups at top-level commas; each group is
 converted on its own and the results are joined with the XPath union
 operator (`" | "`).

 @param css CSS3 selector, optionally a comma-separated group of selectors

 @return XPath expression equivalent to the selector list

 @throws Whatever error the per-selector conversion throws for a group it
         cannot translate.
 */
public static func toXPath(_ css: String) throws -> String {
    return try splitSelectorGroups(css)
        .map { try toXPath(selector: $0) }
        .joined(separator: " | ")
}

/// Splits a selector list at commas that actually separate selector groups.
///
/// A comma is NOT treated as a separator when it is
/// - preceded by a backslash (CSS escape),
/// - inside a single- or double-quoted string, or
/// - nested inside `(...)` or `[...]`.
///
/// For input without quotes, brackets, parentheses or backslashes the result
/// is the same as `css.components(separatedBy: ",")`, including empty
/// leading/trailing groups.
private static func splitSelectorGroups(_ css: String) -> [String] {
    var groups: [String] = []
    var current = String.UnicodeScalarView()
    var depth = 0                       // nesting level of () and []
    var quote: UnicodeScalar? = nil     // currently open quote character, if any
    var isEscaped = false               // previous scalar was an unconsumed backslash

    // Iterating unicode scalars keeps this independent of the String
    // collection API differences between Swift versions.
    for scalar in css.unicodeScalars {
        if isEscaped {
            // The escaped scalar is always taken literally.
            isEscaped = false
        } else if scalar == "\\" {
            isEscaped = true
        } else if let open = quote {
            if scalar == open {
                quote = nil
            }
        } else {
            switch scalar {
            case "\"", "'":
                quote = scalar
            case "(", "[":
                depth += 1
            case ")", "]":
                // Tolerate unbalanced closers; the per-selector conversion
                // is responsible for reporting malformed input.
                if depth > 0 {
                    depth -= 1
                }
            case "," where depth == 0:
                groups.append(String(current))
                current = String.UnicodeScalarView()
                continue
            default:
                break
            }
        }
        current.append(scalar)
    }

    groups.append(String(current))
    return groups
}
59
+
60
+ private static func toXPath( selector: String ) throws -> String {
44
61
var xpath = " // "
45
62
var str = selector
46
63
var prev = str
47
64
48
- while str. utf16 . count > 0 {
65
+ while ! str. isEmpty {
49
66
var attributes : [ String ] = [ ]
50
67
var combinator : String = " "
51
-
52
- if let result = matchBlank ( str) {
53
- str = str. substring ( from: str. index ( str. startIndex, offsetBy: result. range. length) )
54
- }
55
-
68
+
69
+ str = str. trimmingCharacters ( in: . whitespaces)
70
+
56
71
// element
57
72
let element = getElement ( & str)
58
-
73
+
59
74
// class / id
60
75
while let attr = getClassId ( & str) {
61
76
attributes. append ( attr)
62
77
}
63
-
78
+
64
79
// attribute
65
80
while let attr = getAttribute ( & str) {
66
81
attributes. append ( attr)
67
82
}
68
-
83
+
69
84
// matchCombinator
70
85
if let combi = genCombinator ( & str) {
71
86
combinator = combi
72
87
}
73
-
88
+
74
89
// generate xpath phrase
75
- let attr = attributes. reduce ( " " ) { $0 . isEmpty ? $1 : $0 + " and " + $1 }
90
+ let attr = attributes. joined ( separator : " and " )
76
91
if attr. isEmpty {
77
92
xpath += " \( element) \( combinator) "
78
93
} else {
79
94
xpath += " \( element) [ \( attr) ] \( combinator) "
80
95
}
81
96
82
97
if str == prev {
83
- print ( " CSS Syntax Error: Unsupport syntax ' \( selector) ' " )
84
- return nil
98
+ throw CSSError . UnsupportSyntax ( selector)
85
99
}
86
100
prev = str
87
101
}
@@ -127,43 +141,47 @@ private func nth_last_child(a: Int, b: Int) -> String {
127
141
return nth ( prefix: " following " , a: a, b: b)
128
142
}
129
143
130
// Regular-expression matchers used by the selector tokenizer below.
// `firstMatch` is defined elsewhere in this file; the call sites here use each
// constant as `matcher(str)` and receive an optional match result.

// A CSS backslash escape: a backslash followed by one ASCII punctuation
// character. NOTE: the inner parentheses are a CAPTURING group (the escaped
// character), so every interpolation of this pattern into a larger regex
// shifts the numbering of the capture groups that follow it.
private let escapePattern = "(?:\\\\([!\"#\\$%&\'\\(\\)\\*\\+,\\./:;<=>\\?@\\[\\\\\\]\\^`\\{\\|\\}~]))"
// Zero or more consecutive escapes.
private let escapeRepeatPattern = "\(escapePattern)*"
// Element name with optional `ns|name` form.
// Groups: 1 = first name, 4 = `|`, 5 = name after `|` (2 and 6 are the escape captures).
private let matchElement = firstMatch("^((?:[a-z0-9\\*_-]+\(escapeRepeatPattern))+)((\\|)((?:[a-z0-9\\*_-]+\(escapeRepeatPattern))+))?")
// `#id` or `.class`. Groups: 1 = `#` or `.`, 2 = identifier.
private let matchClassId = firstMatch("^([#.])((?:[a-z0-9\\*_-]+\(escapeRepeatPattern))+)")
// `[attr]` presence test. Group 1 = everything between the brackets.
private let matchAttr1 = firstMatch("^\\[([^\\]]*)\\]")
// `[attr op value]`. Groups: 1 = attribute name, 2 = operator, 3 = value.
// The value is either a complete quoted string (which may contain `]`) or the
// shortest run of non-`]` characters. A greedy `.*` here would run to the LAST
// `]` in the remaining selector and swallow any following `[...]` or compound
// selector (e.g. `a[x=1][y=2]`).
private let matchAttr2 = firstMatch("^\\[\\s*([^~\\|\\^\\$\\*=\\s]+)\\s*([~\\|\\^\\$\\*]?=)\\s*(\"[^\"]*\"|'[^']*'|[^\\]]*?)\\s*\\]")
// `:not(...)`. Group 1 = the argument.
private let matchAttrN = firstMatch("^:not\\((.*?\\)?)\\)")
// `:pseudo` including any parenthesised/quoted argument characters. Group 1 = text after `:`.
private let matchPseudo = firstMatch("^:([\'\"()a-z0-9_+-]+)")
// Combinator between compound selectors. Group 1 = whitespace, `>`, `+`, `~` or `,`.
private let matchCombinator = firstMatch("^\\s*([\\s>+~,])\\s*")
// `nth-child(odd|even|N)` / `nth-last-child(...)`. Groups: 1 = function name, 2 = argument.
private let matchSubNthChild = firstMatch("^(nth-child|nth-last-child)\\(\\s*(odd|even|\\d+)\\s*\\)")
// `nth-child(an+b)` / `nth-last-child(an+b)`. Groups: 1 = function name, 2 = a, 3 = `+b`.
private let matchSubNthChildN = firstMatch("^(nth-child|nth-last-child)\\(\\s*(-?\\d*)n(\\+\\d+)?\\s*\\)")
// `nth-of-type(odd|even|N)`. Group 1 = argument.
private let matchSubNthOfType = firstMatch("nth-of-type\\((odd|even|\\d+)\\)")
// `contains("text")`. Group 1 = the quoted text.
private let matchSubContains = firstMatch("contains\\([\"\'](.*?)[\"\']\\)")
143
157
144
158
/// Returns the text captured by group `at` of a regular-expression match.
///
/// - Parameters:
///   - result: Match result obtained by matching against `str`.
///   - str: The string the match was performed on.
///   - at: Capture-group index (0 is the whole match).
/// - Returns: The captured substring, or `""` when the group index is out of
///   range, the group did not participate in the match, or it matched the
///   empty string.
private func substringWithRangeAtIndex(_ result: AKTextCheckingResult, str: String, at: Int) -> String {
    guard at >= 0, result.numberOfRanges > at else {
        return ""
    }

    #if swift(>=4.0) || os(Linux)
    let range = result.range(at: at)
    #else
    let range = result.rangeAt(at)
    #endif

    // A group that did not participate reports location == NSNotFound.
    guard range.location != NSNotFound, range.length > 0 else {
        return ""
    }

    // NSRange offsets are UTF-16 code units. Slicing through NSString keeps
    // them in that unit; advancing a String.Index by the same numbers would
    // count Characters instead and mis-slice (or trap) on text containing
    // non-BMP or combining characters.
    return NSString(string: str).substring(with: range)
}
159
173
174
/// Resolves CSS backslash escapes in `text`.
///
/// Despite the name, this removes escaping: every occurrence of
/// `escapePattern` (a backslash followed by one punctuation character) is
/// replaced by the character it captured.
///
/// - Parameter text: Raw selector fragment as written in the CSS source.
/// - Returns: `text` with each matched escape sequence reduced to the bare character.
private func escapeCSS(_ text: String) -> String {
    let unescaped = text.replacingOccurrences(of: escapePattern,
                                              with: "$1",
                                              options: .regularExpression)
    return unescaped
}
177
+
160
178
private func getElement( _ str: inout String , skip: Bool = true ) -> String {
161
179
if let result = matchElement ( str) {
162
- let ( text, text2) = ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ,
163
- substringWithRangeAtIndex ( result, str: str, at: 4 ) )
180
+ let ( text, text2) = ( escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ) ,
181
+ escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 5 ) ) )
164
182
165
183
if skip {
166
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
184
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
167
185
}
168
186
169
187
// tag with namespace
@@ -181,10 +199,10 @@ private func getElement(_ str: inout String, skip: Bool = true) -> String {
181
199
182
200
private func getClassId( _ str: inout String , skip: Bool = true ) -> String ? {
183
201
if let result = matchClassId ( str) {
184
- let ( attr, text) = ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ,
185
- substringWithRangeAtIndex ( result, str: str, at: 2 ) )
202
+ let ( attr, text) = ( escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ) ,
203
+ escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 2 ) ) )
186
204
if skip {
187
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
205
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
188
206
}
189
207
190
208
if attr. hasPrefix ( " # " ) {
@@ -198,12 +216,12 @@ private func getClassId(_ str: inout String, skip: Bool = true) -> String? {
198
216
199
217
private func getAttribute( _ str: inout String , skip: Bool = true ) -> String ? {
200
218
if let result = matchAttr2 ( str) {
201
- let ( attr, expr, text) = ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ,
219
+ let ( attr, expr, text) = ( escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 1 ) ) ,
202
220
substringWithRangeAtIndex ( result, str: str, at: 2 ) ,
203
- substringWithRangeAtIndex ( result, str: str, at: 3 ) . replacingOccurrences ( of: " [ \' \" ](.*)[ \' \" ] " , with: " $1 " , options: . regularExpression, range: nil ) )
221
+ escapeCSS ( substringWithRangeAtIndex ( result, str: str, at: 3 ) . replacingOccurrences ( of: " [ \' \" ](.*)[ \' \" ] " , with: " $1 " , options: . regularExpression, range: nil ) ) )
204
222
205
223
if skip {
206
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
224
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
207
225
}
208
226
209
227
switch expr {
@@ -225,7 +243,7 @@ private func getAttribute(_ str: inout String, skip: Bool = true) -> String? {
225
243
} else if let result = matchAttr1 ( str) {
226
244
let atr = substringWithRangeAtIndex ( result, str: str, at: 1 )
227
245
if skip {
228
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
246
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
229
247
}
230
248
231
249
return " @ \( atr) "
@@ -237,7 +255,7 @@ private func getAttribute(_ str: inout String, skip: Bool = true) -> String? {
237
255
} else if let result = matchPseudo ( str) {
238
256
let one = substringWithRangeAtIndex ( result, str: str, at: 1 )
239
257
if skip {
240
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
258
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
241
259
}
242
260
243
261
switch one {
@@ -257,8 +275,6 @@ private func getAttribute(_ str: inout String, skip: Bool = true) -> String? {
257
275
return " not(node()) "
258
276
case " root " :
259
277
return " not(parent::*) "
260
- case " last-child " :
261
- return " count(following-sibling::*) = 0 "
262
278
default :
263
279
if let sub = matchSubNthChild ( one) {
264
280
let ( nth, arg1) = ( substringWithRangeAtIndex ( sub, str: one, at: 1 ) ,
@@ -305,21 +321,21 @@ private func getAttrNot(_ str: inout String, skip: Bool = true) -> String? {
305
321
if let result = matchAttrN ( str) {
306
322
var one = substringWithRangeAtIndex ( result, str: str, at: 1 )
307
323
if skip {
308
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
324
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
309
325
}
310
326
311
327
if let attr = getAttribute ( & one, skip: false ) {
312
328
return attr
313
329
} else if let sub = matchElement ( one) {
314
- #if os(Linux)
330
+ #if swift(>=4.0) || os(Linux)
315
331
let range = sub. range ( at: 1 )
316
332
#else
317
333
let range = sub. rangeAt ( 1 )
318
334
#endif
319
335
let startIndex = one. index ( one. startIndex, offsetBy: range. location)
320
336
let endIndex = one. index ( startIndex, offsetBy: range. length)
321
337
322
- let elem = one. substring ( with : startIndex ..< endIndex)
338
+ let elem = one [ startIndex ..< endIndex]
323
339
return " self:: \( elem) "
324
340
} else if let attr = getClassId ( & one) {
325
341
return attr
@@ -332,7 +348,7 @@ private func genCombinator(_ str: inout String, skip: Bool = true) -> String? {
332
348
if let result = matchCombinator ( str) {
333
349
let one = substringWithRangeAtIndex ( result, str: str, at: 1 )
334
350
if skip {
335
- str = str . substring ( from : str. characters . index ( str. startIndex, offsetBy: result. range. length) )
351
+ str = String ( str [ str . index ( str. startIndex, offsetBy: result. range. length) ..< str . endIndex ] )
336
352
}
337
353
338
354
switch one {
@@ -343,11 +359,7 @@ private func genCombinator(_ str: inout String, skip: Bool = true) -> String? {
343
359
case " ~ " :
344
360
return " /following-sibling:: "
345
361
default :
346
- if let _ = matchSubBlank ( one) {
347
- return " // "
348
- } else {
349
- return " | // "
350
- }
362
+ return " // "
351
363
}
352
364
}
353
365
return nil
0 commit comments