You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
feat(web-integration): support user expected xpath option for locate methods (#844)
* feat(web-integration): support user expected xpath option for locate methods
* docs(site): xpath option for api
* fix(ci): nx config
* docs(site): update docs
Copy file name to clipboardExpand all lines: apps/site/docs/en/API.mdx
+15-8Lines changed: 15 additions & 8 deletions
Original file line number
Diff line number
Diff line change
@@ -108,7 +108,8 @@ function aiTap(locate: string, options?: Object): Promise<void>;
108
108
109
109
-`locate: string` - A natural language description of the element to tap.
110
110
-`options?: Object` - Optional, a configuration object containing:
111
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
111
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
112
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
112
113
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
113
114
114
115
- Return Value:
@@ -140,7 +141,8 @@ function aiHover(locate: string, options?: Object): Promise<void>;
140
141
141
142
-`locate: string` - A natural language description of the element to hover over.
142
143
-`options?: Object` - Optional, a configuration object containing:
143
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
144
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
145
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
144
146
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
-`text: string` - The final text content that should be placed in the input element. Use blank string to clear the input.
169
171
-`locate: string` - A natural language description of the element to input text into.
170
172
-`options?: Object` - Optional, a configuration object containing:
171
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
173
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
174
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
172
175
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
173
176
-`autoDismissKeyboard?: boolean` - If true, the keyboard will be dismissed after the text is entered. Only available on Android. (Default: true)
174
177
@@ -201,7 +204,8 @@ function aiKeyboardPress(
201
204
-`key: string` - The web key to press, e.g. 'Enter', 'Tab', 'Escape', etc. Key Combination is not supported.
202
205
-`locate?: string` - Optional, a natural language description of the element to press the key on.
203
206
-`options?: Object` - Optional, a configuration object containing:
204
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
207
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
208
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
205
209
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
206
210
207
211
- Return Value:
@@ -236,7 +240,8 @@ function aiScroll(
236
240
-`distance: number` - Optional, the distance to scroll in px.
237
241
-`locate?: string` - Optional, a natural language description of the element to scroll on. If not provided, Midscene will perform scroll on the current mouse position.
238
242
-`options?: Object` - Optional, a configuration object containing:
239
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
243
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
244
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
240
245
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
241
246
242
247
- Return Value:
@@ -266,7 +271,8 @@ function aiRightClick(locate: string, options?: Object): Promise<void>;
266
271
267
272
-`locate: string` - A natural language description of the element to right-click on.
268
273
-`options?: Object` - Optional, a configuration object containing:
269
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
274
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
275
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
270
276
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
271
277
272
278
- Return Value:
@@ -286,7 +292,7 @@ await agent.aiRightClick('The file name at the top of the page', {
286
292
287
293
:::tip About the `deepThink` feature
288
294
289
-
The `deepThink` feature is a powerful feature that allows Midscene to call AI model twice to precisely locate the element. It is useful when the AI model find it hard to distinguish the element from its surroundings.
295
+
The `deepThink` feature is a powerful feature that allows Midscene to call the AI model twice to precisely locate the element. It is disabled (false) by default. It is useful when the AI model finds it hard to distinguish the element from its surroundings.
290
296
291
297
:::
292
298
@@ -531,7 +537,8 @@ function aiLocate(
531
537
532
538
-`locate: string` - A natural language description of the element to locate.
533
539
-`options?: Object` - Optional, a configuration object containing:
534
-
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
540
+
-`deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
541
+
-`xpath?: string` - The XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
535
542
-`cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
Copy file name to clipboardExpand all lines: apps/site/docs/en/automate-with-scripts-in-yaml.mdx
+15-5Lines changed: 15 additions & 5 deletions
Original file line number
Diff line number
Diff line change
@@ -206,24 +206,32 @@ tasks:
206
206
207
207
# tap an element located by prompt
208
208
- aiTap: <prompt>
209
-
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element
209
+
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element. False by default.
210
+
xpath: <xpath> # optional, the XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
211
+
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
210
212
cacheable: <boolean> # optional, whether cacheable when enabling [caching feature](./caching.mdx). True by default.
211
213
212
214
# hover an element located by prompt
213
215
- aiHover: <prompt>
214
-
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element
216
+
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element. False by default.
217
+
xpath: <xpath> # optional, the XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
218
+
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
215
219
cacheable: <boolean> # optional, whether cacheable when enabling [caching feature](./caching.mdx). True by default.
216
220
217
221
# input text into an element located by prompt
218
222
- aiInput: <final text content of the input>
219
223
locate: <prompt>
220
-
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element
224
+
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element. False by default.
225
+
xpath: <xpath> # optional, the XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
226
+
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
221
227
cacheable: <boolean> # optional, whether cacheable when enabling [caching feature](./caching.mdx). True by default.
222
228
223
229
# press a key (like Enter, Tab, Escape, etc.) on an element located by prompt
224
230
- aiKeyboardPress: <key>
225
231
locate: <prompt>
226
-
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element
232
+
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element. False by default.
233
+
xpath: <xpath> # optional, the XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
234
+
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
227
235
cacheable: <boolean> # optional, whether cacheable when enabling [caching feature](./caching.mdx). True by default.
228
236
229
237
# scroll globally or on an element located by prompt
@@ -232,7 +240,9 @@ tasks:
232
240
scrollType: 'once' # or 'untilTop' | 'untilBottom' | 'untilLeft' | 'untilRight'
233
241
distance: <number> # optional, distance to scroll in px
234
242
locate: <prompt> # optional, the element to scroll on
235
-
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element
243
+
deepThink: <boolean> # optional, whether to use deepThink to precisely locate the element. False by default.
244
+
xpath: <xpath> # optional, the XPath of the element to operate on. If provided, Midscene will first use this XPath to locate the element before using the cache and the AI model. Empty by default.
245
+
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
236
246
cacheable: <boolean> # optional, whether cacheable when enabling [caching feature](./caching.mdx). True by default.
237
247
238
248
# log the current screenshot with a description in the report file
0 commit comments