@@ -234,7 +234,64 @@ done:
234
234
RET
235
235
236
236
// subVW subtracts a single word plus running borrow from a word vector:
// z[i] = x[i] - b for i in [0, z_len), where b starts as y and is then the
// borrow out of the previous word. The final borrow is stored to c+56(FP).
// NOTE(review): the frame offsets (z+0, z_len+8, x+24, y+48, c+56) look like
// func subVW(z, x []Word, y Word) (c Word) — confirm against the Go declaration.
//
// Register roles in this routine:
//   X5  = read cursor into x      X7  = write cursor into z
//   X6  = y                       X29 = current borrow b
//   X30 = words remaining         X28 = constant 4 (unroll factor)
//
// Only z_len is read; x_len is never loaded, so x is assumed to hold at
// least z_len words — TODO confirm callers guarantee this.
// If z_len is 0 the code jumps straight to done and returns c = y.
TEXT ·subVW(SB),NOSPLIT,$0
237
- JMP ·subVW_g(SB)
237
+ MOV x+24 (FP), X5 // X5 = address of x's first word
238
+ MOV y+48 (FP), X6 // X6 = y
239
+ MOV z+0 (FP), X7 // X7 = address of z's first word
240
+ MOV z_len+8 (FP), X30 // X30 = number of words to process
241
+
242
+ MOV $4 , X28 // X28 = words handled per loop4 iteration
243
+ MOV X6, X29 // b = y
244
+
245
+ BEQZ X30, done // nothing to do: return b (= y) unchanged
246
+ BLTU X30, X28, loop1 // fewer than 4 words: skip the unrolled loop
247
+
248
+ loop4: // unrolled body: four words per iteration, borrow carried in X29
249
+ MOV 0 (X5), X8 // x[0]
250
+ MOV 8 (X5), X11 // x[1]
251
+ MOV 16 (X5), X14 // x[2]
252
+ MOV 24 (X5), X17 // x[3]
253
+
254
+ SUB X29, X8, X10 // z[0] = x[0] - b
255
+ SLTU X10, X8, X29 // next b
256
+
257
+ SUB X29, X11, X13 // z[1] = x[1] - b
258
+ SLTU X13, X11, X29 // next b
259
+
260
+ SUB X29, X14, X16 // z[2] = x[2] - b
261
+ SLTU X16, X14, X29 // next b
262
+
263
+ SUB X29, X17, X19 // z[3] = x[3] - b
264
+ SLTU X19, X17, X29 // next b
265
+
266
+ MOV X10, 0 (X7) // z[0]
267
+ MOV X13, 8 (X7) // z[1]
268
+ MOV X16, 16 (X7) // z[2]
269
+ MOV X19, 24 (X7) // z[3]
270
+
271
+ ADD $32 , X5 // advance x cursor by 4 words (4 * 8 bytes)
272
+ ADD $32 , X7 // advance z cursor by 4 words
273
+ SUB $4 , X30 // 4 fewer words remaining
274
+
275
+ BGEU X30, X28, loop4 // at least 4 words left: run the unrolled body again
276
+ BEQZ X30, done // length was a multiple of 4: no tail to process
277
+
278
+ loop1: // tail loop: one word per iteration; entered only with X30 != 0
279
+ MOV 0 (X5), X10 // x
280
+
281
+ SUB X29, X10, X12 // z = x - b
282
+ SLTU X12, X10, X29 // next b
283
+
284
+ MOV X12, 0 (X7) // z
285
+
286
+ ADD $8 , X5 // advance x cursor by one word
287
+ ADD $8 , X7 // advance z cursor by one word
288
+ SUB $1 , X30 // one fewer word remaining
289
+
290
+ BNEZ X30, loop1 // repeat until no words remain
291
+
292
+ done:
293
+ MOV X29, c+56 (FP) // return b
294
+ RET
238
295
239
296
// shlVU has no assembly body in this view: it immediately jumps to ·shlVU_g.
// NOTE(review): ·shlVU_g is presumably the portable Go implementation — confirm
// where it is defined in the package.
TEXT ·shlVU(SB),NOSPLIT,$0
240
297
JMP ·shlVU_g(SB)
0 commit comments