@@ -2425,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
2425
2425
.arg4_type = ARG_ANYTHING ,
2426
2426
};
2427
2427
2428
+ static void sk_msg_shift_left (struct sk_msg * msg , int i )
2429
+ {
2430
+ int prev ;
2431
+
2432
+ do {
2433
+ prev = i ;
2434
+ sk_msg_iter_var_next (i );
2435
+ msg -> sg .data [prev ] = msg -> sg .data [i ];
2436
+ } while (i != msg -> sg .end );
2437
+
2438
+ sk_msg_iter_prev (msg , end );
2439
+ }
2440
+
2441
+ static void sk_msg_shift_right (struct sk_msg * msg , int i )
2442
+ {
2443
+ struct scatterlist tmp , sge ;
2444
+
2445
+ sk_msg_iter_next (msg , end );
2446
+ sge = sk_msg_elem_cpy (msg , i );
2447
+ sk_msg_iter_var_next (i );
2448
+ tmp = sk_msg_elem_cpy (msg , i );
2449
+
2450
+ while (i != msg -> sg .end ) {
2451
+ msg -> sg .data [i ] = sge ;
2452
+ sk_msg_iter_var_next (i );
2453
+ sge = tmp ;
2454
+ tmp = sk_msg_elem_cpy (msg , i );
2455
+ }
2456
+ }
2457
+
2458
+ BPF_CALL_4 (bpf_msg_pop_data , struct sk_msg * , msg , u32 , start ,
2459
+ u32 , len , u64 , flags )
2460
+ {
2461
+ u32 i = 0 , l , space , offset = 0 ;
2462
+ u64 last = start + len ;
2463
+ int pop ;
2464
+
2465
+ if (unlikely (flags ))
2466
+ return - EINVAL ;
2467
+
2468
+ /* First find the starting scatterlist element */
2469
+ i = msg -> sg .start ;
2470
+ do {
2471
+ l = sk_msg_elem (msg , i )-> length ;
2472
+
2473
+ if (start < offset + l )
2474
+ break ;
2475
+ offset += l ;
2476
+ sk_msg_iter_var_next (i );
2477
+ } while (i != msg -> sg .end );
2478
+
2479
+ /* Bounds checks: start and pop must be inside message */
2480
+ if (start >= offset + l || last >= msg -> sg .size )
2481
+ return - EINVAL ;
2482
+
2483
+ space = MAX_MSG_FRAGS - sk_msg_elem_used (msg );
2484
+
2485
+ pop = len ;
2486
+ /* --------------| offset
2487
+ * -| start |-------- len -------|
2488
+ *
2489
+ * |----- a ----|-------- pop -------|----- b ----|
2490
+ * |______________________________________________| length
2491
+ *
2492
+ *
2493
+ * a: region at front of scatter element to save
2494
+ * b: region at back of scatter element to save when length > A + pop
2495
+ * pop: region to pop from element, same as input 'pop' here will be
2496
+ * decremented below per iteration.
2497
+ *
2498
+ * Two top-level cases to handle when start != offset, first B is non
2499
+ * zero and second B is zero corresponding to when a pop includes more
2500
+ * than one element.
2501
+ *
2502
+ * Then if B is non-zero AND there is no space allocate space and
2503
+ * compact A, B regions into page. If there is space shift ring to
2504
+ * the rigth free'ing the next element in ring to place B, leaving
2505
+ * A untouched except to reduce length.
2506
+ */
2507
+ if (start != offset ) {
2508
+ struct scatterlist * nsge , * sge = sk_msg_elem (msg , i );
2509
+ int a = start ;
2510
+ int b = sge -> length - pop - a ;
2511
+
2512
+ sk_msg_iter_var_next (i );
2513
+
2514
+ if (pop < sge -> length - a ) {
2515
+ if (space ) {
2516
+ sge -> length = a ;
2517
+ sk_msg_shift_right (msg , i );
2518
+ nsge = sk_msg_elem (msg , i );
2519
+ get_page (sg_page (sge ));
2520
+ sg_set_page (nsge ,
2521
+ sg_page (sge ),
2522
+ b , sge -> offset + pop + a );
2523
+ } else {
2524
+ struct page * page , * orig ;
2525
+ u8 * to , * from ;
2526
+
2527
+ page = alloc_pages (__GFP_NOWARN |
2528
+ __GFP_COMP | GFP_ATOMIC ,
2529
+ get_order (a + b ));
2530
+ if (unlikely (!page ))
2531
+ return - ENOMEM ;
2532
+
2533
+ sge -> length = a ;
2534
+ orig = sg_page (sge );
2535
+ from = sg_virt (sge );
2536
+ to = page_address (page );
2537
+ memcpy (to , from , a );
2538
+ memcpy (to + a , from + a + pop , b );
2539
+ sg_set_page (sge , page , a + b , 0 );
2540
+ put_page (orig );
2541
+ }
2542
+ pop = 0 ;
2543
+ } else if (pop >= sge -> length - a ) {
2544
+ sge -> length = a ;
2545
+ pop -= (sge -> length - a );
2546
+ }
2547
+ }
2548
+
2549
+ /* From above the current layout _must_ be as follows,
2550
+ *
2551
+ * -| offset
2552
+ * -| start
2553
+ *
2554
+ * |---- pop ---|---------------- b ------------|
2555
+ * |____________________________________________| length
2556
+ *
2557
+ * Offset and start of the current msg elem are equal because in the
2558
+ * previous case we handled offset != start and either consumed the
2559
+ * entire element and advanced to the next element OR pop == 0.
2560
+ *
2561
+ * Two cases to handle here are first pop is less than the length
2562
+ * leaving some remainder b above. Simply adjust the element's layout
2563
+ * in this case. Or pop >= length of the element so that b = 0. In this
2564
+ * case advance to next element decrementing pop.
2565
+ */
2566
+ while (pop ) {
2567
+ struct scatterlist * sge = sk_msg_elem (msg , i );
2568
+
2569
+ if (pop < sge -> length ) {
2570
+ sge -> length -= pop ;
2571
+ sge -> offset += pop ;
2572
+ pop = 0 ;
2573
+ } else {
2574
+ pop -= sge -> length ;
2575
+ sk_msg_shift_left (msg , i );
2576
+ }
2577
+ sk_msg_iter_var_next (i );
2578
+ }
2579
+
2580
+ sk_mem_uncharge (msg -> sk , len - pop );
2581
+ msg -> sg .size -= (len - pop );
2582
+ sk_msg_compute_data_pointers (msg );
2583
+ return 0 ;
2584
+ }
2585
+
2586
+ static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2587
+ .func = bpf_msg_pop_data ,
2588
+ .gpl_only = false,
2589
+ .ret_type = RET_INTEGER ,
2590
+ .arg1_type = ARG_PTR_TO_CTX ,
2591
+ .arg2_type = ARG_ANYTHING ,
2592
+ .arg3_type = ARG_ANYTHING ,
2593
+ .arg4_type = ARG_ANYTHING ,
2594
+ };
2595
+
2428
2596
BPF_CALL_1 (bpf_get_cgroup_classid , const struct sk_buff * , skb )
2429
2597
{
2430
2598
return task_get_classid (skb );
@@ -5098,6 +5266,7 @@ bool bpf_helper_changes_pkt_data(void *func)
5098
5266
func == bpf_xdp_adjust_meta ||
5099
5267
func == bpf_msg_pull_data ||
5100
5268
func == bpf_msg_push_data ||
5269
+ func == bpf_msg_pop_data ||
5101
5270
func == bpf_xdp_adjust_tail ||
5102
5271
#if IS_ENABLED (CONFIG_IPV6_SEG6_BPF )
5103
5272
func == bpf_lwt_seg6_store_bytes ||
@@ -5394,6 +5563,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5394
5563
return & bpf_msg_pull_data_proto ;
5395
5564
case BPF_FUNC_msg_push_data :
5396
5565
return & bpf_msg_push_data_proto ;
5566
+ case BPF_FUNC_msg_pop_data :
5567
+ return & bpf_msg_pop_data_proto ;
5397
5568
default :
5398
5569
return bpf_base_func_proto (func_id );
5399
5570
}
0 commit comments