-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlinux-uek5-v4.14.35-2025.401.4.patch
2251 lines (2099 loc) · 100 KB
/
linux-uek5-v4.14.35-2025.401.4.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
From a185bf6d582ff4e7442a360eaf72d28bfda4416c Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang0129@gmail.com>
Date: Fri, 26 Feb 2021 10:15:51 -0800
Subject: [PATCH 1/1] linux uek5 v4.14.35-2025.401.4
Signed-off-by: Dongli Zhang <dongli.zhang0129@gmail.com>
---
arch/x86/include/asm/kvm_host.h | 23 ++
arch/x86/kvm/mmu.c | 108 ++++++
drivers/net/tap.c | 11 +
drivers/net/virtio_net.c | 600 ++++++++++++++++++++++++++++++++
drivers/virtio/virtio_ring.c | 37 ++
include/linux/gfp.h | 22 ++
include/linux/mm_types.h | 11 +
include/linux/page_ref.h | 3 +
include/linux/skbuff.h | 87 +++++
include/linux/virtio_net.h | 16 +
include/net/sock.h | 18 +
mm/page_alloc.c | 78 +++++
net/core/dev.c | 44 +++
net/core/skbuff.c | 66 ++++
net/core/sock.c | 60 ++++
15 files changed, 1184 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdb517fbc635..d62fcf13efad 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -311,6 +311,12 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ /*
+ * 在以下使用kvm_mmu_page->lpage_disallowed_link:
+ * - arch/x86/kvm/mmu.c|1233| <<account_huge_nx_page>> list_add(&sp->lpage_disallowed_link,
+ * - arch/x86/kvm/mmu.c|1259| <<unaccount_huge_nx_page>> list_del(&sp->lpage_disallowed_link);
+ * - arch/x86/kvm/mmu.c|6427| <<kvm_recover_nx_lpages>> lpage_disallowed_link);
+ */
struct list_head lpage_disallowed_link;
bool unsync;
@@ -851,7 +857,24 @@ struct kvm_arch {
/*
* Hash table of struct kvm_mmu_page.
*/
+ /*
+ * 在以下使用kvm_arch->active_mmu_pages:
+ * - arch/x86/kvm/mmu.c|2153| <<kvm_mmu_alloc_page>> list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+ * - arch/x86/kvm/mmu.c|2800| <<kvm_mmu_prepare_zap_page>> list_move(&sp->link, &kvm->arch.active_mmu_pages);
+ * - arch/x86/kvm/mmu.c|2847| <<prepare_zap_oldest_mmu_page>> if (list_empty(&kvm->arch.active_mmu_pages))
+ * - arch/x86/kvm/mmu.c|2850| <<prepare_zap_oldest_mmu_page>> sp = list_last_entry(&kvm->arch.active_mmu_pages,
+ * - arch/x86/kvm/mmu.c|5981| <<kvm_zap_obsolete_pages>> &kvm->arch.active_mmu_pages, link) {
+ * - arch/x86/kvm/mmu_audit.c|92| <<walk_all_active_sps>> list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
+ * - arch/x86/kvm/x86.c|9187| <<kvm_arch_init_vm>> INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ */
struct list_head active_mmu_pages;
+ /*
+ * 在以下使用kvm_arch->lpage_disallowed_mmu_pages:
+ * - arch/x86/kvm/mmu.c|1200| <<account_huge_nx_page>> list_add(&sp->lpage_disallowed_link, &kvm->arch.lpage_disallowed_mmu_pages);
+ * - arch/x86/kvm/mmu.c|6380| <<kvm_recover_nx_lpages>> while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+ * - arch/x86/kvm/mmu.c|6386| <<kvm_recover_nx_lpages>> sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+ * - arch/x86/kvm/x86.c|9188| <<kvm_arch_init_vm>> INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+ */
struct list_head lpage_disallowed_mmu_pages;
struct list_head zapped_obsolete_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fa8cd1ef963f..18b787e68776 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -53,7 +53,32 @@
extern bool itlb_multihit_kvm_mitigation;
+/*
+ * nx_huge_pages is referenced at:
+ * - arch/x86/kvm/mmu.c|72| <<global>> module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+ * - arch/x86/kvm/mmu.c|73| <<global>> __MODULE_PARM_TYPE(nx_huge_pages, "bool");
+ * - arch/x86/kvm/mmu.c|378| <<is_nx_huge_page_enabled>> return READ_ONCE(nx_huge_pages);
+ * - arch/x86/kvm/mmu.c|6175| <<__set_nx_huge_pages>> nx_huge_pages = itlb_multihit_kvm_mitigation = val;
+ * - arch/x86/kvm/mmu.c|6183| <<set_nx_huge_pages>> bool old_val = nx_huge_pages;
+ * - arch/x86/kvm/mmu.c|6252| <<kvm_mmu_module_init>> if (nx_huge_pages == -1)
+ * - arch/x86/kvm/mmu.c|6348| <<set_nx_huge_pages_recovery_ratio>> if (READ_ONCE(nx_huge_pages) &&
+ * - arch/x86/kvm/mmu.c|6406| <<get_nx_lpage_recovery_timeout>> return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+ *
+ * The value is 1 most of the time:
+ * crash> nx_huge_pages
+ * nx_huge_pages = $1 = 1
+ */
static int __read_mostly nx_huge_pages = -1;
+/*
+ * 在以下使用nx_huge_pages_recovery_ratio:
+ * - arch/x86/kvm/mmu.c|75| <<global>> module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+ * - arch/x86/kvm/mmu.c|76| <<global>> &nx_huge_pages_recovery_ratio, 0644);
+ * - arch/x86/kvm/mmu.c|77| <<global>> __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+ * - arch/x86/kvm/mmu.c|6343| <<set_nx_huge_pages_recovery_ratio>> old_val = nx_huge_pages_recovery_ratio;
+ * - arch/x86/kvm/mmu.c|6349| <<set_nx_huge_pages_recovery_ratio>> !old_val && nx_huge_pages_recovery_ratio) {
+ * - arch/x86/kvm/mmu.c|6378| <<kvm_recover_nx_lpages>> ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+ * - arch/x86/kvm/mmu.c|6406| <<get_nx_lpage_recovery_timeout>> return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+ */
static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
@@ -373,6 +398,14 @@ static inline bool spte_ad_need_write_protect(u64 spte)
return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|3024| <<set_spte>> is_nx_huge_page_enabled()) {
+ * - arch/x86/kvm/mmu.c|3255| <<disallowed_hugepage_adjust>> is_nx_huge_page_enabled() &&
+ * - arch/x86/kvm/mmu.c|3609| <<nonpaging_map>> is_nx_huge_page_enabled();
+ * - arch/x86/kvm/mmu.c|4244| <<tdp_page_fault>> is_nx_huge_page_enabled();
+ * - arch/x86/kvm/paging_tmpl.h|757| <<FNAME(page_fault)>> is_nx_huge_page_enabled();
+ */
static bool is_nx_huge_page_enabled(void)
{
return READ_ONCE(nx_huge_pages);
@@ -1190,6 +1223,11 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|3303| <<__direct_map>> account_huge_nx_page(vcpu->kvm, sp);
+ * - arch/x86/kvm/paging_tmpl.h|677| <<FNAME(fetch)>> account_huge_nx_page(vcpu->kvm, sp);
+ */
static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
if (sp->lpage_disallowed)
@@ -1327,6 +1365,11 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
/*
* Returns the number of pointers in the rmap chain, not counting the new one.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|1503| <<rmap_add>> return pte_list_add(vcpu, spte, rmap_head);
+ * - arch/x86/kvm/mmu.c|2160| <<mmu_page_add_parent_pte>> pte_list_add(vcpu, parent_pte, &sp->parent_ptes);
+ */
static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
struct kvm_rmap_head *rmap_head)
{
@@ -2497,6 +2540,16 @@ static void clear_sp_write_flooding_count(u64 *spte)
__clear_sp_write_flooding_count(sp);
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|3288| <<__direct_map>> sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
+ * - arch/x86/kvm/mmu.c|3732| <<mmu_alloc_direct_roots>> sp = kvm_mmu_get_page(vcpu, 0, 0,
+ * - arch/x86/kvm/mmu.c|3747| <<mmu_alloc_direct_roots>> sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+ * - arch/x86/kvm/mmu.c|3789| <<mmu_alloc_shadow_roots>> sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ * - arch/x86/kvm/mmu.c|3826| <<mmu_alloc_shadow_roots>> sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
+ * - arch/x86/kvm/paging_tmpl.h|637| <<FNAME(fetch)>> sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
+ * - arch/x86/kvm/paging_tmpl.h|673| <<FNAME(fetch)>> sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+ */
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -3258,6 +3311,11 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
}
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|3646| <<nonpaging_map>> r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault, false);
+ * - arch/x86/kvm/mmu.c|4286| <<tdp_page_fault>> r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault, lpage_disallowed);
+ */
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
int map_writable, int level, kvm_pfn_t pfn,
bool prefault, bool lpage_disallowed)
@@ -4230,6 +4288,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
+ /*
+ * is_nx_huge_page_enabled() returns true most of the time
+ */
bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
is_nx_huge_page_enabled();
@@ -5119,6 +5180,10 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu.c|5183| <<kvm_init_mmu>> init_kvm_nested_mmu(vcpu);
+ */
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
@@ -6161,6 +6226,9 @@ static void __set_nx_huge_pages(bool val)
nx_huge_pages = itlb_multihit_kvm_mitigation = val;
}
+/*
+ * struct kernel_param_ops nx_huge_pages_ops.set = set_nx_huge_pages()
+ */
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
{
bool old_val = nx_huge_pages;
@@ -6315,6 +6383,9 @@ void kvm_mmu_module_exit(void)
mmu_audit_disable();
}
+/*
+ * struct kernel_param_ops nx_huge_pages_recovery_ratio_ops.set = set_nx_huge_pages_recovery_ratio()
+ */
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
{
unsigned int old_val;
@@ -6340,6 +6411,10 @@ static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel
return err;
}
+/*
+ * 在以下使用kvm_recover_nx_lpages():
+ * - arch/x86/kvm/mmu.c|6422| <<kvm_nx_lpage_recovery_worker>> kvm_recover_nx_lpages(kvm);
+ */
static void kvm_recover_nx_lpages(struct kvm *kvm)
{
int rcu_idx;
@@ -6384,6 +6459,10 @@ static long get_nx_lpage_recovery_timeout(u64 start_time)
: MAX_SCHEDULE_TIMEOUT;
}
+/*
+ * 在以下使用kvm_nx_lpage_recovery_worker():
+ * - arch/x86/kvm/mmu.c|6430| <<kvm_mmu_post_init_vm>> err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+ */
static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
{
u64 start_time;
@@ -6409,6 +6488,10 @@ static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
}
}
+/*
+ * called by:
+ * - arch/x86/kvm/x86.c|9223| <<kvm_arch_post_init_vm>> return kvm_mmu_post_init_vm(kvm);
+ */
int kvm_mmu_post_init_vm(struct kvm *kvm)
{
int err;
@@ -6422,8 +6505,33 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
return err;
}
+/*
+ * called by:
+ * - arch/x86/kvm/x86.c|9329| <<kvm_arch_pre_destroy_vm>> kvm_mmu_pre_destroy_vm(kvm);
+ */
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
{
if (kvm->arch.nx_lpage_recovery_thread)
kthread_stop(kvm->arch.nx_lpage_recovery_thread);
}
+
+/*
+ * commit 2ff5f8b23e9a4438c3883b6bb88ad9d51aa6061b
+ * Author: Junaid Shahid <junaids@google.com>
+ * Date: Wed Oct 30 19:06:14 2019 -0400
+ *
+ * kvm: x86: mmu: Recovery of shattered NX large pages
+ *
+ * The page table pages corresponding to broken down large pages are
+ * zapped in FIFO order, so that the large page can potentially
+ * be recovered, if it is no longer being used for execution. This removes
+ * the performance penalty for walking deeper EPT page tables.
+ *
+ * By default, one large page will last about one hour once the guest
+ * reaches a steady state.
+ *
+ * Signed-off-by: Junaid Shahid <junaids@google.com>
+ * Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * CVE: CVE-2018-12207
+ */
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 773a3fea8f0e..1325058cc2d5 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -118,6 +118,9 @@ static const struct proto_ops tap_socket_ops;
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)
+/*
+ * Returns net_device->rx_handler_data
+ */
static struct tap_dev *tap_dev_get_rcu(const struct net_device *dev)
{
return rcu_dereference(dev->rx_handler_data);
@@ -314,6 +317,11 @@ void tap_del_queues(struct tap_dev *tap)
}
EXPORT_SYMBOL_GPL(tap_del_queues);
+/*
+ * 在以下使用tap_handle_frame():
+ * - drivers/net/ipvlan/ipvtap.c|93| <<ipvtap_newlink>> err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap);
+ * - drivers/net/macvtap.c|101| <<macvtap_newlink>> err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap);
+ */
rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
@@ -322,6 +330,9 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
struct tap_queue *q;
netdev_features_t features = TAP_FEATURES;
+ /*
+ * Returns net_device->rx_handler_data
+ */
tap = tap_dev_get_rcu(dev);
if (!tap)
return RX_HANDLER_PASS;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9a91ce8037c3..a1d1a3f458bd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -36,6 +36,7 @@
#include <net/xdp.h>
#include <net/net_failover.h>
+/* Default is 64 */
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
@@ -45,9 +46,29 @@ module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);
/* FIXME: MTU in config. */
+/*
+ * 在以下使用GOOD_PACKET_LEN:
+ * - drivers/net/virtio_net.c|644| <<receive_small>> unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+ * - drivers/net/virtio_net.c|673| <<receive_small>> buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+ * - drivers/net/virtio_net.c|1086| <<add_recvbuf_small>> int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
+ * - drivers/net/virtio_net.c|1098| <<add_recvbuf_small>> vi->hdr_len + GOOD_PACKET_LEN);
+ * - drivers/net/virtio_net.c|2670| <<mergeable_min_buf_len>> (unsigned int )GOOD_PACKET_LEN);
+ */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
+/*
+ * 在以下使用GOOD_COPY_LEN:
+ * - drivers/net/virtio_net.c|435| <<page_to_skb>> skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
+ */
#define GOOD_COPY_LEN 128
+/*
+ * 在以下使用VIRTNET_RX_PAD:
+ * - drivers/net/virtio_net.c|642| <<receive_small>> unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
+ * - drivers/net/virtio_net.c|671| <<receive_small>> header_offset = VIRTNET_RX_PAD + xdp_headroom;
+ * - drivers/net/virtio_net.c|686| <<receive_small>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
+ * - drivers/net/virtio_net.c|1086| <<add_recvbuf_small>> int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
+ * - drivers/net/virtio_net.c|1097| <<add_recvbuf_small>> sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
+ */
#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
@@ -58,10 +79,34 @@ module_param(napi_tx, bool, 0644);
* at once, the weight is chosen so that the EWMA will be insensitive to short-
* term, transient changes in packet size.
*/
+/*
+ * Exponentially weighted moving average (EWMA)
+ *
+ * This implements a fixed-precision EWMA algorithm, with both the
+ * precision and fall-off coefficient determined at compile-time
+ * and built into the generated helper functions.
+ *
+ * The first argument to the macro is the name that will be used
+ * for the struct and helper functions.
+ *
+ * The second argument, the precision, expresses how many bits are
+ * used for the fractional part of the fixed-precision values.
+ *
+ * The third argument, the weight reciprocal, determines how the
+ * new values will be weighed vs. the old state, new values will
+ * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note
+ * that this parameter must be a power of two for efficiency.
+ */
DECLARE_EWMA(pkt_len, 0, 64)
#define VIRTNET_DRIVER_VERSION "1.0.0"
+/*
+ * 在以下使用guest_offloads[]:
+ * - drivers/net/virtio_net.c|3126| <<virtnet_probe>> for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
+ * - drivers/net/virtio_net.c|3127| <<virtnet_probe>> if (virtio_has_feature(vi->vdev, guest_offloads[i]))
+ * - drivers/net/virtio_net.c|3128| <<virtnet_probe>> set_bit(guest_offloads[i], &vi->guest_offloads);
+ */
static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
@@ -77,12 +122,22 @@ struct virtnet_stat_desc {
struct virtnet_sq_stats {
struct u64_stats_sync syncp;
+ /*
+ * 在以下使用virtnet_sq_stats->packets:
+ * - drivers/net/virtio_net.c|1378| <<free_old_xmit_skbs>> sq->stats.packets += packets;
+ * - drivers/net/virtio_net.c|1708| <<virtnet_stats>> tpackets = sq->stats.packets;
+ */
u64 packets;
u64 bytes;
};
struct virtnet_rq_stats {
struct u64_stats_sync syncp;
+ /*
+ * 在以下使用virtnet_rq_stats->packets:
+ * - drivers/net/virtio_net.c|1348| <<virtnet_receive>> rq->stats.packets += received;
+ * - drivers/net/virtio_net.c|1714| <<virtnet_stats>> rpackets = rq->stats.packets;
+ */
u64 packets;
u64 bytes;
};
@@ -100,7 +155,21 @@ static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
{ "bytes", VIRTNET_RQ_STAT(bytes) },
};
+/*
+ * 在以下使用VIRTNET_SQ_STATS_LEN:
+ * - drivers/net/virtio_net.c|2128| <<virtnet_get_strings>> for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
+ * - drivers/net/virtio_net.c|2145| <<virtnet_get_sset_count>> VIRTNET_SQ_STATS_LEN);
+ * - drivers/net/virtio_net.c|2187| <<virtnet_get_ethtool_stats>> for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
+ * - drivers/net/virtio_net.c|2192| <<virtnet_get_ethtool_stats>> idx += VIRTNET_SQ_STATS_LEN;
+ */
#define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
+/*
+ * 在以下使用VIRTNET_RQ_STATS_LEN:
+ * - drivers/net/virtio_net.c|2120| <<virtnet_get_strings>> for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
+ * - drivers/net/virtio_net.c|2144| <<virtnet_get_sset_count>> return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
+ * - drivers/net/virtio_net.c|2173| <<virtnet_get_ethtool_stats>> for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
+ * - drivers/net/virtio_net.c|2178| <<virtnet_get_ethtool_stats>> idx += VIRTNET_RQ_STATS_LEN;
+ */
#define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
/* Internal representation of a send virtqueue */
@@ -134,6 +203,14 @@ struct receive_queue {
struct page *pages;
/* Average packet length for mergeable receive buffers. */
+ /*
+ * 在以下使用receive_queue->mrg_avg_pkt_len:
+ * - drivers/net/virtio_net.c|983| <<receive_mergeable>> ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+ * - drivers/net/virtio_net.c|1096| <<receive_mergeable>> ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
+ * - drivers/net/virtio_net.c|1331| <<add_recvbuf_mergeable>> len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+ * - drivers/net/virtio_net.c|2961| <<virtnet_alloc_queues>> ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
+ * - drivers/net/virtio_net.c|3014| <<mergeable_rx_buffer_size_show>> avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+ */
struct ewma_pkt_len mrg_avg_pkt_len;
/* Page frag for packet buffer allocation. */
@@ -143,6 +220,13 @@ struct receive_queue {
struct scatterlist sg[MAX_SKB_FRAGS + 2];
/* Min single buffer size for mergeable buffers case. */
+ /*
+ * 在以下使用receive_queue->min_buf_len:
+ * - drivers/net/virtio_net.c|1319| <<get_mergeable_buf_len>> rq->min_buf_len, PAGE_SIZE - hdr_len);
+ * - drivers/net/virtio_net.c|2893| <<mergeable_min_buf_len>> unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
+ * - drivers/net/virtio_net.c|2895| <<mergeable_min_buf_len>> return max(max(min_buf_len, hdr_len) - hdr_len,
+ * - drivers/net/virtio_net.c|2964| <<virtnet_find_vqs>> vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
+ */
unsigned int min_buf_len;
/* Name of this receive queue: input.$index */
@@ -166,38 +250,127 @@ struct virtnet_info {
struct virtio_device *vdev;
struct virtqueue *cvq;
struct net_device *dev;
+ /* Array; each element is a struct, not a pointer */
struct send_queue *sq;
+ /* Array; each element is a struct, not a pointer */
struct receive_queue *rq;
unsigned int status;
/* Max # of queue pairs supported by the device */
+ /*
+ * 在以下设置virtnet_info->max_queue_pairs:
+ * - drivers/net/virtio_net.c|2892| <<virtnet_probe>> vi->max_queue_pairs = max_queue_pairs;
+ */
u16 max_queue_pairs;
/* # of queue pairs currently used by the driver */
+ /*
+ * 在以下设置virtnet_info->curr_queue_pairs:
+ * - drivers/net/virtio_net.c|1648| <<_virtnet_set_queues>> vi->curr_queue_pairs = queue_pairs;
+ * - drivers/net/virtio_net.c|2889| <<virtnet_probe>> vi->curr_queue_pairs = max_queue_pairs;
+ * - drivers/net/virtio_net.c|2891| <<virtnet_probe>> vi->curr_queue_pairs = num_online_cpus();
+ */
u16 curr_queue_pairs;
/* # of XDP queue pairs currently used by the driver */
+ /*
+ * 在以下使用virtnet_info->xdp_queue_pairs:
+ * - drivers/net/virtio_net.c|425| <<virtnet_xdp_flush>> qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ * - drivers/net/virtio_net.c|441| <<__virtnet_xdp_xmit>> qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ * - drivers/net/virtio_net.c|487| <<virtnet_get_headroom>> return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+ * - drivers/net/virtio_net.c|1268| <<is_xdp_raw_buffer_queue>> if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+ * - drivers/net/virtio_net.c|2238| <<virtnet_xdp_set>> curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
+ * - drivers/net/virtio_net.c|2268| <<virtnet_xdp_set>> vi->xdp_queue_pairs = xdp_qp;
+ */
u16 xdp_queue_pairs;
/* I like... big packets and I cannot lie! */
+ /*
+ * 在以下使用vi->big_packets:
+ * - drivers/net/virtio_net.c|918| <<receive_buf>> } else if (vi->big_packets) {
+ * - drivers/net/virtio_net.c|928| <<receive_buf>> else if (vi->big_packets)
+ * - drivers/net/virtio_net.c|1120| <<try_fill_recv>> else if (vi->big_packets)
+ * - drivers/net/virtio_net.c|1209| <<virtnet_receive>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2479| <<free_unused_bufs>> } else if (vi->big_packets) {
+ * - drivers/net/virtio_net.c|2507| <<mergeable_min_buf_len>> unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
+ * - drivers/net/virtio_net.c|2541| <<virtnet_find_vqs>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2845| <<virtnet_probe>> vi->big_packets = true;
+ * - drivers/net/virtio_net.c|2881| <<virtnet_probe>> vi->big_packets = true;
+ */
bool big_packets;
/* Host will merge rx buffers for big packets (shake it! shake it!) */
+ /*
+ * 在以下使用virtnet_info->mergeable_rx_bufs:
+ * - drivers/net/virtio_net.c|364| <<page_to_skb>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|384| <<page_to_skb>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|916| <<receive_buf>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|926| <<receive_buf>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|1118| <<try_fill_recv>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|1209| <<virtnet_receive>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|1394| <<xmit_skb>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|2227| <<virtnet_xdp_set>> if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+ * - drivers/net/virtio_net.c|2477| <<free_unused_bufs>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2541| <<virtnet_find_vqs>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2848| <<virtnet_probe>> vi->mergeable_rx_bufs = true;
+ * - drivers/net/virtio_net.c|2900| <<virtnet_probe>> if (vi->mergeable_rx_bufs)
+ */
bool mergeable_rx_bufs;
/* Has control virtqueue */
+ /*
+ * 在以下使用virtnet_info->has_cvq:
+ * - drivers/net/virtio_net.c|1636| <<_virtnet_set_queues>> if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
+ * - drivers/net/virtio_net.c|2550| <<virtnet_find_vqs>> if (vi->has_cvq) {
+ * - drivers/net/virtio_net.c|2572| <<virtnet_find_vqs>> if (vi->has_cvq) {
+ * - drivers/net/virtio_net.c|2861| <<virtnet_probe>> vi->has_cvq = true;
+ */
bool has_cvq;
/* Host can handle any s/g split between our header and packet data */
+ /*
+ * virtnet_info->any_header_sg is used at:
+ * - drivers/net/virtio_net.c|1738| <<xmit_skb>> can_push = vi->any_header_sg &&
+ * - drivers/net/virtio_net.c|2594| <<virtnet_xdp_set>> if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+ * - drivers/net/virtio_net.c|3242| <<virtnet_probe>> vi->any_header_sg = true;
+ * - drivers/net/virtio_net.c|3268| <<virtnet_probe>> if (vi->any_header_sg)
+ */
bool any_header_sg;
/* Packet virtio header size */
+ /*
+ * virtnet_info->hdr_len is set at:
+ * - drivers/net/virtio_net.c|3236| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ * - drivers/net/virtio_net.c|3238| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr);
+ */
u8 hdr_len;
/* Work struct for refilling if we run low on memory. */
+ /*
+ * 在以下使用virtnet_info->refill (函数是refill_work()):
+ * - drivers/net/virtio_net.c|1325| <<refill_work>> container_of(work, struct virtnet_info, refill.work);
+ * - drivers/net/virtio_net.c|1340| <<refill_work>> schedule_delayed_work(&vi->refill, HZ/2);
+ * - drivers/net/virtio_net.c|1383| <<virtnet_receive>> schedule_delayed_work(&vi->refill, 0);
+ * - drivers/net/virtio_net.c|1506| <<virtnet_open>> schedule_delayed_work(&vi->refill, 0);
+ * - drivers/net/virtio_net.c|1828| <<_virtnet_set_queues>> schedule_delayed_work(&vi->refill, 0);
+ * - drivers/net/virtio_net.c|1850| <<virtnet_close>> cancel_delayed_work_sync(&vi->refill);
+ * - drivers/net/virtio_net.c|2316| <<virtnet_freeze_down>> cancel_delayed_work_sync(&vi->refill);
+ * - drivers/net/virtio_net.c|2342| <<virtnet_restore_up>> schedule_delayed_work(&vi->refill, 0);
+ * - drivers/net/virtio_net.c|2802| <<virtnet_alloc_queues>> INIT_DELAYED_WORK(&vi->refill, refill_work);
+ * - drivers/net/virtio_net.c|3142| <<virtnet_probe>> cancel_delayed_work_sync(&vi->refill);
+ */
struct delayed_work refill;
/* Work struct for config space updates */
+ /*
+ * 在以下使用virtnet_info->config_work:
+ * - drivers/net/virtio_net.c|2601| <<virtnet_freeze_down>> flush_work(&vi->config_work);
+ * - drivers/net/virtio_net.c|2849| <<virtnet_config_changed_work>> container_of(work, struct virtnet_info, config_work);
+ * - drivers/net/virtio_net.c|2883| <<virtnet_config_changed>> schedule_work(&vi->config_work);
+ * - drivers/net/virtio_net.c|3330| <<virtnet_probe>> INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ * - drivers/net/virtio_net.c|3426| <<virtnet_probe>> schedule_work(&vi->config_work);
+ * - drivers/net/virtio_net.c|3478| <<virtnet_remove>> flush_work(&vi->config_work);
+ */
struct work_struct config_work;
/* Does the affinity hint is set for virtqueues? */
@@ -216,10 +389,22 @@ struct virtnet_info {
unsigned long guest_offloads;
/* failover when STANDBY feature enabled */
+ /*
+ * 在以下使用virtnet_info->failover:
+ * - drivers/net/virtio_net.c|2909| <<virtnet_probe>> vi->failover = net_failover_create(vi->dev);
+ * - drivers/net/virtio_net.c|2910| <<virtnet_probe>> if (IS_ERR(vi->failover))
+ * - drivers/net/virtio_net.c|2955| <<virtnet_probe>> net_failover_destroy(vi->failover);
+ * - drivers/net/virtio_net.c|2990| <<virtnet_remove>> net_failover_destroy(vi->failover);
+ */
struct failover *failover;
};
struct padded_vnet_hdr {
+ /*
+ * struct virtio_net_hdr_mrg_rxbuf hdr:
+ * -> struct virtio_net_hdr hdr;
+ * -> __virtio16 num_buffers; // Number of merged rx buffers
+ */
struct virtio_net_hdr_mrg_rxbuf hdr;
/*
* hdr is in a separate sg buffer, and data sg buffer shares same page
@@ -252,6 +437,15 @@ static int rxq2vq(int rxq)
return rxq * 2;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|504| <<page_to_skb>> hdr = skb_vnet_hdr(skb);
+ * - drivers/net/virtio_net.c|798| <<receive_small>> memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
+ * - drivers/net/virtio_net.c|1106| <<receive_buf>> hdr = skb_vnet_hdr(skb);
+ * - drivers/net/virtio_net.c|1602| <<xmit_skb>> hdr = skb_vnet_hdr(skb);
+ *
+ * Returns skb->cb, cast to struct virtio_net_hdr_mrg_rxbuf *
+ */
static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
{
return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
@@ -261,6 +455,20 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
* private is used to chain pages for big packets, put the whole
* most recent used list in the beginning for reuse
*/
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|557| <<page_to_skb>> give_pages(rq, page);
+ * - drivers/net/virtio_net.c|829| <<receive_big>> give_pages(rq, page);
+ * - drivers/net/virtio_net.c|1074| <<receive_buf>> give_pages(rq, buf);
+ * - drivers/net/virtio_net.c|1184| <<add_recvbuf_big>> give_pages(rq, list);
+ * - drivers/net/virtio_net.c|1196| <<add_recvbuf_big>> give_pages(rq, list);
+ * - drivers/net/virtio_net.c|1214| <<add_recvbuf_big>> give_pages(rq, first);
+ * - drivers/net/virtio_net.c|2703| <<free_unused_bufs>> give_pages(&vi->rq[i], buf);
+ *
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ * 把page(可能是由page->private串起来多个page)放入receive_queue->pages的头部
+ */
static void give_pages(struct receive_queue *rq, struct page *page)
{
struct page *end;
@@ -271,6 +479,16 @@ static void give_pages(struct receive_queue *rq, struct page *page)
rq->pages = page;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|1181| <<add_recvbuf_big>> first = get_a_page(rq, gfp);
+ * - drivers/net/virtio_net.c|1194| <<add_recvbuf_big>> first = get_a_page(rq, gfp);
+ * - drivers/net/virtio_net.c|2657| <<_free_receive_bufs>> __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+ *
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ * 从receive_queue->pages链表取出一个page, 没有就alloc_page()
+ */
static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
struct page *p = rq->pages;
@@ -284,15 +502,37 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
return p;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|382| <<virtqueue_napi_complete>> virtqueue_napi_schedule(napi, vq);
+ * - drivers/net/virtio_net.c|397| <<skb_xmit_done>> virtqueue_napi_schedule(napi, vq);
+ * - drivers/net/virtio_net.c|1255| <<skb_recv_done>> virtqueue_napi_schedule(&rq->napi, rvq);
+ * - drivers/net/virtio_net.c|1267| <<virtnet_napi_enable>> virtqueue_napi_schedule(napi, vq);
+ */
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
+ /*
+ * Test if NAPI routine is already running, and if not mark
+ * it as running. This is used as a condition variable to
+ * ensure only one NAPI poll instance runs. We also make
+ * sure there is no pending NAPI disable.
+ */
if (napi_schedule_prep(napi)) {
virtqueue_disable_cb(vq);
+ /*
+ * The entry's receive function will be scheduled to run.
+ * Consider using __napi_schedule_irqoff() if hard irqs are masked.
+ */
__napi_schedule(napi);
}
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|1394| <<virtnet_poll>> virtqueue_napi_complete(napi, rq->vq, received);
+ * - drivers/net/virtio_net.c|1442| <<virtnet_poll_tx>> virtqueue_napi_complete(napi, sq->vq, 0);
+ */
static void virtqueue_napi_complete(struct napi_struct *napi,
struct virtqueue *vq, int processed)
{
@@ -300,6 +540,10 @@ static void virtqueue_napi_complete(struct napi_struct *napi,
opaque = virtqueue_enable_cb_prepare(vq);
if (napi_complete_done(napi, processed)) {
+ /*
+ * query pending used buffers
+ * Returns "true" if there are pending used buffers in the queue.
+ */
if (unlikely(virtqueue_poll(vq, opaque)))
virtqueue_napi_schedule(napi, vq);
} else {
@@ -307,6 +551,10 @@ static void virtqueue_napi_complete(struct napi_struct *napi,
}
}
+/*
+ * 在以下使用skb_xmit_done():
+ * - drivers/net/virtio_net.c|2781| <<virtnet_find_vqs>> callbacks[txq2vq(i)] = skb_xmit_done;
+ */
static void skb_xmit_done(struct virtqueue *vq)
{
struct virtnet_info *vi = vq->vdev->priv;
@@ -323,23 +571,42 @@ static void skb_xmit_done(struct virtqueue *vq)
}
#define MRG_CTX_HEADER_SHIFT 22
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|1434| <<add_recvbuf_mergeable>> ctx = mergeable_len_to_ctx(len, headroom);
+ */
static void *mergeable_len_to_ctx(unsigned int truesize,
unsigned int headroom)
{
return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|961| <<receive_mergeable>> unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ */
static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|1070| <<receive_mergeable>> truesize = mergeable_ctx_to_truesize(ctx);
+ * - drivers/net/virtio_net.c|1104| <<receive_mergeable>> truesize = mergeable_ctx_to_truesize(ctx);
+ */
static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
/* Called from bottom half context */
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|819| <<receive_big>> struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
+ * - drivers/net/virtio_net.c|917| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, xdp_page,
+ * - drivers/net/virtio_net.c|969| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+ */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
struct page *page, unsigned int offset,
@@ -354,12 +621,31 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
p = page_address(page) + offset;
/* copy small packet so we can reuse these pages for small data */
+ /*
+ * Allocate a new sk_buff for use in NAPI receive. This buffer will
+ * attempt to allocate the head from a special reserved region used
+ * only for NAPI Rx allocation. By doing this we can save several
+ * CPU cycles by avoiding having to disable and re-enable IRQs.
+ */
skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
if (unlikely(!skb))
return NULL;
+ /*
+ * struct virtio_net_hdr_mrg_rxbuf {
+ * struct virtio_net_hdr hdr;
+ * __virtio16 num_buffers; // Number of merged rx buffers
+ * };
+ *
+ * 返回skb->cb = struct virtio_net_hdr_mrg_rxbuf
+ */
hdr = skb_vnet_hdr(skb);
+ /*
+ * 在以下设置virtnet_info->hdr_len:
+ * - drivers/net/virtio_net.c|3236| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ * - drivers/net/virtio_net.c|3238| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr);
+ */
hdr_len = vi->hdr_len;
if (vi->mergeable_rx_bufs)
hdr_padded_len = sizeof(*hdr);
@@ -374,6 +660,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
p += hdr_padded_len;
copy = len;
+ /*
+ * skb_tailroom():
+ * Return the number of bytes of free space at the tail of an sk_buff
+ */
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
skb_put_data(skb, p, copy);
@@ -381,6 +671,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
len -= copy;
offset += copy;
+ /*
+ * mergeable_rx_bufs大部分是true
+ */
if (vi->mergeable_rx_bufs) {
if (len)
skb_add_rx_frag(skb, 0, page, offset, len, truesize);
@@ -482,6 +775,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
return 0;
}
+/* 没有设置vi->xdp_queue_pairs就返回 0 */
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
@@ -673,6 +967,12 @@ static struct sk_buff *receive_big(struct net_device *dev,
unsigned int len)
{
struct page *page = buf;
+ /*
+ * page_to_skb() is called by:
+ * - drivers/net/virtio_net.c|819| <<receive_big>> struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
+ * - drivers/net/virtio_net.c|917| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, xdp_page,
+ * - drivers/net/virtio_net.c|969| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+ */
struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
PAGE_SIZE, true);
@@ -687,6 +987,10 @@ static struct sk_buff *receive_big(struct net_device *dev,
return NULL;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|987| <<receive_buf>> skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
+ */
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
@@ -695,6 +999,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
unsigned int len,
bool *xdp_xmit)
{
+ /*
+ * struct virtio_net_hdr_mrg_rxbuf {
+ * struct virtio_net_hdr hdr;
+ * __virtio16 num_buffers; // Number of merged rx buffers
+ * };
+ */
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
@@ -702,6 +1012,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
unsigned int truesize;
+ /*
+ * 对于非xdp的应该就是0吧
+ */
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
bool sent;
int err;
@@ -811,6 +1124,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
rcu_read_unlock();
+ /*
+ * 当时为这个buf分配的size
+ */
truesize = mergeable_ctx_to_truesize(ctx);
if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
@@ -819,6 +1135,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
goto err_skb;
}
+ /*
+ * 在以下调用page_to_skb():
+ * - drivers/net/virtio_net.c|819| <<receive_big>> struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
+ * - drivers/net/virtio_net.c|917| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, xdp_page,
+ * - drivers/net/virtio_net.c|969| <<receive_mergeable>> head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+ */
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
curr_skb = head_skb;
@@ -827,6 +1149,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
while (--num_buf) {
int num_skb_frags;
+ /*
+ * get the next used buffer
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
@@ -872,11 +1199,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
len, truesize);
} else {
+ /*
+ * skb_add_rx_frag()
+ * -> skb_fill_page_desc()
+ * -> __skb_fill_page_desc()
+ * page = compound_head(page);
+ * if (page_is_pfmemalloc(page))
+ * skb->pfmemalloc = true;
+ */
skb_add_rx_frag(curr_skb, num_skb_frags, page,
offset, len, truesize);
}
}
+ /*
+ * 在以下使用receive_queue->mrg_avg_pkt_len:
+ * - drivers/net/virtio_net.c|983| <<receive_mergeable>> ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+ * - drivers/net/virtio_net.c|1096| <<receive_mergeable>> ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
+ * - drivers/net/virtio_net.c|1331| <<add_recvbuf_mergeable>> len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+ * - drivers/net/virtio_net.c|2961| <<virtnet_alloc_queues>> ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
+ * - drivers/net/virtio_net.c|3014| <<mergeable_rx_buffer_size_show>> avg = &vi->rq[queue_index].mrg_avg_pkt_len;
+ */
ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
return head_skb;
@@ -902,6 +1245,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
return NULL;
}
+/*
+ * called by:
+ * - drivers/net/virtio_net.c|1282| <<virtnet_receive>> bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
+ * - drivers/net/virtio_net.c|1288| <<virtnet_receive>> bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
+ */
static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
{
@@ -910,12 +1258,25 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
struct virtio_net_hdr_mrg_rxbuf *hdr;
int ret;
+ /*
+ * 在以下设置virtnet_info->hdr_len:
+ * - drivers/net/virtio_net.c|3236| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ * - drivers/net/virtio_net.c|3238| <<virtnet_probe>> vi->hdr_len = sizeof(struct virtio_net_hdr);
+ */
if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
+ /*
+ * !!! 这里会增加dev->stats.rx_length_errors++
+ */
dev->stats.rx_length_errors++;
if (vi->mergeable_rx_bufs) {
put_page(virt_to_head_page(buf));
} else if (vi->big_packets) {
+ /*
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ * 把page(可能是由page->private串起来多个page)放入receive_queue->pages的头部
+ */
give_pages(rq, buf);
} else {
put_page(virt_to_head_page(buf));
@@ -923,6 +1284,21 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
return 0;
}
+ /*
+ * 在以下使用virtnet_info->mergeable_rx_bufs:
+ * - drivers/net/virtio_net.c|364| <<page_to_skb>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|384| <<page_to_skb>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|916| <<receive_buf>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|926| <<receive_buf>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|1118| <<try_fill_recv>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|1209| <<virtnet_receive>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|1394| <<xmit_skb>> if (vi->mergeable_rx_bufs)
+ * - drivers/net/virtio_net.c|2227| <<virtnet_xdp_set>> if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+ * - drivers/net/virtio_net.c|2477| <<free_unused_bufs>> if (vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2541| <<virtnet_find_vqs>> if (!vi->big_packets || vi->mergeable_rx_bufs) {
+ * - drivers/net/virtio_net.c|2848| <<virtnet_probe>> vi->mergeable_rx_bufs = true;