-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathindex.html
1234 lines (1178 loc) · 56.3 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport"
content="width=device-width, height=device-height, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<title>Performance tricks I learned from contributing to the Azure .NET SDK</title>
<meta name="author" content="Daniel Marbach">
<link rel="stylesheet" href="dist/reset.css">
<link rel="stylesheet" href="dist/reveal.css">
<link rel="stylesheet" href="dist/theme/black.css">
<link rel="stylesheet" href="css/font-awesome.min.css">
<link rel="stylesheet" href="dist/custom.css">
<!-- Theme used for syntax highlighted code -->
<link rel="stylesheet" href="plugin/highlight/monokai.css">
</head>
<body>
<div class="reveal">
<div class="slides">
<section data-background="img/juggler-trick-magician-juggle-1216853.jpg">
<div style="left: 10; width: 52%; padding: 25px; font-size: 50px; text-align: left;">
<h3 style="text-transform: none;color: rgba(31, 129, 74, 0.9);">Performance tricks I learned from contributing to the Azure .NET SDK</h3>
<p style="text-transform: none;color: rgba(31, 129, 74, 0.6); font-size: 30px;">
<i class="fa fa-twitter" aria-hidden="true"><a
style="text-transform: none;color: rgba(31, 129, 74, 0.6)"
href="https://twitter.com/danielmarbach"> danielmarbach</a> |</i>
<i class="fa fa-envelope" aria-hidden="true"> <a
style="text-transform: none;color: rgba(31, 129, 74, 0.6)"
href="mailto:daniel.marbach@particular.net">daniel.marbach@particular.net</a></i>
</p>
</div>
<!-- Short Version -->
<aside class="notes">
<ul>
<li>As a practical learner I found the Azure SDK repository the perfect spot to learn and apply
interesting performance optimization techniques</li>
<li>Over 90 contributions over the years</li>
<li>In this talk I have summarized some of the key learnings of my adventures in the Azure SDK
repository so that you can have a headstart should you wish to start applying performance
optimization techniques in your code bases.</li>
</ul>
</aside>
</section>
<section>
<section data-background="img/architecture-g104d6d62d_1280.jpg">
<div class="image-slide-box">
<h2>Introduction</h2>
<p>Focus on performance optimization in .NET Code and not architecture.</p>
</div>
<aside class="notes">
<ul>
<li>Not about horizontal and vertical scaling</li>
<li>Not about tools like BenchmarkDotNet or memory and tracing profilers.</li>
<li>Performance optimizations that can be done in code</li>
<li>.NET has been evolving over the years into a modern and high-performance platform.</li>
<li>C/C++ is less and less needed to achieve code that performs well at scale</li>
</ul>
</aside>
</section>
<section data-background="img/tarot-g4ac5fb817_1280.jpg">
<div class="image-slide-box">
<h2>Esoteric</h2>
<p>Being called out for premature optimizations.</p>
</div>
<aside class="notes">
<ul>
<li>"Wow, that's crazy, is the complexity of this change really worth it? Isn't that
premature optimization?" </li>
<li>Some optimizations shown here are sometimes called out as esoteric.
</li>
<li>Don't jump to conclusions and apply them blindly.</li>
<li>Performance improvements can be addictive, nobody likes to optimize code that is fast
enough or only executed once a day.</li>
<li>For code executed at scale it matters: more efficient resource usage, execution time,
throughput and memory usage</li>
</ul>
</aside>
</section>
</section>
<section data-background="img/owl-1576572_1280.jpg">
<div
style="margin-left: auto; margin-right: 10; width: 35%; box-shadow: 0 1px 4px rgba(0,0,0,0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(0, 0, 0, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: left;">
<h5>At Scale implementation details matter</h5>
<div style="font-style: italic;">
<p>“Scale for an application can mean the number of users that will concurrently connect
to the application at any given time, the amount of input to process or the number of times
data needs to be processed.</p>
<p>For us, as engineers, it means we have to know <b>what to ignore</b> and knowing <b>what to
pay close attention to</b>.” <a
href="https://speakerdeck.com/davidfowl/implementation-details-matter">David Fowler</a>
</p>
</div>
</div>
<aside class="notes">
<ul>
<li>Discover the assumptions that have accumulated over time</li>
<li>Pay close attention to what is instantiated, parsed, processed etc. per request</li>
<li>How those assumptions in the code base affect the performance characteristics (memory,
throughput...) at scale</li>
<li>As developers and engineers we want rules that we can apply. Relying on rules for the
majority of cases helps us to know when and what to apply.</li>
</ul>
</aside>
</section>
<section>
<section data-background="img/office-4249395_1280.jpg">
<aside class="notes">
As developers and engineers we want rules that we can apply. I have compiled them based on my
experience.
</aside>
</section>
<section>
<ul class="r-fit-text">
<li>Avoid excessive allocations to reduce the GC overhead</li>
<li>Avoid unnecessary copying of memory</li>
</ul>
<aside class="notes">
These are the two high level categories we are going to focus on. For each section I will
introduce a few handy rules that should help you make the right tradeoffs.
</aside>
</section>
</section>
<section>
<section data-background="img/excess-g121ed821c_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid excessive allocations to reduce the GC overhead</h2>
<p>Think at least twice before using LINQ or unnecessary enumeration on the hot path</p>
</div>
<aside class="notes">
LINQ is great, and I wouldn't want to miss it at all. Yet, on the hot path it is far too easy to
get into trouble with LINQ because it can cause hidden allocations and is difficult for the JIT
to optimize. Let's look at a piece of code from the AmqpReceiver (The "driver" behind Service
Bus and Event Hub Message/Event receival)
</aside>
</section>
<section data-background="benchmarks/PerformanceNet8.png" data-background-size="100%">
<aside class="notes">
As developers and engineers we want rules that we can apply. I have compiled them based on my
experience.
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|8|10|11" data-trim data-noescape><script type="text/template">
public class AmqpReceiver {
ConcurrentBag<Guid> _lockedMessages = new ();
public Task CompleteAsync(IEnumerable<string> lockTokens)
=> CompleteInternalAsync(lockTokens);
Task CompleteInternalAsync(IEnumerable<string> lockTokens)
{
Guid[] lockTokenGuids = lockTokens.Select(token => new Guid(token)).ToArray();
if (lockTokenGuids.Any(lockToken => _lockedMessages.Contains(lockToken)))
{
// do special path accessing lockTokenGuids
return Task.CompletedTask;
}
// do normal path accessing lockTokenGuids
return Task.CompletedTask;
}
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
<aside class="notes">
<ul>
<li>Enumerable (broadest type according to SDK guidelines), string to guid, Any with
contains</li>
<li>Decompile</li>
</ul>
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|8-9|10,17" data-trim data-noescape><script type="text/template">
public class AmqpReceiver {
// ...
// Compiler generated chunk we are not really interested in right now
private Task CompleteInternalAsync(IEnumerable<string> lockTokens)
{
Enumerable.Any(Enumerable.ToArray(Enumerable.Select(lockTokens, <>c.<>9__2_0 ??
(<>c.<>9__2_0 = new Func<string, Guid>(<>c.<>9.<CompleteInternalAsync>b__2_0)))),
new Func<Guid, bool>(<CompleteInternalAsync>b__2_1));
return Task.CompletedTask;
}
[CompilerGenerated]
private bool <CompleteInternalAsync>b__2_1(Guid lockToken)
{
return Enumerable.Contains(_lockedMessages, lockToken);
}
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
<aside class="notes">
For every call of CompleteInternalAsync a new instance of Func&lt;Guid, bool&gt; is allocated
that points to &lt;CompleteInternalAsync&gt;b__2_1. A closure captures the _lockedMessages and
the lockToken as state. This allocation is unnecessary.<br />
It is possible to simply turn the Any into a loop.
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|7-13" data-trim data-noescape><script type="text/template">
public Task CompleteAsync(IEnumerable<string> lockTokens)
=> CompleteInternalAsync(lockTokens);
Task CompleteInternalAsync(IEnumerable<string> lockTokens)
{
Guid[] lockTokenGuids = lockTokens.Select(token => new Guid(token)).ToArray();
foreach (var tokenGuid in lockTokenGuids)
{
if (_requestResponseLockedMessages.Contains(tokenGuid))
{
return Task.CompletedTask;
}
}
return Task.CompletedTask;
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
<aside class="notes">
This then gets compiled down to the following.
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|6-8" data-trim data-noescape><script type="text/template">
public Task CompleteAsync(IEnumerable<string> lockTokens)
=> CompleteInternalAsync(lockTokens);
Task CompleteInternalAsync(IEnumerable<string> lockTokens)
{
Guid[] array = Enumerable.ToArray(Enumerable.Select(lockTokens,
<>c.<>9__2_0 ??
(<>c.<>9__2_0 = new Func<string, Guid>(<>c.<>9.<CompleteInternalAsync>b__2_0))));
int num = 0;
while (num < array.Length)
{
Guid item = array[num];
if (_requestResponseLockedMessages.Contains(item))
{
return Task.CompletedTask;
}
num++;
}
return Task.CompletedTask;
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
</section>
<section data-background="img/time-g5afc932c4_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Benchmarking Time!</h2>
<p>We can only know the before and after when we measure it.</p>
</div>
</section>
<section data-background="benchmarks/LinqBeforeAfterComparisonCropped.png" data-background-size="75%">
<div style="position:relative;">
<div class="fragment fade-in"
style="position: absolute; left:20%; top:35%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(33, 168, 100, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> ~20-40%<br />
<i class="fa fa-trash-o"></i> ~20-40%
</p>
</div>
</div>
<aside class="notes">
By getting rid of the Any we were able to squeeze out some good performance improvements.
Sometimes, though, we can do even more. For example, there are a few general rules we can follow
when we refactor a code path using LINQ to collection-based operations.
</aside>
</section>
<section data-background="img/office-4249395_1280.jpg">
<aside class="notes">
As developers and engineers we want rules that we can apply. I have compiled them based on my
experience.
</aside>
</section>
<section data-auto-animate>
<h2 class="r-fit-text">LINQ to collection-based operations</h2>
<ul class="r-fit-text">
<li class="fragment fade-in">Use <code>Array.Empty&lt;T&gt;</code> to represent empty arrays
</li>
<li class="fragment fade-in">Use <code>Enumerable.Empty&lt;T&gt;</code> to represent empty
enumerables</li>
<li class="fragment fade-in">Use CSharp12 collection expressions</li>
<li class="fragment fade-in">Prevent collections from growing</li>
<li class="fragment fade-in">Use concrete collection types</li>
<li class="fragment fade-in">Leverage pattern matching or
<code>Enumerable.TryGetNonEnumeratedCount</code>
</li>
<li class="fragment fade-in">Wait with instantiating collections until really needed</li>
<li class="fragment fade-in">There be dragons
<ul>
<li>Align access or use unsafe to avoid bound checks</li>
<li>Use <a href="https://learn.microsoft.com/en-us/dotnet/api/system.runtime.interopservices.collectionsmarshal">CollectionMarshal</a>/<a href="https://learn.microsoft.com/en-us/dotnet/api/system.runtime.interopservices.memorymarshal">MemoryMarshal</a>/<a href="https://learn.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.unsafe">Unsafe</a> to access the underlying data directly</li>
</ul>
</li>
<li class="fragment fade-in">Keep yourself up to date with latest .NET performance improvements</li>
</ul>
<!-- For the short 45 min version -->
<aside class="notes">
I'm not going to cover the details of these rules. I'm leaving them here for you as a reference.
Should you wish to dig deeper I will be handing out a link to a recording of a longer version of
this talk that goes into more details and shows a before and after of the code we already
optimized. Just to show you what gains are possible, here are the improvements
we can achieve by applying all those rules to the previous code in the best/optimal case.
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|6" data-trim data-noescape><script type="text/template">
public Task CompleteAsync(IEnumerable<string> lockTokens)
=> CompleteInternalAsync(lockTokens);
Task CompleteInternalAsync(IEnumerable<string> lockTokens)
{
Guid[] lockTokenGuids = lockTokens.Select(token => new Guid(token)).ToArray();
foreach (var tokenGuid in lockTokenGuids)
{
if (_requestResponseLockedMessages.Contains(tokenGuid))
{
return Task.CompletedTask;
}
}
return Task.CompletedTask;
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
<aside class="notes">
In order to know which of these principles we can apply we have to be aware of what collection
types are usually passed as parameters to the CompleteAsync method. In the .NET Azure SDK the
lockTokens enumerable is almost always an already materialized collection type that implements
IReadOnlyCollection (context)
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.7em"><code class="language-csharp" data-line-numbers="|2-6|10|12-13|17-18" data-trim data-noescape><script type="text/template">
public Task CompleteAsync(IEnumerable<string> lockTokens) {
IReadOnlyCollection<string> readOnlyCollection = lockTokens switch
{
IReadOnlyCollection<string> asReadOnlyCollection => asReadOnlyCollection,
_ => lockTokens.ToArray(),
};
return CompleteInternalAsync(readOnlyCollection);
}
Task CompleteInternalAsync(IReadOnlyCollection<string> lockTokens)
{
int count = lockTokens.Count;
Guid[] lockTokenGuids = count == 0 ? Array.Empty<Guid>() : new Guid[count];
int index = 0;
foreach (var token in lockTokens)
{
var tokenGuid = new Guid(token);
lockTokenGuids[index++] = tokenGuid;
if (_requestResponseLockedMessages.Contains(tokenGuid))
{
return Task.CompletedTask;
}
}
return Task.CompletedTask;
}
</script></code></pre>
<medium class="code-header">Avoid LINQ on the hot path.</medium>
<aside class="notes">
The internal method signature has to be changed to accept a parameter of type
IReadOnlyCollection. For the empty case we can directly use the empty array and in the other
cases we use an array. Because we have the count available, the array can be properly
initialized with the desired count (if we'd be using lists this would be even more important
because lists can automatically grow which can allocate a lot and takes time).
</aside>
</section>
<section data-background="img/time-g5afc932c4_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Benchmarking Time!</h2>
<p>We can only know the before and after when we measure it.</p>
</div>
</section>
<section data-background="benchmarks/LinqAfterComparisonCropped.png" data-background-size="75%">
<div style="position:relative;">
<div class="fragment fade-in-then-out"
style="position: absolute; left:20%; top:35%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(33, 168, 100, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> ~5-64%<br />
<i class="fa fa-trash-o"></i> ~23-61%
</p>
</div>
<div class="fragment fade-in"
style="position: absolute; left:20%; top:35%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(168, 114, 33, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> +56%<br />
<i class="fa fa-trash-o"></i> ~23-61%
</p>
</div>
</div>
<aside class="notes">
<ul>
<li>Lazy enumeration is much worse.</li>
<li>Is that an indication we shouldn't be doing such a refactoring?</li>
<li>Well it depends</li>
<li>If you know what is passed into it it might be a good optimization.</li>
<li>Otherwise readability should be the key driver instead of trying to gold plate every
part of the code base</li>
<li>There are likely other areas that are slowing things down more. </li>
<li>Fire up your favorite memory and performance profiler and get a better understanding
</li>
<li>Like with all things, it is crucial to know when to stop on a given code path and find
other areas that are more impactful to optimize. The context of the piece of code that
you are trying to optimize is key.</li>
</ul>
</aside>
</section>
</section>
<section>
<section data-background="img/excess-g121ed821c_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid excessive allocations to reduce the GC overhead</h2>
<p>Be aware of closure allocations</p>
</div>
<aside class="notes">
We have already touched a bit on closure allocations during our LINQ performance investigations.
But closures can occur anywhere where we have lambdas (Action or Func delegates) being invoked
that access state from the outside of the lambda.
</aside>
</section>
<section>
<pre class="stretch" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|1|2|13" data-trim data-noescape><script type="text/template">
async Task RunOperation(
Func<TimeSpan, Task> operation,
TransportConnectionScope scope, CancellationToken cancellationToken)
{
TimeSpan tryTimeout = CalculateTryTimeout(0);
// omitted
while (!cancellationToken.IsCancellationRequested) {
if (IsServerBusy) {
await Task.Delay(ServerBusyBaseSleepTime, cancellationToken);
}
try {
await operation(tryTimeout);
return;
}
catch {
// omitted
}
}
}
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
Retry method from the ServiceBus SDK that retries operations on certain server failure scenarios
(like RetryAfter when busy).
</aside>
</section>
<section>
<pre class="" style="font-size: 0.7em"><code class="language-csharp" data-line-numbers="|1,4-5" data-trim data-noescape><script type="text/template">
TransportMessageBatch messageBatch = null;
Task createBatchTask = _retryPolicy.RunOperation(async (timeout) =>
{
messageBatch =
await CreateMessageBatchInternalAsync(options, timeout);
},
_connectionScope,
cancellationToken);
await createBatchTask;
return messageBatch;
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
Notice the message batch local is captured as well as the options instance.
Decompile the code.
</aside>
</section>
<section>
<pre class="" style="font-size: 0.7em"><code class="language-csharp" data-line-numbers="|2,8" data-trim data-noescape><script type="text/template">
if (num1 != 0) {
this.\u003C\u003E8__1 = new AmqpSender.\u003C\u003Ec__DisplayClass16_0();
this.\u003C\u003E8__1.\u003C\u003E4__this = this.\u003C\u003E4__this;
this.\u003C\u003E8__1.options = this.options;
this.\u003C\u003E8__1.messageBatch = (TransportMessageBatch) null;
configuredTaskAwaiter = amqpSender._retryPolicy.RunOperation(
new Func<TimeSpan, Task>((object) this.\u003C\u003E8__1,
__methodptr(\u003CCreateMessageBatchAsync\u003Eb__0)),
(TransportConnectionScope) amqpSender._connectionScope,
this.cancellationToken).ConfigureAwait(false).GetAwaiter();
// rest omitted
}
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
</section>
<section>
<pre class="stretch" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|1|2,3|14" data-trim data-noescape><script type="text/template">
internal async ValueTask<TResult> RunOperation<T1, TResult>(
Func<T1, TimeSpan, CancellationToken, ValueTask<TResult>> operation,
T1 t1,
TransportConnectionScope scope,
CancellationToken cancellationToken) {
TimeSpan tryTimeout = CalculateTryTimeout(0);
// omitted
while (!cancellationToken.IsCancellationRequested) {
if (IsServerBusy) {
await Task.Delay(ServerBusyBaseSleepTime, cancellationToken);
}
try {
return await operation(t1, tryTimeout, cancellationToken);
}
catch {
// omitted
}
}
}
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
We can augment this code to accept an input T1 and return an output. The input argument or state
is then passed into the operation method.
With that trick we can build a library tool that allows to deal with functions that return state
as well as actions that return nothing. Essentially properly modelling void to make functional
programmers happy.
</aside>
</section>
<section>
<pre class="" style="font-size: 0.60em"><code class="language-csharp" data-line-numbers="|1|2,3|6,8,9,12|8,12|6,8,9,12" data-trim data-noescape><script type="text/template">
internal async ValueTask RunOperation<T1>(
Func<T1, TimeSpan, CancellationToken, ValueTask> operation,
T1 t1,
TransportConnectionScope scope,
CancellationToken cancellationToken) =>
await RunOperation(static async (value, timeout, token) =>
{
var (t1, operation) = value;
await operation(t1, timeout, token);
return default(object);
},
(t1, operation),
scope, cancellationToken);
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
<ul>
<li>Switch to ValueTask because some operations returned value tasks</li>
<li>Use CSharp 9 static lambda features to make it impossible to access state within the
delegate that is outside</li>
<li>Use the state argument to pass in the state from the method including the operation as a
value tuple into the function</li>
<li>Deconstruct the value tuple and execute the operation passing all necessary state to it.
</li>
</ul>
</aside>
</section>
<section>
<pre class="" style="font-size: 0.6em"><code class="language-csharp" data-line-numbers="|4,9" data-trim data-noescape><script type="text/template">
if (num1 != 0) {
configuredTaskAwaiter = t1._retryPolicy
.RunOperation<AmqpSender, CreateMessageBatchOptions, TransportMessageBatch>(
AmqpSender.\u003C\u003Ec.\u003C\u003E9__16_0 ?? (AmqpSender.\u003C\u003Ec.\u003C\u003E9__16_0 =
new Func<AmqpSender, CreateMessageBatchOptions, TimeSpan, CancellationToken, Task<TransportMessageBatch>>(
(object) AmqpSender.\u003C\u003Ec.\u003C\u003E9,
__methodptr(\u003CCreateMessageBatchAsync\u003Eb__16_0))),
t1,
this.options,
(TransportConnectionScope) t1._connectionScope,
this.cancellationToken).ConfigureAwait(false).GetAwaiter();
// rest omitted
}
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
With that small change, we save the display class and the function delegate allocations and
can
properly use methods that support value tasks without having to allocate a task instance
when
not necessary.
</aside>
</section>
<section data-visibility="hidden">
<pre class="fit" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|7-9" data-trim data-noescape><script type="text/template">
var someState = new object();
var someOtherState = 42;
var dictionary = new ConcurrentDictionary<string, string>();
dictionary.GetOrAdd("SomeKey", (key) =>
{
return $"{someState}_{someOtherState}";
});
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
</section>
<section data-visibility="hidden">
<pre class="" style="font-size: 0.8em"><code class="language-csharp" data-line-numbers="|1,6" data-trim data-noescape><script type="text/template">
<>c__DisplayClass0_0 <>c__DisplayClass0_ = new <>c__DisplayClass0_0();
<>c__DisplayClass0_.someState = new object();
<>c__DisplayClass0_.someOtherState = 42;
concurrentDictionary.GetOrAdd("SomeKey",
new Func<string, string>(<>c__DisplayClass0_.<<Main>$>b__0));
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
</section>
<section data-visibility="hidden">
<pre class="fit" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|3|5,8" data-trim data-noescape><script type="text/template">
// same as before
dictionary.GetOrAdd("SomeKey", static (key, state) =>
{
var (someState, someOtherState) = state;
return $"{someState}_{someOtherState}";
}, (someState, someOtherState));
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
<aside class="notes">
<ul>
<li>Switch to ValueTask because some operations returned value tasks</li>
<li>Use CSharp 9 static lambda features to make it impossible to access state within the
delegate that is outside</li>
<li>Use the state argument to pass in the state from the method including the operation as a
value tuple into the function</li>
<li>Deconstruct the value tuple and execute the operation passing all necessary state to it.
</li>
</ul>
</aside>
</section>
<section data-visibility="hidden">
<pre class="" style="font-size: 0.65em"><code class="language-csharp" data-line-numbers="|5" data-trim data-noescape><script type="text/template">
object item = new object();
int item2 = 42;
concurrentDictionary.GetOrAdd("SomeKey",
<>c.<>9__0_0 ?? (<>c.<>9__0_0 =
new Func<string, ValueTuple<object, int>, string>(<>c.<>9.<<Main>$>b__0_0)),
new ValueTuple<object, int>(item, item2));
</script></code></pre>
<medium class="code-header">Remove closure allocations.</medium>
</section>
<section data-visibility="hidden" data-background="img/time-g5afc932c4_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Benchmarking Time!</h2>
<p>We can only know the before and after when we measure it.</p>
</div>
</section>
<section data-visibility="hidden" data-background="benchmarks/ConcurrentDictionaryGetOrAdd.png"
data-background-size="95%">
<div style="position:relative;">
<div class="fragment fade-in"
style="position: absolute; left:17%; top:35%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(33, 168, 100, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> ~71%<br />
<i class="fa fa-trash-o"></i> ~Gone!
</p>
</div>
</div>
</section>
<section>
<h2 class="r-fit-text" data-id="thumb">How to detect those allocations?</h2>
<ul>
<li class="fragment fade-in">Use memory profilers and watch out for excessive allocations of
<code>*__DisplayClass*</code> or various variants of <code>Action*</code> and
<code>Func*</code>
</li>
<li class="fragment fade-in">Use tools like <a
href="https://plugins.jetbrains.com/plugin/9223-heap-allocations-viewer">Heap Allocation
Viewer (Rider)</a> or <a
href="https://marketplace.visualstudio.com/items?itemName=MukulSabharwal.ClrHeapAllocationAnalyzer">Heap
Allocation Analyzer (Visual Studio)</a></li>
<li class="fragment fade-in">Many built-in .NET types that use delegates have nowadays generic
overloads that allow to pass state into the delegate.</li>
</ul>
<aside class="notes">
To demonstrate how these can add up in real-world scenarios, let me show you a before and
after
comparison when I removed the closure allocations for NServiceBus pipeline execution
</aside>
</section>
<section>
<div style="position:relative;">
<div class="fragment fade-in"
style="position: absolute; left:20%; top:20%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(33, 168, 100, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> ~74-78%<br />
<i class="fa fa-trash-o"></i> ~Gone!
</p>
</div>
<img src="benchmarks/NServiceBusPipelineExecutionCropped.png" />
<p style="font-size: 1.5em"><a
href="https://go.particular.net/ndc-porto-2023-pipeline">go.particular.net/ndc-porto-2023-pipeline</a>
</p>
</div>
</section>
</section>
<section>
<section data-background="img/excess-g121ed821c_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none;">Avoid excessive allocations to reduce the GC overhead</h2>
<p>Pool and re-use buffers (and larger objects)</p>
</div>
</section>
<section>
<pre class="fit" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|3,4" data-trim data-noescape><script type="text/template">
var data = new ArraySegment<byte>(Guid.NewGuid().ToByteArray());
var guidBuffer = new byte[16];
Buffer.BlockCopy(data.Array, data.Offset, guidBuffer, 0, 16);
var lockTokenGuid = new Guid(guidBuffer);
</script></code></pre>
<medium class="code-header">Pool and re-use buffers.</medium>
<aside class="notes">
Azure Service Bus uses the concept of lock tokens (a glorified GUID) in certain modes to
acknowledge messages. For messages loaded by the client, there is a lock token that needs to be
turned into a GUID representation. When receiving lots of messages this creates countless
unnecessary allocations.
</aside>
</section>
<section>
<pre class="fit" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|1|4" data-trim data-noescape><script type="text/template">
byte[] guidBuffer = ArrayPool<byte>.Shared.Rent(16);
Buffer.BlockCopy(data.Array, data.Offset, guidBuffer, 0, 16);
var lockTokenGuid = new Guid(guidBuffer);
ArrayPool<byte>.Shared.Return(guidBuffer);
</script></code></pre>
<medium class="code-header">Pool and re-use buffers.</medium>
<aside class="notes">
.NET has a built-in mechanism called ArrayPool&lt;T&gt; that allows you to have pooled arrays that
can be
reused.
</aside>
</section>
<section data-background="img/time-g5afc932c4_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Benchmarking Time!</h2>
<p>We can only know the before and after when we measure it.</p>
</div>
</section>
<section data-background="benchmarks/BufferAndBlockCopyPooling.png" data-background-size="90%">
<div style="position:relative;">
<div class="fragment fade-in"
style="position: absolute; left:20%; top:-10%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(168, 114, 33, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> +226%<br />
<i class="fa fa-trash-o"></i> ~Gone!
</p>
</div>
</div>
<aside class="notes">
It turns out that while we are saving allocations now, we haven't really made things much better
overall, since the code now takes more than double the time to execute. It might very well be
that this is an acceptable tradeoff for the library or framework you are building. That being said,
we can do better.
</aside>
</section>
</section>
<section>
<section data-background="img/excess-g121ed821c_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid excessive allocations to reduce the GC overhead</h2>
<p>For smaller local buffers, consider using the stack</p>
</div>
</section>
<section>
<pre class="fit" style="font-size: 0.9em"><code class="language-csharp" data-line-numbers="|1|2" data-trim data-noescape><script type="text/template">
Span<byte> guidBytes = stackalloc byte[16];
data.AsSpan().CopyTo(guidBytes);
var lockTokenGuid = new Guid(guidBytes);
</script></code></pre>
<medium class="code-header">Small local buffers on stack.</medium>
<aside class="notes">
With the introduction of Span&lt;T&gt; and the stackalloc keyword, we can directly allocate the
memory on the method's stack that is cleared when the method returns. But why even copy when
Guid ctors have support for ReadOnlySpan&lt;T&gt;? Due to having to target .NET Standard 2.0, where we can only pass `byte[]` to the Guid constructor and we have to take endianness into account, the actual version was a bit more complicated and the above example is slightly twisting the reality, call it artistic freedom. We will be talking about techniques of how to
avoid
memory copying later. Where you have to copy memory, though, this technique comes in handy.
</aside>
</section>
<section data-background="img/time-g5afc932c4_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Benchmarking Time!</h2>
<p>We can only know the before and after when we measure it.</p>
</div>
</section>
<section data-background="benchmarks/StackallocWithGuid.png" data-background-size="90%">
<div style="position:relative;">
<div class="fragment fade-in"
style="position: absolute; left:20%; top:-10%; width: 60%; box-shadow: 0 1px 4px rgba(0, 0, 0, 0.5), 0 5px 25px rgba(0,0,0,0.2); background-color: rgba(33, 168, 100, 0.9); color: #fff; padding: 25px; font-size: 50px; text-align: center;">
<p><i class="fa fa-tachometer" aria-hidden="true"></i> ~45%<br />
<i class="fa fa-trash-o"></i> ~Gone!
</p>
</div>
</div>
<aside class="notes">
Beware not to overallocate on the stack. This can lead to nasty problems at runtime. When
allocating a certain size, sometimes it is better to also skip locals init. We will be talking
about that later.
</aside>
</section>
</section>
<section data-visibility="hidden">
<section data-background="img/excess-g121ed821c_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid excessive allocations to reduce the GC overhead</h2>
<p>Parameter overloads and boxing</p>
</div>
<aside class="notes">
Some methods have parameter overloads of type `params object[]`. That can lead to some sneaky
and costly array allocations that you might not even be aware of. With newer incarnations of
.NET there have been a number of improvements made in that area by introducing new method
overloads for common cases that don't require allocating a parameter array.
</aside>
</section>
<section data-auto-animate>
<code data-id="whenany-title">Task.WhenAny</code>
<pre data-id="whenany"><code class="language-csharp" data-line-numbers="" data-trim data-noescape><script type="text/template">
public static Task<Task> WhenAny(params Task[] tasks);
// most common case
await Task.WhenAny(new[] { task1, task2 });
</script></code></pre>
<small class="code-header">Avoid parameter overloads and boxing.</small>
</section>
<section data-auto-animate>
<code data-id="whenany-title">Task.WhenAny</code>
<pre data-id="whenany"><code class="language-csharp" data-line-numbers="" data-trim data-noescape><script type="text/template">
public static Task<Task> WhenAny(Task task1, Task task2);
await Task.WhenAny(task1, task2);
</script></code></pre>
<small class="code-header">Avoid parameter overloads and boxing.</small>
</section>
<section data-auto-animate>
<code>CancellationTokenSource</code>
<pre data-id="tokensource" style="font-size: 0.5em"><code class="language-csharp" data-line-numbers="" data-trim data-noescape><script type="text/template">
public static CancellationTokenSource CreateLinkedTokenSource(
params CancellationToken[] tokens
);
</script></code></pre>
<small class="code-header">Avoid parameter overloads and boxing.</small>
</section>
<section data-auto-animate>
<code>CancellationTokenSource</code>
<pre data-id="tokensource" style="font-size: 0.5em"><code class="language-csharp" data-line-numbers="" data-trim data-noescape><script type="text/template">
public static CancellationTokenSource CreateLinkedTokenSource(
CancellationToken token1,
CancellationToken token2
);
</script></code></pre>
<small class="code-header">Avoid parameter overloads and boxing.</small>
</section>
</section>
<section data-background="img/office-4249395_1280.jpg">
</section>
<section>
<ul>
<li>Avoid excessive allocations to reduce the GC overhead</li>
<ul>
<li>Think at least twice before using LINQ or
unnecessary enumeration on the hot path</li>
<li>Be aware of closure allocations</li>
<li>Pool and re-use buffers</li>
<li>For smaller local buffers, consider using the stack</li>
<li style="color: lightslategray">Be aware of parameter overloads</li>
<li style="color: lightslategray">Where possible and feasible use value types but pay
attention to unnecessary boxing</li>
<li style="color: lightslategray">Move allocations away from the hot-path where possible</li>
</ul>
</ul>
</section>
<section data-background="img/plotter-2138990_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid unnecessary copying of memory</h2>
</div>
<aside class="notes">
<ul>
<li>The key to avoiding unnecessary copying is Span and friends, introduced with C# 7.3</li>
<li>Span&lt;T&gt; is a value type that enables the representation of contiguous regions of
arbitrary memory</li>
<li>It is a pointer to a memory location and a length to represent the length of the memory
represented by the span.</li>
<li>It can be "sliced" into various chunks, you can represent various slices of memory of
variable length without having to copy the memory</li>
<li>Span&lt;T&gt; can only live on the stack while its cousin Memory&lt;T&gt; can live on the
heap and therefore be used in asynchronous methods.</li>
<li>We want rules I know.</li>
</ul>
</aside>
</section>
<section data-visibility="hidden">
<section data-background="img/plotter-2138990_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid unnecessary copying of memory</h2>
<p>Watch out for immutable/readonly data that is copied</p>
</div>
</section>
<section data-auto-animate>
<pre data-id="body-copy" style="font-size: 0.45em"><code class="language-csharp" data-line-numbers="|11-12" data-trim data-noescape><script type="text/template">
public class ServiceBusReceivedMessage {
public BinaryData Body { get; }
}
public static ServiceBusMessage
CreateFrom(ServiceBusReceivedMessage message) {
//...
var originalBody = message.Body;
if (!originalBody.IsEmpty)
{
var clonedBody = new byte[originalBody.Length];
Array.Copy(originalBody.ToArray(), clonedBody, originalBody.Length);
copiedMessage.Body = clonedBody;
}
}
</script></code></pre>
<small class="code-header">Immutable/readonly data should not be copied.</small>
</section>
<section data-auto-animate>
<pre data-id="body-copy" style="font-size: 0.45em"><code class="language-csharp" data-line-numbers="|11" data-trim data-noescape><script type="text/template">
public class ServiceBusReceivedMessage {
public BinaryData Body { get; }
}
public static ServiceBusMessage
CreateFrom(ServiceBusReceivedMessage message) {
//...
var originalBody = message.Body;
if (!originalBody.IsEmpty)
{
copiedMessage.Body = originalBody;
}
}
</script></code></pre>
<small class="code-header">Immutable/readonly data should not be copied.</small>
<aside class="notes">
Other times memory copying isn't so obvious or requires a deep understanding of what is
happening
under the hood of the framework, library or SDK in use.
</aside>
</section>
</section>
<section>
<section data-background="img/plotter-2138990_1280.jpg">
<div class="image-slide-box">
<h2 style="text-transform: none">Avoid unnecessary copying of memory</h2>
<ul>
<li>Look for Stream and Byte-Array usages that are copied or manipulated without using
<code>Span</code> or <code>Memory</code>
</li>
<li>Replace existing data manipulation methods with newer <code>Span</code> or
<code>Memory</code> based variants
</li>
</ul>
</div>
<aside class="notes">
<ul>
<li>Other times memory copying isn't so obvious or requires a deep understanding of what is
happening
under the hood of the framework, library or SDK in use.</li>
<li>The EventHubs client has recently introduced a new publisher type that internally uses a
partition key resolver that turns string partition keys into hash codes. 30-40% of the
hot path will be using partition keys when publishing, and therefore represents a
non-trivial amount of CPU and memory cycles when using that publisher type. The hash
code function looked like the following.</li>
</ul>
</aside>
</section>
<section>
<pre class="stretch" data-id="compute-hash" style="font-size: 0.7em"><code class="language-csharp" data-line-numbers="|6,7,11" data-trim data-noescape><script type="text/template">
private static short GenerateHashCode(string partitionKey) {
if (partitionKey == null) {
return 0;
}
var encoding = Encoding.UTF8;
ComputeHash(encoding.GetBytes(partitionKey), 0, 0, out uint hash1, out uint hash2);
return (short)(hash1 ^ hash2);
}
private static void ComputeHash(byte[] data, uint seed1, uint seed2,
out uint hash1, out uint hash2) {
uint a, b, c;
a = b = c = (uint)(0xdeadbeef + data.Length + seed1);
c += seed2;
int index = 0, size = data.Length;
while (size > 12) {
a += BitConverter.ToUInt32(data, index);
b += BitConverter.ToUInt32(data, index + 4);
c += BitConverter.ToUInt32(data, index + 8);
// rest omitted
}
</script></code></pre>
<medium class="code-header">Avoid unnecessary copying of memory.</medium>
</section>
<section>
<pre class="stretch" data-id="compute-hash" style="font-size: 0.6em"><code class="language-csharp" data-line-numbers="|10|13|7,13-16|18-19|16,23-25|29|1,15" data-trim data-noescape><script type="text/template">
[SkipLocalsInit]
private static short GenerateHashCode(string partitionKey) {
if (partitionKey == null) {