-
Notifications
You must be signed in to change notification settings - Fork 0
/
deadlock-in-dllmain.html
253 lines (239 loc) · 17.4 KB
/
deadlock-in-dllmain.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
<!DOCTYPE html>
<!--[if IE 8]> <html class="no-js lt-ie9" lang="zh-cn"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="zh-cn"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>道可叨 | DllMain 中的死锁问题 </title>
<meta name="viewport" content="width=device-width">
<meta name="author" content="Qiang">
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico">
<link href="https://zhuoqiang.github.io/feeds/atom.xml" type="application/atom+xml" rel="alternate" title="道可叨 Atom Feed" />
<link rel="stylesheet" href="https://zhuoqiang.github.io/theme/css/app.css">
<script src="https://zhuoqiang.github.io/theme/js/vendor/custom.modernizr.js"></script>
</head>
<body>
<header>
<hgroup>
<a href="https://zhuoqiang.github.io">
<h1 i18n>道可叨</h1>
<h2>Free Will</h2>
</a>
</hgroup>
</header>
<section>
<article>
<header>
<h1><a href="https://zhuoqiang.github.io/deadlock-in-dllmain.html">DllMain 中的死锁问题</a></h1>
</header> <div>
<section>
<div class="contents local topic" id="id1">
<ul class="simple">
<li><a class="reference internal" href="#id2" id="id6">问题</a></li>
<li><a class="reference internal" href="#id3" id="id7">原因</a></li>
<li><a class="reference internal" href="#id4" id="id8">解</a></li>
<li><a class="reference internal" href="#id5" id="id9">教训</a></li>
</ul>
</div>
<div class="section" id="id2">
<h2><a class="toc-backref" href="#id6">问题</a></h2>
<p>某程序要在动态加载的 DLL 里创建线程,并希望 DLL 在被 <tt class="docutils literal">FreeLibrary</tt> 时能自动销毁线程,以实现安全退出。DLL 的实现代码可简写为</p>
<div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">Worker</span>
<span class="p">{</span>
<span class="k">public</span><span class="o">:</span>
<span class="n">Worker</span><span class="p">()</span>
<span class="o">:</span> <span class="n">quit_</span><span class="p">(</span><span class="nb">true</span><span class="p">)</span>
<span class="p">{}</span>
<span class="o">~</span><span class="n">Worker</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">Stop</span><span class="p">();</span>
<span class="p">}</span>
<span class="kt">void</span> <span class="n">Work</span><span class="p">()</span>
<span class="p">{</span>
<span class="k">if</span> <span class="p">(</span><span class="n">thread_</span><span class="p">)</span>
<span class="p">{</span>
<span class="k">return</span><span class="p">;</span>
<span class="p">}</span>
<span class="n">thread_</span><span class="p">.</span><span class="n">reset</span><span class="p">(</span>
<span class="k">new</span> <span class="n">boost</span><span class="o">::</span><span class="kr">thread</span><span class="p">(</span>
<span class="n">boost</span><span class="o">::</span><span class="n">bind</span><span class="p">(</span>
<span class="o">&</span><span class="n">Worker</span><span class="o">::</span><span class="n">DoWork</span><span class="p">,</span> <span class="k">this</span><span class="p">)));</span>
<span class="p">}</span>
<span class="kt">void</span> <span class="n">Stop</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">quit_</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span>
<span class="k">if</span> <span class="p">(</span><span class="n">thread_</span><span class="p">)</span>
<span class="p">{</span>
<span class="n">thread_</span><span class="o">-></span><span class="n">join</span><span class="p">();</span>
<span class="n">thread_</span><span class="p">.</span><span class="n">reset</span><span class="p">();</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="k">private</span><span class="o">:</span>
<span class="kt">void</span> <span class="n">DoWork</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">quit_</span> <span class="o">=</span> <span class="nb">false</span><span class="p">;</span>
<span class="k">while</span> <span class="p">(</span><span class="o">!</span> <span class="n">quit_</span><span class="p">)</span>
<span class="p">{</span>
<span class="n">DoSomething</span><span class="p">();</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="n">boost</span><span class="o">::</span><span class="n">scoped_ptr</span><span class="o"><</span><span class="n">boost</span><span class="o">::</span><span class="kr">thread</span><span class="o">></span> <span class="n">thread_</span><span class="p">;</span>
<span class="kt">bool</span> <span class="k">volatile</span> <span class="n">quit_</span><span class="p">;</span>
<span class="p">};</span>
<span class="n">Worker</span> <span class="n">gWorker</span><span class="p">;</span>
<span class="kr">__declspec</span><span class="p">(</span><span class="n">dllexport</span><span class="p">)</span> <span class="kt">void</span> <span class="n">StartThread</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">gWorker</span><span class="p">.</span><span class="n">Work</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
<p>这里使用了 C++ 的全局变量和 <tt class="docutils literal"><span class="pre">boost::thread</span></tt> 库,并且没有显式定义 <tt class="docutils literal">DllMain</tt> 函数。这种情况下,C++ Runtime 会提供一份隐藏的 <tt class="docutils literal">DllMain</tt>,它会在 <tt class="docutils literal">FreeLibrary</tt> 时销毁全局变量和静态变量,比如 <tt class="docutils literal">gWorker</tt> 对象。 而 <tt class="docutils literal">Worker</tt> 类又用 RAII 手法在析构函数中通知并等待线程退出。这样就能保证在 <tt class="docutils literal">FreeLibrary</tt> 时 DLL 中创建的线程自动销毁了。该方法很简洁,也应该行得通。但在实际调用 <tt class="docutils literal">FreeLibrary</tt> 时,程序在 C++ Runtime 的 <tt class="docutils literal">DllMain</tt> 中发生死锁。</p>
</div>
<div class="section" id="id3">
<h2><a class="toc-backref" href="#id7">原因</a></h2>
<p>死锁最常见的原因就是多个线程彼此拥有了对方需要的资源,大家都在等对方释放资源好继续运行。从代码层面上来说,就是多个锁的获取顺序不一致:</p>
<div class="highlight"><pre><span></span><span class="c1">// A 和 B 拿刀叉的顺序不一样,如果只有一副刀叉,他俩可能都得饿死</span>
<span class="n">Thread_A</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">lock_guard</span> <span class="n">fork_lock</span><span class="p">(</span><span class="n">mutex_for_fork</span><span class="p">);</span>
<span class="n">lock_guard</span> <span class="nf">knife_lock</span><span class="p">(</span><span class="n">mutex_for_knife</span><span class="p">);</span>
<span class="n">eat</span><span class="p">();</span>
<span class="p">}</span>
<span class="n">Thread_B</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">lock_guard</span> <span class="n">knife_lock</span><span class="p">(</span><span class="n">mutex_for_knife</span><span class="p">);</span>
<span class="n">lock_guard</span> <span class="nf">fork_lock</span><span class="p">(</span><span class="n">mutex_for_fork</span><span class="p">);</span>
<span class="n">eat</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
<p>太阳下面无新事, <tt class="docutils literal">DllMain</tt> 死锁的原因也是一样。原来,微软为了保证多个动态库 load / unload 并发调用的多线程安全,在 <tt class="docutils literal">DllMain</tt> 上加了把 loader lock 全局锁:在 <tt class="docutils literal">LoadLibrary</tt> 时,为避免多个线程同时 load 同一个 DLL,系统会先获取 loader lock 锁再进行检查。只有当发现没有加载过该 DLL 时才真正加载这个 DLL 并调用它的 <tt class="docutils literal">DllMain</tt> ,在这一切做完后再将 loader 锁释放。 <tt class="docutils literal">FreeLibrary</tt> 的时候同样如此。唯有这样,才能保证并发 load/unload DLL 时不出乱子。</p>
<p>问题来了:DLL 中任意一个线程(包括主线程和派生线程)退出时,系统都会调用 <tt class="docutils literal">DllMain</tt> 进行通知。如果 <tt class="docutils literal">DllMain</tt> 中的代码再与其它线程进行同步,就相当于有了两把锁,极有可能产生死锁。具体到我们的例子,在 <tt class="docutils literal">FreeLibrary</tt> 时发生了什么呢?</p>
<ul class="simple">
<li>主线程中<ol class="arabic">
<li>获取 loader lock,安全调用 <tt class="docutils literal">DllMain</tt></li>
<li>通知 worker thread 退出</li>
<li>使用 <tt class="docutils literal">WaitForSingleObject</tt> 等待 worker thread 退出</li>
</ol>
</li>
<li>Worker 线程<ol class="arabic">
<li>收到退出通知</li>
<li>获取 loader lock,安全调用 <tt class="docutils literal">DllMain</tt></li>
<li>线程退出</li>
</ol>
</li>
</ul>
<p>换句话说,主线程退出就是先拿了叉子(loader lock),再要拿刀( <tt class="docutils literal">WaitForSingleObject</tt> )。而工作线程退出就是要先拿叉子(loader lock)才能给你刀(唤醒 <tt class="docutils literal">WaitForSingleObject</tt> )。死锁发生了!</p>
<div class="highlight"><pre><span></span><span class="c1">// 伪代码</span>
<span class="n">Main_Thread</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">Lock</span><span class="p">(</span><span class="n">loader_lock</span><span class="p">);</span>
<span class="n">WaitForSingleObject</span><span class="p">(</span><span class="n">Worker_Thread</span><span class="p">);</span>
<span class="p">}</span>
<span class="n">Worker_Thread</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">Lock</span><span class="p">(</span><span class="n">loader_lock</span><span class="p">);</span>
<span class="n">ThreadExit</span><span class="p">(</span><span class="n">Worker_Thread</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
<p>微软充分预见到这个危险,在 <cite>Best Practices for Creating DLLs</cite> 中就详细说明了 loader lock 以及 <tt class="docutils literal">DllMain</tt> 中的编码注意事项,包括禁止在 <tt class="docutils literal">DllMain</tt> 中进行线程同步。</p>
</div>
<div class="section" id="id4">
<h2><a class="toc-backref" href="#id8">解</a></h2>
<p>没有漂亮的解。建议不提供自动释放线程的功能。既然导出了创建工作线程的函数,那就再导出一个停止线程的函数。Client 要在 <tt class="docutils literal">FreeLibrary</tt> 之前主动调用。</p>
<div class="highlight"><pre><span></span><span class="kr">__declspec</span><span class="p">(</span><span class="n">dllexport</span><span class="p">)</span> <span class="kt">void</span> <span class="n">StopThread</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">gWorker</span><span class="p">.</span><span class="n">Stop</span><span class="p">();</span>
<span class="p">}</span>
</pre></div>
<p>如果仍然希望提供自动清除线程这种安全的便利性,需要的改动是:在主线程 <tt class="docutils literal">DllMain</tt> 中不等待工作线程退出,而是等待工作线程进行必要的清理动作后发出通知再强行终止工作线程;同时,工作线程在通知发出后还要进行无限等待以避免过早退出后进入 <tt class="docutils literal">DllMain</tt> 导致死锁。</p>
<div class="highlight"><pre><span></span><span class="kt">bool</span> <span class="n">gQuit</span> <span class="o">=</span> <span class="nb">false</span><span class="p">;</span>
<span class="n">Event</span> <span class="n">gEvent</span><span class="p">;</span>
<span class="kt">void</span> <span class="nf">WorkerThread</span><span class="p">()</span>
<span class="p">{</span>
<span class="k">while</span> <span class="p">(</span><span class="o">!</span> <span class="n">gQuit</span><span class="p">)</span>
<span class="p">{</span>
<span class="n">DoSomething</span><span class="p">();</span>
<span class="p">}</span>
<span class="n">FinishJob</span><span class="p">();</span>
<span class="n">CleanUp</span><span class="p">();</span>
<span class="n">SetEvent</span><span class="p">(</span><span class="n">gEvent</span><span class="p">);</span> <span class="c1">// 现在你可以强行终止我了</span>
<span class="k">while</span> <span class="p">(</span><span class="nb">true</span><span class="p">)</span> <span class="c1">// 等待强杀</span>
<span class="p">{</span>
<span class="n">Sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
<span class="p">}</span>
<span class="p">}</span>
<span class="n">DllMain</span><span class="p">()</span>
<span class="p">{</span>
<span class="n">gQuit</span> <span class="o">=</span> <span class="nb">true</span><span class="p">;</span>
<span class="n">WaitForSingleObject</span><span class="p">(</span><span class="n">gEvent</span><span class="p">);</span>
<span class="n">TerminateThread</span><span class="p">(</span><span class="n">gThreadHandler</span><span class="p">);</span>
<span class="p">}</span>
</pre></div>
<p>这里不推荐这个方案。仅仅为了 client 调用方便就引入这么晦涩的逻辑,性价比低,也不利于代码维护。而且在强杀线程前一定要确认线程所用的资源都处于一致的状态,这个前置条件是颗定时炸弹,对代码维护人员是个考验。</p>
</div>
<div class="section" id="id5">
<h2><a class="toc-backref" href="#id9">教训</a></h2>
<p>为了让代码简洁方便又安全,人们发明了很多东西。比如 C++ runtime 自动管理全局变量的生命周期;微软使用 loader lock 保证 DLL 并发加载的安全性;以及我们例子中希望在 <tt class="docutils literal">FreeLibrary</tt> 时自动释放工作线程。这些确实方便了代码,但同时,每一个简化都是有代价的:</p>
<ul class="simple">
<li>C++ 全局变量初始化和释放的自动管理让调试变得困难,也让初始化和释放的顺序不可控</li>
<li>微软的 loader lock 让 <tt class="docutils literal">DllMain</tt> 的编写多了一些潜规则和禁忌</li>
<li>要正确实现 DLL 退出时自动释放工作线程的功能,这要花相当的精力,而且代码的可读性和可维护性都有潜在的问题</li>
</ul>
<p>好设计必须充分权衡收益和代价。换言之,本就没有 <cite>好的</cite> 设计,只有 <cite>合适的</cite> 设计。</p>
</div>
</section>
</div>
<footer>
<p>
<time datetime="2010-05-31T00:46:00+08:00" pubdate>2010-05-31</time><a href="https://zhuoqiang.github.io/category/programming.html"><span class="category" i18n>programming</span></a>
</p>
<ul>
<li><a href="https://zhuoqiang.github.io/tag/c.html"><span class="tag" i18n>c++</span></a></li>
<li><a href="https://zhuoqiang.github.io/tag/concurrent.html"><span class="tag" i18n>concurrent</span></a></li>
</ul>
</footer><link rel="stylesheet" href="https://unpkg.com/gitalk/dist/gitalk.css">
<script src="https://unpkg.com/gitalk@latest/dist/gitalk.min.js"></script>
<div id="gitalk-container"></div>
<script type="text/javascript">
// Set up the Gitalk comment widget for this post: build an instance from the
// options below and render it into the page.
var gitalk = new Gitalk({
clientID: "2f60008869581fd61d19",
// NOTE(review): this secret is delivered to every visitor in the page source —
// confirm the OAuth app it belongs to is intended to be exposed this way.
clientSecret: "6b303dcf58c04cd35782bae82540921663cbfda8",
repo: "zhuoqiang.github.com",
owner: "zhuoqiang",
admin: ["zhuoqiang"],
// Presumably the key that ties this page to its comment thread; it matches
// the page's file name without the extension — verify against Gitalk docs.
id: "deadlock-in-dllmain",
title: "DllMain 中的死锁问题",
perPage: 30,
distractionFreeMode: true,
pagerDirection: "last",
});
// Render into the element whose id is "gitalk-container".
gitalk.render('gitalk-container');
</script></article>
</section>
<footer>
<p>@ <a href="mailto:zhuo.qiang@gmail.com"><span i18n>Qiang</span></a> | <span class="heart"><a href="https://github.com/getpelican/pelican">Pelican</a> & <a href="https://bitbucket.org/zhuoqiang/jing">Jing</a></span></p>
</footer>
<script src="https://ajax.aspnetcdn.com/ajax/jQuery/jquery-1.8.3.min.js"></script>
<script>window.jQuery || document.write('<script src="https://zhuoqiang.github.io/theme/js/vendor/jquery-1.8.3.min.js"><\/script>')</script>
<script src="https://zhuoqiang.github.io/theme/js/vendor/moment.min.js"></script>
<script src="https://zhuoqiang.github.io/theme/js/vendor/moment-lang.min.js"></script>
<script src="https://zhuoqiang.github.io/theme/js/vendor/i18next-1.6.0.min.js"></script>
<script src="https://zhuoqiang.github.io/theme/js/main.js"></script>
<script type="text/javascript">
// GoSquared analytics bootstrap: publishes the account id on the global
// `GoSquared` object, then injects the tracker script once the window has
// finished loading.
var GoSquared = {};
GoSquared.acct = "GSN-743443-H";
(function (w) {
  // Stores the current timestamp on the window and inserts the tracker
  // <script> element before the first script element in the document.
  function gs() {
    w._gstc_lt = +new Date;
    var d = document, g = d.createElement("script");
    g.type = "text/javascript";
    // Explicit https: a protocol-relative URL would resolve to file:// or
    // http:// when the page is opened from disk or served over plain HTTP.
    g.src = "https://d1l6p2sc9645hc.cloudfront.net/tracker.js";
    var s = d.getElementsByTagName("script")[0];
    s.parentNode.insertBefore(g, s);
  }
  // attachEvent is the fallback for browsers that lack addEventListener.
  w.addEventListener ?
    w.addEventListener("load", gs, false) :
    w.attachEvent("onload", gs);
})(window);
</script>
</body>
</html>