-
Notifications
You must be signed in to change notification settings - Fork 75
/
Copy pathDecisionServiceClient.cs
488 lines (422 loc) · 19.8 KB
/
DecisionServiceClient.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
using Microsoft.Research.MultiWorldTesting.Contract;
using Microsoft.Research.MultiWorldTesting.ExploreLibrary;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
namespace Microsoft.Research.MultiWorldTesting.ClientLibrary
{
public class DecisionServiceClient<TContext> : IDecisionServiceClient<TContext>
{
/// <summary>Policy that receives model updates in UpdateModel and is then installed as the explorer's policy.</summary>
private readonly IContextMapper<TContext, ActionProbability[]> internalPolicy;
/// <summary>Recorder handed to the MWT explorer; an OfflineRecorder in offline mode, otherwise a join-service logger unless replaced via the Recorder property.</summary>
private IRecorder<TContext, int[]> recorder;
/// <summary>The recorder viewed as an ILogger; null when the recorder does not implement ILogger, in which case rewards and outcomes are not reported.</summary>
private ILogger logger;
/// <summary>Optional policy used by ChooseRankingAsync when no default actions are supplied by the caller.</summary>
private IContextMapper<TContext, ActionProbability[]> initialPolicy;
/// <summary>Client configuration supplied to the constructor (polling periods, callbacks, offline settings).</summary>
protected readonly DecisionServiceConfiguration config;
/// <summary>Application metadata supplied to the constructor (application id, model blob location, training arguments).</summary>
private readonly ApplicationClientMetadata metaData;
/// <summary>Explorer that all ChooseAction/ChooseRanking calls are delegated to; set to null by Dispose.</summary>
private MwtExplorer<TContext, int[], ActionProbability[]> mwtExplorer;
/// <summary>Background poller for the settings blob; only created in online mode when settings polling is not disabled.</summary>
private AzureBlobBackgroundDownloader settingsDownloader;
/// <summary>Background poller for the model blob; only created in online mode when model polling is not disabled.</summary>
private AzureBlobBackgroundDownloader modelDownloader;
/// <summary>Supplies the number of actions for a context; used to build the ranking when a single default action is given.</summary>
private INumberOfActionsProvider<TContext> numActionsProvider;
/// <summary>
/// Placeholder recorder installed by the constructor in offline mode.
/// It rejects every record attempt, so a real recorder has to be assigned before decisions are logged.
/// </summary>
private class OfflineRecorder : IRecorder<TContext, int[]>
{
    /// <summary>
    /// Always fails; none of the arguments are inspected.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown on every call.</exception>
    public void Record(TContext context, int[] value, object explorerState, object mapperState, string uniqueKey)
    {
        const string message = "Must set an recorder in offline mode";
        throw new NotSupportedException(message);
    }
}
/// <summary>
/// Wires up the client: picks a recorder (offline placeholder or join-service logger), starts the
/// background settings/model pollers in online mode, and creates the underlying MWT explorer.
/// </summary>
/// <param name="config">Client configuration; must not be null. Missing upload configurations are filled in with defaults.</param>
/// <param name="metaData">Application metadata; required in online mode.</param>
/// <param name="internalPolicy">Policy that is later updated with downloaded models.</param>
/// <param name="initialPolicy">Optional policy passed to the explorer at creation; mutually exclusive with <paramref name="initialExplorer"/>.</param>
/// <param name="initialFullExplorer">Optional explorer used while no model is ready and no default action is provided; defaults to a PermutationExplorer.</param>
/// <param name="initialExplorer">Optional explorer used while no model is ready and a default action is provided; defaults to an epsilon-greedy explorer.</param>
/// <exception cref="ArgumentNullException">config is null, or offline mode is requested without an OfflineApplicationID.</exception>
/// <exception cref="ArgumentException">No number-of-actions provider could be resolved.</exception>
/// <exception cref="Exception">metaData is null in online mode, or both initialExplorer and initialPolicy are specified.</exception>
public DecisionServiceClient(
DecisionServiceConfiguration config,
ApplicationClientMetadata metaData,
IContextMapper<TContext, ActionProbability[]> internalPolicy,
IContextMapper<TContext, ActionProbability[]> initialPolicy = null,
IFullExplorer<int[]> initialFullExplorer = null,
IInitialExplorer<ActionProbability[], int[]> initialExplorer = null)
{
if (config == null)
throw new ArgumentNullException("config");
// Fill in default batching configurations; note this mutates the caller's config object.
if (config.InteractionUploadConfiguration == null)
config.InteractionUploadConfiguration = new JoinUploader.BatchingConfiguration(config.DevelopmentMode);
if (config.ObservationUploadConfiguration == null)
config.ObservationUploadConfiguration = new JoinUploader.BatchingConfiguration(config.DevelopmentMode);
this.config = config;
string appId = string.Empty;
this.metaData = metaData;
if (config.OfflineMode)
{
// Offline: no uploads and no blob polling; the placeholder recorder throws until a real one is set.
// NOTE(review): metaData is not null-checked on this path, yet it is dereferenced further down
// (InitialExplorationEpsilon, TrainArguments) — confirm offline callers always pass metadata.
this.recorder = new OfflineRecorder();
if (config.OfflineApplicationID == null)
{
throw new ArgumentNullException("OfflineApplicationID", "Offline Application ID must be set explicitly in offline mode.");
}
appId = config.OfflineApplicationID;
}
else
{
if (metaData == null)
throw new Exception("Unable to locate a registered MWT application.");
// NOTE(review): this.recorder has not been assigned anywhere before this point in the constructor,
// so this check appears to be always true.
if (this.recorder == null)
{
var joinServerLogger = new JoinServiceLogger<TContext, int[]>(metaData.ApplicationID, config.DevelopmentMode); // TODO: check token remove
switch (config.JoinServerType)
{
case JoinServerType.CustomSolution:
joinServerLogger.InitializeWithCustomAzureJoinServer(
config.LoggingServiceAddress,
config.InteractionUploadConfiguration);
break;
// Azure Stream Analytics is also the fallback for any other join server type.
case JoinServerType.AzureStreamAnalytics:
default:
joinServerLogger.InitializeWithAzureStreamAnalyticsJoinServer(
metaData.EventHubInteractionConnectionString,
metaData.EventHubObservationConnectionString,
config.InteractionUploadConfiguration,
config.ObservationUploadConfiguration);
break;
}
this.recorder = (IRecorder<TContext, int[]>)joinServerLogger;
}
// A period of TimeSpan.Zero selects the default poll delay; TimeSpan.MinValue disables polling.
var settingsBlobPollDelay = config.PollingForSettingsPeriod == TimeSpan.Zero ? DecisionServiceConstants.PollDelay : config.PollingForSettingsPeriod;
if (settingsBlobPollDelay != TimeSpan.MinValue)
{
this.settingsDownloader = new AzureBlobBackgroundDownloader(config.SettingsBlobUri, settingsBlobPollDelay, downloadImmediately: false, storageConnectionString: config.AzureStorageConnectionString);
this.settingsDownloader.Downloaded += this.UpdateSettings;
this.settingsDownloader.Failed += settingsDownloader_Failed;
}
// Same convention for the model poller, except that the first download is requested immediately.
var modelBlobPollDelay = config.PollingForModelPeriod == TimeSpan.Zero ? DecisionServiceConstants.PollDelay : config.PollingForModelPeriod;
if (modelBlobPollDelay != TimeSpan.MinValue)
{
this.modelDownloader = new AzureBlobBackgroundDownloader(metaData.ModelBlobUri, modelBlobPollDelay, downloadImmediately: true, storageConnectionString: config.AzureStorageConnectionString);
this.modelDownloader.Downloaded += this.UpdateContextMapper;
this.modelDownloader.Failed += modelDownloader_Failed;
}
appId = metaData.ApplicationID;
}
// Rewards/outcomes are only reported when the recorder also implements ILogger.
this.logger = this.recorder as ILogger;
this.internalPolicy = internalPolicy;
this.initialPolicy = initialPolicy;
if (initialExplorer != null && initialPolicy != null)
throw new Exception("Initial Explorer and Default Policy are both specified but only one can be used.");
var explorer = new GenericTopSlotExplorer();
// explorer used if model not ready and defaultAction provided
if (initialExplorer == null)
initialExplorer = new EpsilonGreedyInitialExplorer(this.metaData.InitialExplorationEpsilon);
// explorer used if model not ready and no default action provided
if (initialFullExplorer == null)
initialFullExplorer = new PermutationExplorer(1);
// A fixed action count is taken from the training arguments when "--cb_explore <n>" is present.
var match = Regex.Match(metaData.TrainArguments ?? string.Empty, @"--cb_explore\s+(?<numActions>\d+)");
if (match.Success)
{
var numActions = int.Parse(match.Groups["numActions"].Value);
this.numActionsProvider = new ConstantNumActionsProvider(numActions);
this.mwtExplorer = MwtExplorer.Create(appId,
numActions, this.recorder, explorer, initialPolicy, initialFullExplorer, initialExplorer);
}
else
{
// NOTE(review): initialExplorer was defaulted above and can no longer be null here, so this
// condition is always true and a number-of-actions provider is always required on this path.
if (initialExplorer != null || metaData.InitialExplorationEpsilon == 1f) // only needed when full exploration
{
// Prefer the policy as the source of the action count, then fall back to the explorer.
numActionsProvider = internalPolicy as INumberOfActionsProvider<TContext>;
if (numActionsProvider == null)
numActionsProvider = explorer as INumberOfActionsProvider<TContext>;
if (numActionsProvider == null)
throw new ArgumentException("Explorer must implement INumberOfActionsProvider interface");
}
this.mwtExplorer = MwtExplorer.Create(appId,
numActionsProvider, this.recorder, explorer, initialPolicy, initialFullExplorer, initialExplorer);
}
}
/// <summary>
/// Forwards a model download failure to the configured failure callback, if one is set.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">The exception reported by the downloader.</param>
void modelDownloader_Failed(object sender, Exception e)
{
    var onFailure = this.config.ModelPollFailureCallback;
    if (onFailure == null)
    {
        return;
    }

    onFailure(e);
}
/// <summary>
/// Forwards a settings download failure to the configured failure callback, if one is set.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">The exception reported by the downloader.</param>
void settingsDownloader_Failed(object sender, Exception e)
{
    var onFailure = this.config.SettingsPollFailureCallback;
    if (onFailure == null)
    {
        return;
    }

    onFailure(e);
}
/// <summary>
/// Handles a downloaded settings blob: deserializes it as application metadata, toggles exploration
/// on the explorer accordingly, and then invokes the settings success callback, if one is set.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="data">Raw JSON bytes of the settings blob.</param>
/// <remarks>
/// Empty payloads and payloads that deserialize to null are skipped with a trace warning instead of
/// failing with an ArgumentNullException or NullReferenceException. Malformed JSON is logged and ignored.
/// </remarks>
private void UpdateSettings(object sender, byte[] data)
{
    // Mirror the guard used for model payloads: there is nothing to parse in an empty blob.
    if (data == null || data.Length == 0)
    {
        Trace.TraceWarning("Empty settings detected, skipping settings update.");
        return;
    }

    try
    {
        using (var reader = new JsonTextReader(new StreamReader(new MemoryStream(data))))
        {
            var jsonSerializer = new JsonSerializer();
            var metadata = jsonSerializer.Deserialize<ApplicationClientMetadata>(reader);
            if (metadata == null)
            {
                // The serializer yields null for content-free input or a JSON null literal.
                Trace.TraceWarning("Cannot read new settings: no settings object found.");
                return;
            }

            // TODO: not sure if we want to bypass or expose EnableExplore in MWT explorer?
            this.mwtExplorer.Explorer.EnableExplore(metadata.IsExplorationEnabled);
        }

        if (this.config.SettingsPollSuccessCallback != null)
        {
            this.config.SettingsPollSuccessCallback(data);
        }
    }
    catch (JsonReaderException jrex)
    {
        Trace.TraceWarning("Cannot read new settings: " + jrex.Message);
    }
}
/// <summary>
/// Handles a downloaded model blob: applies it through UpdateModel and then invokes the model
/// success callback, if one is set. Null or empty payloads are skipped with a trace warning.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="data">Raw bytes of the downloaded model.</param>
private void UpdateContextMapper(object sender, byte[] data)
{
    bool hasModel = data != null && data.Length != 0;
    if (!hasModel)
    {
        Trace.TraceWarning("Empty model detected, skipping model update.");
        return;
    }

    using (var modelStream = new MemoryStream(data))
    {
        this.UpdateModel(modelStream);
    }

    var onSuccess = this.config.ModelPollSuccessCallback;
    if (onSuccess != null)
    {
        onSuccess(data);
    }
}
/// <summary>
/// Downloads the model from the metadata's model blob location and applies it through UpdateModel.
/// An empty download is skipped with a trace warning.
/// </summary>
/// <param name="cancellationToken">
/// Token used to abandon the operation. It is checked before the download starts, and cancelling it
/// while the download is in flight aborts the web request.
/// </param>
/// <returns>A task that completes once the model has been applied or the update has been skipped.</returns>
/// <exception cref="OperationCanceledException">The token was already cancelled when the method was called.</exception>
public async Task DownloadModelAndUpdate(CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    using (var wc = new WebClient())
    // Tie the token to the pending request; the registration is released before the client is disposed.
    using (cancellationToken.Register(wc.CancelAsync))
    {
        byte[] modelData = await wc.DownloadDataTaskAsync(this.metaData.ModelBlobUri);
        if (modelData == null || modelData.Length == 0)
        {
            Trace.TraceWarning("Empty model detected, skipping model update.");
            return;
        }

        // A freshly constructed MemoryStream is already positioned at the start of the buffer.
        using (var ms = new MemoryStream(modelData))
        {
            this.UpdateModel(ms);
        }
    }
}
/// <summary>
/// Gets or sets the explorer held by the underlying MWT explorer.
/// </summary>
internal IExplorer<int[], ActionProbability[]> Explorer
{
    get
    {
        return this.mwtExplorer.Explorer;
    }

    set
    {
        this.mwtExplorer.Explorer = value;
    }
}
/// <summary>
/// Gets or sets the initial policy consulted when a ranking is requested without default actions.
/// </summary>
internal IContextMapper<TContext, ActionProbability[]> InitialPolicy
{
    get
    {
        return this.initialPolicy;
    }

    set
    {
        this.initialPolicy = value;
    }
}
/// <summary>
/// Gets or sets the recorder. Setting it also refreshes the reward/outcome logger (the recorder
/// viewed as an ILogger, or null) and hands the recorder to the underlying MWT explorer.
/// </summary>
/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
public IRecorder<TContext, int[]> Recorder
{
    get
    {
        return this.recorder;
    }

    set
    {
        if (object.ReferenceEquals(value, null))
        {
            throw new ArgumentNullException("Recorder");
        }

        this.recorder = value;
        this.logger = value as ILogger;
        this.mwtExplorer.Recorder = value;
    }
}
/// <summary>
/// Fluent variant of the Recorder setter.
/// </summary>
/// <param name="recorder">The recorder to install; must not be null.</param>
/// <returns>This client instance, to allow call chaining.</returns>
public DecisionServiceClient<TContext> WithRecorder(IRecorder<TContext, int[]> recorder)
{
    var self = this;
    self.Recorder = recorder;
    return self;
}
/// <summary>
/// Chooses an action, using the action produced by <paramref name="defaultPolicy"/> as the default. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultPolicy">Policy evaluated on the context to obtain the default action.</param>
/// <returns>The chosen action.</returns>
public async Task<int> ChooseActionAsync(string uniqueKey, TContext context, IPolicy<TContext> defaultPolicy)
{
    var decision = await defaultPolicy.MapContextAsync(context);
    var chosen = await this.ChooseActionAsync(uniqueKey, context, decision.Value, doNotLog: false);
    return chosen;
}
/// <summary>
/// Chooses an action, using the action produced by <paramref name="defaultPolicy"/> as the default.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultPolicy">Policy evaluated on the context to obtain the default action.</param>
/// <param name="doNotLog">Passed through to the explorer unchanged.</param>
/// <returns>The chosen action.</returns>
public async Task<int> ChooseActionAsync(string uniqueKey, TContext context, IPolicy<TContext> defaultPolicy, bool doNotLog)
{
    var decision = await defaultPolicy.MapContextAsync(context);
    var chosen = await this.ChooseActionAsync(uniqueKey, context, decision.Value, doNotLog);
    return chosen;
}
/// <summary>
/// Blocking wrapper around the policy-based ChooseActionAsync overload.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultPolicy">Policy evaluated on the context to obtain the default action.</param>
/// <returns>The chosen action.</returns>
[Obsolete("Use Async version")]
public int ChooseAction(string uniqueKey, TContext context, IPolicy<TContext> defaultPolicy)
{
    Task<int> pending = this.ChooseActionAsync(uniqueKey, context, defaultPolicy);
    return pending.ConfigureAwait(true).GetAwaiter().GetResult();
}
/// <summary>
/// Blocking wrapper around the default-action ChooseActionAsync overload.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultAction">The default action.</param>
/// <returns>The chosen action.</returns>
[Obsolete("Use Async version")]
public int ChooseAction(string uniqueKey, TContext context, int defaultAction)
{
    Task<int> pending = this.ChooseActionAsync(uniqueKey, context, defaultAction);
    return pending.ConfigureAwait(true).GetAwaiter().GetResult();
}
/// <summary>
/// Chooses an action given a default action. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultAction">The default action.</param>
/// <returns>A task producing the chosen action.</returns>
public Task<int> ChooseActionAsync(string uniqueKey, TContext context, int defaultAction)
{
    Task<int> pending = this.ChooseActionAsync(uniqueKey, context, defaultAction, doNotLog: false);
    return pending;
}
/// <summary>
/// Chooses an action given a default action. A ranking over all actions is built with the default
/// action first, followed by the remaining action ids in ascending order; the explorer is asked to
/// choose from that ranking and the top entry of its answer is returned.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultAction">The default action; a 1-based id between 1 and the number of actions for the context.</param>
/// <param name="doNotLog">Passed through to the explorer unchanged.</param>
/// <returns>The chosen action.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="defaultAction"/> is not between 1 and the number of actions (which also covers the case of no actions at all).
/// </exception>
public async Task<int> ChooseActionAsync(string uniqueKey, TContext context, int defaultAction, bool doNotLog)
{
    var numActions = this.numActionsProvider.GetNumberOfActions(context);

    // Without this check an out-of-range default would yield a ranking that is not a permutation
    // of 1..numActions, and zero actions would fail with an IndexOutOfRangeException below.
    if (defaultAction < 1 || defaultAction > numActions)
    {
        throw new ArgumentOutOfRangeException(
            "defaultAction",
            defaultAction,
            "Default action must be between 1 and the number of actions (" + numActions + ").");
    }

    var actions = new int[numActions];
    actions[0] = defaultAction;

    // Fill the remaining slots with 1..numActions in order, skipping the default action.
    var action = 1;
    for (int i = 1; i < numActions; i++)
    {
        if (action == defaultAction)
        {
            action++;
        }

        actions[i] = action;
        action++;
    }

    var ranking = await this.mwtExplorer.ChooseActionAsync(uniqueKey, context, actions, doNotLog);
    return ranking[0];
}
/// <summary>
/// Blocking call that returns the top entry of the ranking chosen for the context.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <returns>The chosen action.</returns>
[Obsolete("Use Async version")]
public int ChooseAction(string uniqueKey, TContext context)
{
    int[] ranking = this.ChooseRanking(uniqueKey, context);
    return ranking[0];
}
/// <summary>
/// Returns the top entry of the ranking chosen for the context. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <returns>The chosen action.</returns>
public async Task<int> ChooseActionAsync(string uniqueKey, TContext context)
{
    int[] ranking = await this.ChooseRankingAsync(uniqueKey, context, doNotLog: false);
    return ranking[0];
}
/// <summary>
/// Blocking wrapper around the ranker-based ChooseRankingAsync overload.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultRanker">Ranker evaluated on the context to obtain the default ranking.</param>
/// <returns>The chosen ranking.</returns>
[Obsolete("Use Async version")]
public int[] ChooseRanking(string uniqueKey, TContext context, IRanker<TContext> defaultRanker)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context, defaultRanker);
    return pending.ConfigureAwait(true).GetAwaiter().GetResult();
}
/// <summary>
/// Blocking wrapper around the default-actions ChooseRankingAsync overload.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultActions">The default ranking.</param>
/// <returns>The chosen ranking.</returns>
[Obsolete("Use Async version")]
public int[] ChooseRanking(string uniqueKey, TContext context, int[] defaultActions)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context, defaultActions);
    return pending.ConfigureAwait(true).GetAwaiter().GetResult();
}
/// <summary>
/// Blocking wrapper around the context-only ChooseRankingAsync overload.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <returns>The chosen ranking.</returns>
[Obsolete("Use Async version")]
public int[] ChooseRanking(string uniqueKey, TContext context)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context);
    return pending.ConfigureAwait(true).GetAwaiter().GetResult();
}
/// <summary>
/// Chooses a ranking, using the ranking produced by <paramref name="defaultRanker"/> as the default. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultRanker">Ranker evaluated on the context to obtain the default ranking.</param>
/// <returns>A task producing the chosen ranking.</returns>
public Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context, IRanker<TContext> defaultRanker)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context, defaultRanker, doNotLog: false);
    return pending;
}
/// <summary>
/// Chooses a ranking, using the ranking produced by <paramref name="defaultRanker"/> as the default.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultRanker">Ranker evaluated on the context to obtain the default ranking.</param>
/// <param name="doNotLog">Passed through to the explorer unchanged.</param>
/// <returns>The chosen ranking.</returns>
public async Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context, IRanker<TContext> defaultRanker, bool doNotLog)
{
    var decision = await defaultRanker.MapContextAsync(context);
    var ranking = await this.ChooseRankingAsync(uniqueKey, context, decision.Value, doNotLog);
    return ranking;
}
/// <summary>
/// Chooses a ranking given a default ranking. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultActions">The default ranking.</param>
/// <returns>A task producing the chosen ranking.</returns>
public Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context, int[] defaultActions)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context, defaultActions, doNotLog: false);
    return pending;
}
/// <summary>
/// Chooses a ranking given a default ranking by delegating to the underlying MWT explorer.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="defaultActions">The default ranking.</param>
/// <param name="doNotLog">Passed through to the explorer unchanged.</param>
/// <returns>The chosen ranking.</returns>
public async Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context, int[] defaultActions, bool doNotLog)
{
    var ranking = await this.mwtExplorer.ChooseActionAsync(uniqueKey, context, defaultActions, doNotLog);
    return ranking;
}
/// <summary>
/// Chooses a ranking for the context without caller-supplied defaults. The decision is logged.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <returns>A task producing the chosen ranking.</returns>
public Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context)
{
    Task<int[]> pending = this.ChooseRankingAsync(uniqueKey, context, doNotLog: false);
    return pending;
}
/// <summary>
/// Chooses a ranking for the context without caller-supplied defaults. When an initial policy is
/// set it is passed to the explorer; otherwise the explorer is called without one.
/// </summary>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <param name="context">The context to decide on.</param>
/// <param name="doNotLog">Passed through to the explorer unchanged.</param>
/// <returns>The chosen ranking.</returns>
public async Task<int[]> ChooseRankingAsync(string uniqueKey, TContext context, bool doNotLog)
{
    // Read the field once so the null check and the call below see the same policy instance.
    var policySnapshot = this.initialPolicy;
    if (policySnapshot == null)
    {
        return await this.mwtExplorer.ChooseActionAsync(uniqueKey, context, doNotLog);
    }

    return await this.mwtExplorer.ChooseActionAsync(uniqueKey, context, policySnapshot, doNotLog);
}
/// <summary>
/// Reports a simple float reward for the experimental unit identified by the given unique key.
/// Nothing is reported when the current recorder does not provide a logger.
/// </summary>
/// <param name="reward">The simple float reward.</param>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
public virtual void ReportReward(float reward, string uniqueKey)
{
    var sink = this.logger;
    if (sink == null)
    {
        return;
    }

    sink.ReportReward(uniqueKey, reward);
}
/// <summary>
/// Reports an outcome object for the experimental unit identified by the given unique key.
/// Nothing is reported when the current recorder does not provide a logger.
/// </summary>
/// <param name="outcome">The outcome object.</param>
/// <param name="uniqueKey">The unique key of the experimental unit.</param>
/// <remarks>
/// Outcomes are general forms of observations that can be converted to simple float rewards as required by some ML algorithms for optimization.
/// </remarks>
public void ReportOutcome(object outcome, string uniqueKey)
{
    var sink = this.logger;
    if (sink == null)
    {
        return;
    }

    sink.ReportOutcome(uniqueKey, outcome);
}
/// <summary>
/// Feeds a new model to the internal policy and makes that policy the explorer's active policy.
/// Does nothing when the internal policy cannot be updated from a stream.
/// TODO: Stream needs to be disposed by users
/// </summary>
/// <param name="model">Stream containing the model; it is not disposed here.</param>
public void UpdateModel(Stream model)
{
    var updatablePolicy = this.internalPolicy as IUpdatable<Stream>;
    if (updatablePolicy == null)
    {
        return;
    }

    updatablePolicy.Update(model);

    // Swap out initial policy and use the internal policy to handle new model
    this.mwtExplorer.Policy = this.internalPolicy;
    Trace.TraceInformation("Model update succeeded.");
}
/// <summary>
/// Disposes resources held by this client and suppresses finalization.
/// </summary>
public void Dispose()
{
    this.Dispose(disposing: true);
    GC.SuppressFinalize(this);
}
/// <summary>
/// Disposes resources. Managed resources are released, in order: the settings poller, the model
/// poller, the recorder (when it is disposable) and the MWT explorer. Each field is cleared afterwards.
/// </summary>
/// <param name="disposing">True when called from Dispose(); nothing is released otherwise.</param>
public virtual void Dispose(bool disposing)
{
    // Only managed objects are held, so there is nothing to do unless called from Dispose()
    // (as opposed to a finalizer, which is currently not implemented).
    if (!disposing)
    {
        return;
    }

    if (this.settingsDownloader != null)
    {
        this.settingsDownloader.Dispose();
        this.settingsDownloader = null;
    }

    if (this.modelDownloader != null)
    {
        this.modelDownloader.Dispose();
        this.modelDownloader = null;
    }

    // Flush any pending data to be logged
    var disposableRecorder = this.recorder as IDisposable;
    if (disposableRecorder != null)
    {
        disposableRecorder.Dispose();
    }

    this.recorder = null;

    if (this.mwtExplorer != null)
    {
        this.mwtExplorer.Dispose();
        this.mwtExplorer = null;
    }
}
/// <summary>
/// Number-of-actions provider that reports the same count for every context.
/// </summary>
private sealed class ConstantNumActionsProvider : INumberOfActionsProvider<TContext>
{
    private readonly int fixedCount;

    /// <summary>
    /// Creates a provider that always reports <paramref name="numActions"/>.
    /// </summary>
    /// <param name="numActions">The action count to report.</param>
    internal ConstantNumActionsProvider(int numActions)
    {
        this.fixedCount = numActions;
    }

    /// <summary>
    /// Returns the fixed action count; the context is ignored.
    /// </summary>
    /// <param name="context">Not used.</param>
    /// <returns>The count given at construction.</returns>
    public int GetNumberOfActions(TContext context)
    {
        return this.fixedCount;
    }
}
}
}