-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Copy pathConversionsExtensionsCatalog.cs
537 lines (503 loc) · 37.9 KB
/
ConversionsExtensionsCatalog.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
{
using static Microsoft.ML.Transforms.HashingEstimator;
using ConvertDefaults = TypeConvertingEstimator.Defaults;
using HashDefaults = HashingEstimator.Defaults;
/// <summary>
/// Collection of extension methods for <see cref="TransformsCatalog"/> to create instances of data conversion and mapping transformer components.
/// </summary>
public static class ConversionsExtensionsCatalog
{
/// <summary>
/// Create a <see cref="HashingEstimator"/>, which hashes the data from the column specified in <paramref name="inputColumnName"/>
/// to a new column: <paramref name="outputColumnName"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// This column's data type will be a vector of keys, or a scalar of key based on whether the input column data types
/// are vectors or scalars.</param>
/// <param name="inputColumnName">Name of the column whose data will be hashed.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This estimator operates over vectors or scalars of text, numeric, boolean, key or <see cref="DataViewRowId"/> data types. </param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.
/// Text representations of the original values are stored in the slot names of the annotations for the new column. Hashing, as such, can map many initial values to one.
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Hash](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs)]
/// ]]></format>
/// </example>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null,
    int numberOfBits = HashDefaults.NumberOfBits, int maximumNumberOfInverts = HashDefaults.MaximumNumberOfInverts)
{
    // Resolve the environment backing the catalog, then forward every argument to the estimator unchanged.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(host, outputColumnName, inputColumnName, numberOfBits, maximumNumberOfInverts);
}
/// <summary>
/// Create a <see cref="HashingEstimator"/>, which hashes the data from the column specified in <see cref="ColumnOptions.InputColumnName" />
/// to a new column: <see cref="ColumnOptions.Name" />.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Advanced options for the estimator that also contain the input and output column names.
/// This estimator operates over text, numeric, boolean, key and <see cref="DataViewRowId"/> data types.
/// The new column's data type will be a vector of <see cref="System.UInt32"/>, or a <see cref="System.UInt32"/> based on whether the input column data types
/// are vectors or scalars.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Hash](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/HashWithOptions.cs)]
/// ]]></format>
/// </example>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns)
{
    // The per-column options are passed through as-is; this overload only supplies the environment.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(host, columns);
}
/// <summary>
/// Create a <see cref="TypeConvertingEstimator"/>, which converts the type of the data to the type specified in <paramref name="outputKind"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over numeric, boolean, text, <see cref="System.DateTime"/> and key data types.</param>
/// <param name="outputKind">The expected kind of the output column.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConvertType](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs)]
/// ]]></format>
/// </example>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null,
    DataKind outputKind = ConvertDefaults.DefaultOutputKind)
{
    var host = CatalogUtils.GetEnvironment(catalog);

    // Wrap the single column request in a one-element options array, which is what the estimator consumes.
    var singleColumn = new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName);
    return new TypeConvertingEstimator(host, new[] { singleColumn });
}
/// <summary>
/// Create a <see cref="TypeConvertingEstimator"/>, which converts the type of the data to the type specified in <paramref name="outputKind"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// This transform operates over numeric, boolean, text, <see cref="System.DateTime"/> and key data types.</param>
/// <param name="outputKind">The expected kind of the output column.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConvertType](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs)]
/// ]]></format>
/// </example>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns,
    DataKind outputKind = ConvertDefaults.DefaultOutputKind)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // One options entry per column pair; every entry shares the same requested output kind.
    var perColumn = new TypeConvertingEstimator.ColumnOptions[columns.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        var pair = columns[i];
        perColumn[i] = new TypeConvertingEstimator.ColumnOptions(pair.OutputColumnName, outputKind, pair.InputColumnName);
    }

    return new TypeConvertingEstimator(host, perColumn);
}
/// <summary>
/// Create a <see cref="TypeConvertingEstimator"/>, which converts the type of the data in the <see cref="TypeConvertingEstimator.ColumnOptions.InputColumnName"/>
/// to the type specified in the <see cref="TypeConvertingEstimator.ColumnOptions.OutputKind"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// This transform operates over numeric, boolean, text, <see cref="System.DateTime"/> and keys.</param>
[BestFriend]
internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, params TypeConvertingEstimator.ColumnOptions[] columns)
{
    // Caller-supplied options are handed to the estimator untouched.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new TypeConvertingEstimator(host, columns);
}
/// <summary>
/// Create a <see cref="KeyToValueMappingEstimator"/>, which converts the key types back to their original values.
/// </summary>
/// <remarks>This transform often is in the pipeline after one of the overloads of
/// <see cref="MapValueToKey(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[], int, ValueToKeyMappingEstimator.KeyOrdinality, bool, IDataView)"/></remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// Its type will be the original value's type.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over keys.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null)
{
    // inputColumnName is forwarded as given (possibly null); this method does not substitute a default itself.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new KeyToValueMappingEstimator(host, outputColumnName, inputColumnName);
}
/// <summary>
/// Create a <see cref="KeyToValueMappingEstimator"/>, which converts the key types back to their original values.
/// </summary>
/// <remarks>This transform can operate over several columns.
/// This transform often is in the pipeline after one of the overloads of
/// <see cref="MapValueToKey(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[], int, ValueToKeyMappingEstimator.KeyOrdinality, bool, IDataView)"/></remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// This transform operates over keys.
/// The new column's data type will be the original value's type.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs)]
/// ]]>
/// </format>
/// </example>
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // Flatten each pair into an (output, input) name tuple, preserving the order of the incoming array.
    var namePairs = new (string, string)[columns.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        var pair = columns[i];
        namePairs[i] = (pair.OutputColumnName, pair.InputColumnName);
    }

    return new KeyToValueMappingEstimator(host, namePairs);
}
/// <summary>
/// Create a <see cref="KeyToVectorMappingEstimator"/>, which maps the value of a key into a floating point vector representing the value.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.</param>
[BestFriend]
internal static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    params KeyToVectorMappingEstimator.ColumnOptions[] columns)
{
    // Caller-supplied options are handed to the estimator untouched.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new KeyToVectorMappingEstimator(host, columns);
}
/// <summary>
/// Create a <see cref="KeyToVectorMappingEstimator"/>, which maps the value of a key into a floating point vector representing the value.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The data type is a vector of <see cref="System.Single"/> representing the input value.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over keys.</param>
/// <param name="outputCountVector">Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
/// This is only relevant when the input column is a vector of keys.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToVector](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs)]
/// ]]></format>
/// </example>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName, string inputColumnName = null, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
{
    // Straight pass-through: the estimator receives the column names and the count-vector flag as given.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new KeyToVectorMappingEstimator(host, outputColumnName, inputColumnName, outputCountVector);
}
/// <summary>
/// Create a <see cref="KeyToVectorMappingEstimator"/>, which maps the value of a key into a floating point vector representing the value.
/// </summary>
/// <remarks>This transform can operate over several columns of keys.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// The new column's data type is a vector of <see cref="System.Single"/> representing the original value.</param>
/// <param name="outputCountVector">Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
/// This is only relevant when the input column is a vector of keys.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToVector](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs)]
/// ]]></format>
/// </example>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // Every column pair gets its own options entry carrying the same outputCountVector setting.
    var perColumn =
        (from pair in columns
         select new KeyToVectorMappingEstimator.ColumnOptions(pair.OutputColumnName, pair.InputColumnName, outputCountVector))
        .ToArray();

    return new KeyToVectorMappingEstimator(host, perColumn);
}
/// <summary>
/// Create a <see cref="ValueToKeyMappingEstimator"/>, which converts categorical values into numerical keys.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column containing the keys.</param>
/// <param name="inputColumnName">Name of the column containing the categorical values. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> is used.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// If set to <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// If set to <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted, and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type
/// for values, regardless of the actual input type. When doing the reverse
/// mapping, the values are text rather than the original input type.</param>
/// <param name="keyData">Use a pre-defined mapping between values and keys, instead of building
/// the mapping from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of keyOrdinality.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    var host = CatalogUtils.GetEnvironment(catalog);

    // Describe the single column mapping, then wrap it in a one-element array for the estimator.
    var singleColumn = new ValueToKeyMappingEstimator.ColumnOptions(
        outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText);

    return new ValueToKeyMappingEstimator(host, new[] { singleColumn }, keyData);
}
/// <summary>
/// Create a <see cref="ValueToKeyMappingEstimator"/>, which converts categorical values into keys.
/// </summary>
/// <remarks>This transform can operate over multiple pairs of columns, creating a mapping for each pair.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// If set to <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// If set to <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted, and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type
/// for values, regardless of the actual input type. When doing the reverse
/// mapping, the values are text rather than the original input type.</param>
/// <param name="keyData">Use a pre-defined mapping between values and keys, instead of building
/// the mapping from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of keyOrdinality.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // The same key limit, ordinality and annotation settings are applied to every column pair.
    var perColumn = new ValueToKeyMappingEstimator.ColumnOptions[columns.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        var pair = columns[i];
        perColumn[i] = new ValueToKeyMappingEstimator.ColumnOptions(
            pair.OutputColumnName, pair.InputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText);
    }

    return new ValueToKeyMappingEstimator(host, perColumn, keyData);
}
/// <summary>
/// Create a <see cref="ValueToKeyMappingEstimator"/>, which converts value types into keys, optionally loading the keys to use from <paramref name="keyData"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="keyData">The data view containing the terms. If specified, this should be a single column data
/// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined
/// from the input data upon fitting.</param>
[BestFriend]
internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null)
{
    // Both the column options and the optional key data are forwarded as given.
    var host = CatalogUtils.GetEnvironment(catalog);
    return new ValueToKeyMappingEstimator(host, columns, keyData);
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The output data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// The input data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.
/// </param>
/// <param name="treatValuesAsKeyType">Whether to treat the values as a key.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
    string inputColumnName = null,
    bool treatValuesAsKeyType = false)
{
    // Resolve the environment once and reuse it for validation, the lookup view and the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);

    // Validate the mapping the same way the sibling overloads validate `columns`, so that a null
    // argument is reported under the caller-facing parameter name instead of LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the sequence so keys and values are projected from one and the same enumeration.
    // Two independent deferred Select calls over a lazy or non-repeatable source could otherwise
    // yield key and value lists that do not line up.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    // Convert the parallel key and value lists into a two-column data view for the estimator.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

    // When no input column is given, the output column name doubles as the source column name.
    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
/// <param name="columns">The input and output columns.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.
/// </param>
[BestFriend]
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
    params InputOutputColumnPair[] columns)
{
    // Resolve the environment once and reuse it below instead of looking it up again per call.
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Validate the mapping as well, so that a null argument is reported under the caller-facing
    // parameter name instead of LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the sequence so keys and values are projected from one and the same enumeration.
    // Two independent deferred Select calls over a lazy or non-repeatable source could otherwise
    // yield key and value lists that do not line up.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    // Convert the parallel key and value lists into a two-column data view for the estimator.
    // This overload never treats the values as key type.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName, false);

    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        InputOutputColumnPair.ConvertToValueTuples(columns));
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
/// <param name="treatValuesAsKeyType">Whether to treat the values as keys.</param>
/// <param name="columns">The input and output columns.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.
/// </param>
[BestFriend]
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
    bool treatValuesAsKeyType,
    params InputOutputColumnPair[] columns)
{
    // Resolve the environment once and reuse it below instead of looking it up again per call.
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Validate the mapping as well, so that a null argument is reported under the caller-facing
    // parameter name instead of LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the sequence so keys and values are projected from one and the same enumeration.
    // Two independent deferred Select calls over a lazy or non-repeatable source could otherwise
    // yield key and value lists that do not line up.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    // Convert the parallel key and value lists into a two-column data view for the estimator.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        InputOutputColumnPair.ConvertToValueTuples(columns));
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types,
/// as specified in the <typeparamref name="TOutputType"/>.</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types,
/// as specified in the <typeparamref name="TInputType"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    IEnumerable<KeyValuePair<TInputType, TOutputType[]>> keyValuePairs,
    string inputColumnName = null)
{
    // Resolve the environment once and reuse it for validation, the lookup view and the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);

    // Validate the mapping the same way the sibling overloads validate `columns`, so that a null
    // argument is reported under the caller-facing parameter name instead of LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the sequence so keys and values are projected from one and the same enumeration.
    // Two independent deferred Select calls over a lazy or non-repeatable source could otherwise
    // yield key and value lists that do not line up.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    // Convert parallel key and value lists to IDataView with two columns, so that the underlying infra can use it.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName);

    // When no input column is given, the output column name doubles as the source column name.
    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.</param>
/// <param name="columns">The input and output columns. The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>,
/// <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types, as specified in the <typeparamref name="TInputType"/> and <typeparamref name="TOutputType"/>.</param>
[BestFriend]
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    IEnumerable<KeyValuePair<TInputType, TOutputType[]>> keyValuePairs,
    params InputOutputColumnPair[] columns)
{
    // Resolve the environment once and reuse it below instead of asking the catalog again.
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Reject a missing mapping up front so the failure is reported against the keyValuePairs parameter.
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the mapping so the caller's sequence is enumerated exactly once. Projecting the keys and
    // the values from two separate passes over a non-repeatable sequence could leave them misaligned.
    KeyValuePair<TInputType, TOutputType[]>[] pairs = keyValuePairs.ToArray();
    IEnumerable<TInputType> keys = pairs.Select(pair => pair.Key);
    IEnumerable<TOutputType[]> values = pairs.Select(pair => pair.Value);

    // Expose the parallel key and value sequences as a two-column IDataView for the estimator to consume.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName);

    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        InputOutputColumnPair.ConvertToValueTuples(columns));
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which maps input values to output values, loading the mapping to use from the <paramref name="lookupMap"/> where the <paramref name="keyColumn"/>
/// specifies the keys, and the <paramref name="valueColumn"/> the respective values.
/// </summary>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/>
/// or <see cref="DataViewRowId"/> types.</param>
/// <param name="lookupMap">An instance of <see cref="IDataView"/> that contains the <paramref name="keyColumn"/> and <paramref name="valueColumn"/> columns.</param>
/// <param name="keyColumn">The key column in <paramref name="lookupMap"/>.</param>
/// <param name="valueColumn">The value column in <paramref name="lookupMap"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// The data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/>
/// or <see cref="DataViewRowId"/> types.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator MapValue(
    this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    IDataView lookupMap,
    DataViewSchema.Column keyColumn,
    DataViewSchema.Column valueColumn,
    string inputColumnName = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // A single (output, input) pair; with no explicit input column the output name is used as the source.
    var columnPairs = new[] { (outputColumnName, inputColumnName ?? outputColumnName) };

    return new ValueMappingEstimator(env, lookupMap, keyColumn, valueColumn, columnPairs);
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which maps input values to output values,
/// loading the mapping to use from the <paramref name="lookupMap"/> where the <paramref name="keyColumn"/>
/// specifies the keys, and the <paramref name="valueColumn"/> the respective values.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="lookupMap">An instance of <see cref="IDataView"/> that contains the <paramref name="keyColumn"/> and <paramref name="valueColumn"/> columns.</param>
/// <param name="keyColumn">The key column in <paramref name="lookupMap"/>.</param>
/// <param name="valueColumn">The value column in <paramref name="lookupMap"/>.</param>
/// <param name="columns">The input and output columns.</param>
[BestFriend]
internal static ValueMappingEstimator MapValue(
    this TransformsCatalog.ConversionTransforms catalog,
    IDataView lookupMap,
    DataViewSchema.Column keyColumn,
    DataViewSchema.Column valueColumn,
    params InputOutputColumnPair[] columns)
{
    var host = CatalogUtils.GetEnvironment(catalog);

    // The column pairs must be supplied before they can be converted for the estimator.
    host.CheckValue(columns, nameof(columns));

    var columnTuples = InputOutputColumnPair.ConvertToValueTuples(columns);
    return new ValueMappingEstimator(host, lookupMap, keyColumn, valueColumn, columnTuples);
}
}
}