-
Notifications
You must be signed in to change notification settings - Fork 0
/
BaseSerializer.cs
85 lines (74 loc) · 3.47 KB
/
BaseSerializer.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
using System.Collections.Generic;
using System.Linq;
using PRFramework.Core.Common;
using PRFramework.Core.DatasetInfo;
using System;
namespace PRFramework.Core.IO
{
/// <summary>
/// Helpers that compute per-feature statistics (value distribution, missing-value
/// counts, numeric range) and dataset-level missing-data statistics, and store them
/// on the supplied <see cref="InstanceModel"/> and its features.
/// </summary>
public class BaseSerializer
{
    /// <summary>
    /// Computes the feature information of every feature in <paramref name="model"/>
    /// and then fills <c>model.DatasetInformation</c>.
    /// </summary>
    /// <param name="model">Model whose features and dataset information are updated.</param>
    /// <param name="instances">Instances the statistics are computed from.</param>
    public static void LoadInstancesInformation(InstanceModel model, IEnumerable<Instance> instances)
    {
        // Materialize once: the sequence is walked several times per feature below,
        // which would otherwise re-run a deferred query on every pass.
        var materialized = Materialize(instances);

        foreach (var feature in model.Features)
        {
            LoadFeatureInformation(feature, model, materialized);
        }

        FillDatasetInformation(model, materialized);
    }

    /// <summary>
    /// Computes and assigns <c>feature.FeatureInformation</c> for a categorical or
    /// numeric feature. Features of any other type are left untouched.
    /// </summary>
    /// <param name="feature">Feature to describe.</param>
    /// <param name="model">Model that receives the dataset information when requested.</param>
    /// <param name="instances">Instances the statistics are computed from.</param>
    /// <param name="fillDatasetInformation">
    /// When <c>true</c>, <see cref="FillDatasetInformation"/> is also invoked afterwards.
    /// </param>
    public static void LoadFeatureInformation(Feature feature, InstanceModel model, IEnumerable<Instance> instances, bool fillDatasetInformation = false)
    {
        var materialized = Materialize(instances);

        if (feature is CategoricalFeature)
        {
            var len = ((CategoricalFeature)feature).Values.Length;

            // Read the feature column a single time; every count below works on this array.
            var values = materialized.Select(x => x[feature]).ToArray();

            double[] valuesCount = new double[len];
            for (int i = 0; i < len; i++)
            {
                int valueIndex = i;
                valuesCount[i] = values.Count(v => v == valueIndex && !FeatureValue.IsMissing(v));
            }

            var valuesmissing = values.Count(FeatureValue.IsMissing);

            // Hoisted out of the projections so they are computed once instead of once
            // per element. Min() throws on an empty sequence, hence the guard; with no
            // values the projections below produce empty arrays and never use it.
            double total = valuesCount.Sum();
            double smallest = len > 0 ? valuesCount.Min() : 0;

            // Division by zero is intentionally not special-cased: as before, a zero
            // total or zero minimum yields NaN/Infinity entries.
            var valueProbability = valuesCount.Select(x => x / total).ToArray();
            var ratio = valuesCount.Select(x => x / smallest).ToArray();

            feature.FeatureInformation = new NominalFeatureInformation()
            {
                Distribution = valuesCount,
                MissingValueCount = valuesmissing,
                ValueProbability = valueProbability,
                Ratio = ratio,
                Feature = feature,
            };
        }
        else if (feature is NumericFeature)
        {
            // Read the feature column a single time, then split off the non-missing part.
            var values = materialized.Select(x => x[feature]).ToArray();
            var nonMissingValues = values.Where(v => !FeatureValue.IsMissing(v)).ToArray();
            var valuesmissing = values.Length - nonMissingValues.Length;

            // With no observed values the range defaults to [0, 0].
            double max = 0;
            double min = 0;
            if (nonMissingValues.Length > 0)
            {
                max = nonMissingValues.Max();
                min = nonMissingValues.Min();
            }

            feature.FeatureInformation = new NumericFeatureInformation
            {
                MissingValueCount = valuesmissing,
                MaxValue = max,
                MinValue = min,
                Feature = feature,
            };
        }

        if (fillDatasetInformation)
        {
            FillDatasetInformation(model, materialized);
        }
    }

    /// <summary>
    /// Builds a new <see cref="DatasetInformation"/> from the features' current
    /// <c>FeatureInformation</c> values and assigns it to <c>model.DatasetInformation</c>.
    /// </summary>
    /// <remarks>
    /// Every feature's <c>FeatureInformation</c> is dereferenced here, so it must have
    /// been assigned beforehand (for example by <see cref="LoadFeatureInformation"/>).
    /// </remarks>
    /// <param name="model">Model whose features are summarized and which receives the result.</param>
    /// <param name="instances">Instances inspected for missing values.</param>
    public static void FillDatasetInformation(InstanceModel model, IEnumerable<Instance> instances)
    {
        // Number of instances that have at least one missing feature value.
        int objWithIncompleteData = instances.Count(
            instance => model.Features.Any(feature => FeatureValue.IsMissing(instance[feature])));

        var datasetInformation = new DatasetInformation();
        datasetInformation.FeatureInformations = model.Features.Select(feature => feature.FeatureInformation).ToArray();
        datasetInformation.ObjectsWithIncompleteData = objWithIncompleteData;
        // Total number of missing values across all features.
        datasetInformation.GlobalAbscenseInformation = model.Features.Sum(feature => feature.FeatureInformation.MissingValueCount);

        model.DatasetInformation = datasetInformation;
    }

    /// <summary>
    /// Returns <paramref name="instances"/> as an in-memory collection so it can be
    /// enumerated repeatedly. Existing collections are returned as-is; <c>null</c> is
    /// passed through so downstream LINQ calls report it exactly as before.
    /// </summary>
    private static IEnumerable<Instance> Materialize(IEnumerable<Instance> instances)
    {
        if (instances == null)
        {
            return null;
        }

        var collection = instances as ICollection<Instance>;
        if (collection != null)
        {
            return collection;
        }

        return instances.ToList();
    }
}
}