Ya hace predicciones y crea el ZIP
This commit is contained in:
@@ -16,7 +16,7 @@ namespace testML
|
||||
{
|
||||
public static class DictionaryToObjectConverter
|
||||
{
|
||||
public static IEnumerable<object> Convert(List<Dictionary<string, object>> data, string toPredict, out Type classType, out DataViewSchema schema)
|
||||
public static IEnumerable<object> Convert(List<Dictionary<string, object>> data, string toPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema)
|
||||
{
|
||||
var schemaBuilder = new DataViewSchema.Builder();
|
||||
|
||||
@@ -32,12 +32,6 @@ namespace testML
|
||||
if (sampleValue != null)
|
||||
{
|
||||
var keyType = sampleValue.GetType();
|
||||
|
||||
if (key == toPredict)
|
||||
{
|
||||
keyType = typeof(float);
|
||||
}
|
||||
|
||||
definition.Add(key, keyType);
|
||||
|
||||
if (keyType == typeof(string))
|
||||
@@ -101,25 +95,12 @@ namespace testML
|
||||
var dllAssembly = compilerResults.CompiledAssembly;
|
||||
|
||||
classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName);
|
||||
classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName + "Prediction");
|
||||
|
||||
Type listType = typeof(List<>);
|
||||
Type genericType = listType.MakeGenericType(classType);
|
||||
|
||||
var result =(IList) Activator.CreateInstance(genericType) ;
|
||||
|
||||
Dictionary<string, float> translate = new Dictionary<string, float>();
|
||||
translate.Add(string.Empty, 0);
|
||||
foreach (var inputData in data)
|
||||
{
|
||||
if (inputData.ContainsKey(toPredict) && inputData[toPredict] != null)
|
||||
{
|
||||
if (!translate.ContainsKey(inputData[toPredict] as string))
|
||||
{
|
||||
var max = translate.Values.Max()+1;
|
||||
translate.Add(inputData[toPredict] as string, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
var result = (IList)Activator.CreateInstance(genericType);
|
||||
|
||||
|
||||
foreach (var inputData in data)
|
||||
@@ -128,17 +109,10 @@ namespace testML
|
||||
result.Add(outputData);
|
||||
|
||||
foreach (var key in inputData.Keys)
|
||||
{
|
||||
if (key == toPredict)
|
||||
{
|
||||
outputData[key] = translate[inputData[key] as string ?? string.Empty];
|
||||
}
|
||||
else
|
||||
{
|
||||
outputData[key] = inputData[key];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
schema = schemaBuilder.ToSchema();
|
||||
|
||||
|
||||
@@ -28,8 +28,14 @@ namespace testML
|
||||
/// </summary>
|
||||
public virtual string TransformText()
|
||||
{
|
||||
this.Write("\r\nusing System;\r\nusing System.Text;\r\nusing Microsoft.ML.Data;\r\n\r\nnamespace Dictio" +
|
||||
"naryToObjectConverterNamespace\r\n{\r\n\tpublic class ");
|
||||
|
||||
#line 6 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
var toPredictType = Definition[ToPredict];
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write("using System;\r\nusing System.Text;\r\nusing Microsoft.ML.Data;\r\n\r\nnamespace Dictiona" +
|
||||
"ryToObjectConverterNamespace\r\n{\r\n\tpublic class ");
|
||||
|
||||
#line 13 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ClassName));
|
||||
@@ -141,7 +147,77 @@ namespace testML
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write("\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n}");
|
||||
this.Write("\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n\r\n\tpublic class ");
|
||||
|
||||
#line 54 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ClassName));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write("Prediction: testML.IDictionaryToObjectConverter\r\n\t{\r\n\t\t[ColumnName(\"PredictedLabe" +
|
||||
"l\")]\t\r\n\t\tpublic ");
|
||||
|
||||
#line 57 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(toPredictType.FullName));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write(" ");
|
||||
|
||||
#line 57 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write(@" { get; set; }
|
||||
|
||||
public object this[string propertyName]
|
||||
{
|
||||
get { return GetValue(propertyName); }
|
||||
set { SetValue(propertyName, value); }
|
||||
}
|
||||
|
||||
public void SetValue(string propertyName, object value)
|
||||
{
|
||||
switch(propertyName)
|
||||
{
|
||||
case """);
|
||||
|
||||
#line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write("\":\t");
|
||||
|
||||
#line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write(" = (");
|
||||
|
||||
#line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(toPredictType.FullName));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write(")value;\tbreak;\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tpublic object GetValue(string propertyName)\r\n\t\t{\r\n" +
|
||||
"\t\t\tswitch(propertyName)\r\n\t\t\t{\r\n\t\t\t\tcase \"");
|
||||
|
||||
#line 77 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write("\":\treturn ");
|
||||
|
||||
#line 77 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt"
|
||||
this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict));
|
||||
|
||||
#line default
|
||||
#line hidden
|
||||
this.Write(";\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n}");
|
||||
return this.GenerationEnvironment.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<#@ import namespace="System.Linq" #>
|
||||
<#@ import namespace="System.Text" #>
|
||||
<#@ import namespace="System.Collections.Generic" #>
|
||||
|
||||
<# var toPredictType = Definition[ToPredict]; #>
|
||||
using System;
|
||||
using System.Text;
|
||||
using Microsoft.ML.Data;
|
||||
@@ -50,4 +50,33 @@ namespace DictionaryToObjectConverterNamespace
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public class <#= ClassName #>Prediction: testML.IDictionaryToObjectConverter
|
||||
{
|
||||
[ColumnName("PredictedLabel")]
|
||||
public <#= toPredictType.FullName #> <#= ToPredict #> { get; set; }
|
||||
|
||||
public object this[string propertyName]
|
||||
{
|
||||
get { return GetValue(propertyName); }
|
||||
set { SetValue(propertyName, value); }
|
||||
}
|
||||
|
||||
public void SetValue(string propertyName, object value)
|
||||
{
|
||||
switch(propertyName)
|
||||
{
|
||||
case "<#= ToPredict #>": <#= ToPredict #> = (<#= toPredictType.FullName #>)value; break;
|
||||
}
|
||||
}
|
||||
|
||||
public object GetValue(string propertyName)
|
||||
{
|
||||
switch(propertyName)
|
||||
{
|
||||
case "<#= ToPredict #>": return <#= ToPredict #>;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,8 @@
|
||||
using Microsoft.ML.AutoML;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.Trainers;
|
||||
using Microsoft.ML.Transforms.Text;
|
||||
using Microsoft.SqlServer.Server;
|
||||
using NPOI.XSSF.UserModel;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
@@ -24,7 +26,8 @@ namespace testML
|
||||
static void Main(string[] args)
|
||||
{
|
||||
XSSFWorkbook wb;
|
||||
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
|
||||
//using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
|
||||
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read))
|
||||
{
|
||||
wb = new XSSFWorkbook(file);
|
||||
}
|
||||
@@ -40,7 +43,7 @@ namespace testML
|
||||
|
||||
for (var r = 1; r < sheet.LastRowNum - 1; r++)
|
||||
{
|
||||
if (r == 300) break;
|
||||
//if (r == 50) break;
|
||||
Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1));
|
||||
var row = sheet.GetRow(r);
|
||||
|
||||
@@ -102,86 +105,133 @@ namespace testML
|
||||
tmpData.Add(rowData);
|
||||
}
|
||||
|
||||
|
||||
var columnToPredict = "DESCENDIENTE_S4i001";
|
||||
|
||||
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||
foreach (var key in firstRow.Keys.ToArray())
|
||||
{
|
||||
var firstValue = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).FirstOrDefault();
|
||||
if (firstValue == null)
|
||||
{
|
||||
foreach (var item in tmpData)
|
||||
{
|
||||
if (item.ContainsKey(key))
|
||||
{
|
||||
item.Remove(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
|
||||
|
||||
MLContext mlContext = new MLContext();
|
||||
|
||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, "DESCENDIENTE_S4i001", out Type classType, out DataViewSchema schema);
|
||||
|
||||
|
||||
mlContext.Log += (_, e) =>
|
||||
{
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; }
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" CursorSplitter")) { return; }
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Consolidate")) { return; }
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Training")) { return; }
|
||||
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.Equals("RangeFilter; Checking parameters")) { return; }
|
||||
|
||||
|
||||
//if (e.Source.Equals("AutoMLExperiment"))
|
||||
//{
|
||||
Console.WriteLine(e.RawMessage);
|
||||
//}
|
||||
};
|
||||
|
||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
|
||||
|
||||
tmpData = null; //Liberamos la memoria
|
||||
|
||||
|
||||
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
|
||||
|
||||
var loadMethodObj = loadMethod.MakeGenericMethod(classType);
|
||||
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
|
||||
|
||||
|
||||
//var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema);
|
||||
|
||||
//var data = new DictionaryView<Expando>(tmpData, schema.ToSchema(), converter);
|
||||
|
||||
#region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión)
|
||||
|
||||
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
|
||||
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.5);
|
||||
IDataView trainData = dataSplit.TrainSet;
|
||||
IDataView testData = dataSplit.TestSet;
|
||||
|
||||
#endregion
|
||||
|
||||
|
||||
#region Preparamos los datos de entrada y salida
|
||||
|
||||
//var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100);
|
||||
var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f);
|
||||
|
||||
//var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001");
|
||||
//IEstimator<ITransformer> pipe = (IEstimator<ITransformer>)pipeline;
|
||||
|
||||
//pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label"));
|
||||
//pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label"));
|
||||
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||
|
||||
var columnInference = new ColumnInformation()
|
||||
var columnNameAndTypes = new Dictionary<string, Type>();
|
||||
foreach (var item in (from x in firstRow.Keys
|
||||
select new { Key = x, Type = (from y in dataConverted.Cast<IDictionaryToObjectConverter>() where y.GetValue(x) != null select y.GetValue(x).GetType()).FirstOrDefault() })
|
||||
)
|
||||
{
|
||||
LabelColumnName = "DESCENDIENTE_S4i001"
|
||||
};
|
||||
|
||||
foreach (var key in firstRow.Keys)
|
||||
{
|
||||
if (key == columnInference.LabelColumnName)
|
||||
{
|
||||
continue;
|
||||
columnNameAndTypes.Add(item.Key, item.Type);
|
||||
}
|
||||
|
||||
if (key.Contains("_S4i") || key.Contains("_SNP"))
|
||||
var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes);
|
||||
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
|
||||
|
||||
Console.WriteLine("Training...");
|
||||
var _trainedModel = trainingPipeline.Fit(trainData);
|
||||
|
||||
mlContext.Model.Save(_trainedModel, data.Schema, columnToPredict + ".zip");
|
||||
|
||||
var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault();
|
||||
var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType);
|
||||
var _predEngine = createPredictionEngineMethodObj.Invoke(mlContext.Model, new object[] { _trainedModel, null, null, null });
|
||||
|
||||
//Test
|
||||
var predictMethod = _predEngine.GetType().GetMethods().Where(x => x.Name == "Predict" && x.GetParameters().Length == 1 && x.GetParameters()[0].ParameterType == classType).FirstOrDefault();
|
||||
|
||||
var ok = 0;
|
||||
var fail = 0;
|
||||
foreach (var item in dataConverted.Cast<IDictionaryToObjectConverter>())
|
||||
{
|
||||
columnInference.CategoricalColumnNames.Add(key);
|
||||
}
|
||||
}
|
||||
var expected = item.GetValue(columnToPredict);
|
||||
if (expected == null || string.IsNullOrEmpty(expected as string)) { continue; }
|
||||
|
||||
mlContext.Log += (_, e) => {
|
||||
if (e.Source.Equals("AutoMLExperiment"))
|
||||
item.SetValue(columnToPredict, null);
|
||||
|
||||
var prediction = predictMethod.Invoke(_predEngine, new object[] { item }) as IDictionaryToObjectConverter;
|
||||
|
||||
var predicted = prediction.GetValue(columnToPredict);
|
||||
|
||||
if (expected is string a && predicted is string b)
|
||||
{
|
||||
Console.WriteLine(e.RawMessage);
|
||||
Console.Write(item.GetValue("DESCENDIENTE") ?? string.Empty);
|
||||
Console.Write(": ");
|
||||
|
||||
Console.Write(string.Format("Expected: {0}\t\tPredicted: {1}", a, b));
|
||||
|
||||
if (string.Equals(a, b))
|
||||
{
|
||||
ok++;
|
||||
Console.WriteLine("\tOk");
|
||||
}
|
||||
else
|
||||
{
|
||||
fail++;
|
||||
Console.WriteLine("\tERROR!!!");
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference)
|
||||
.Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName));
|
||||
}
|
||||
|
||||
AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();
|
||||
Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000")));
|
||||
|
||||
experiment
|
||||
.SetPipeline(pipeline)
|
||||
.SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName)
|
||||
.SetTrainingTimeInSeconds(10)
|
||||
.SetDataset(trainData);
|
||||
|
||||
var result = experiment.Run();
|
||||
|
||||
#endregion
|
||||
|
||||
|
||||
/*
|
||||
//Entrenamos el modelo
|
||||
//ITransformer model = pipe.Fit(trainData);
|
||||
|
||||
@@ -215,13 +265,48 @@ namespace testML
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
*/
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Press enter to Exit");
|
||||
Console.ReadLine();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Builds the data-preparation stage of the training pipeline: maps the column to predict
/// onto a "Label" column via MapValueToKey, featurizes every eligible string column in place,
/// and concatenates the featurized columns into a single "Features" column.
/// </summary>
/// <param name="mlContext">ML.NET context whose transform catalog creates the estimators.</param>
/// <param name="predictColumnName">Name of the column to predict; it feeds "Label" and is never used as a feature.</param>
/// <param name="columnNames">Map of column name to CLR type. Only entries whose type is <see cref="string"/> become features.</param>
/// <returns>The estimator chain ending with the "Features" concatenation.</returns>
private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames)
{
    IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label");

    var featured = new List<string>();

    // Iterate the pairs directly so each column costs a single dictionary access.
    foreach (var column in columnNames)
    {
        var key = column.Key;

        if (key == predictColumnName)
        {
            continue;
        }

        // Column names are identifiers, not linguistic text: compare ordinally so the
        // prefix test does not depend on the current culture.
        if (key.StartsWith("DESCENDIENTE_", StringComparison.Ordinal))
        {
            continue;
        }

        // Only string columns are featurized; a null type (no value seen) is skipped as well.
        if (column.Value != typeof(string))
        {
            continue;
        }

        // The featurized output overwrites the source column (same input and output name).
        pipeline = pipeline.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: key, outputColumnName: key));
        featured.Add(key);
    }

    // NOTE(review): when no string column qualifies, `featured` is empty; Concatenate
    // presumably rejects an empty input list - confirm and decide how callers should handle it.
    pipeline = pipeline.Append(mlContext.Transforms.Concatenate("Features", featured.ToArray()));

    return pipeline;
}
|
||||
|
||||
/// <summary>
/// Extends the given data-preparation pipeline with an SDCA maximum-entropy multiclass
/// trainer (reading "Label" and "Features", capped at 1000 iterations) followed by a
/// key-to-value mapping of "PredictedLabel".
/// </summary>
/// <param name="mlContext">ML.NET context used to create the trainer and the mapping estimator.</param>
/// <param name="trainingDataView">Not used by this method.</param>
/// <param name="pipeline">Pipeline the trainer and the mapping are appended to.</param>
/// <param name="modelType">Not used by this method.</param>
/// <param name="prodelPredictionType">Not used by this method.</param>
/// <returns>The extended, still unfitted, estimator chain.</returns>
public static IEstimator<ITransformer> BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> pipeline, Type modelType, Type prodelPredictionType)
{
    var classifier = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features", maximumNumberOfIterations: 1000);
    var labelDecoder = mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");

    return pipeline.Append(classifier).Append(labelDecoder);
}
|
||||
|
||||
|
||||
private static string FixColumnName(string columnName)
|
||||
{
|
||||
var result = new StringBuilder(columnName.Length);
|
||||
|
||||
Reference in New Issue
Block a user