From 7f68e262f4c040d2922166af12a2805afa243131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Maldonado=20S=C3=A1nchez?= Date: Wed, 18 Jan 2023 14:20:53 +0100 Subject: [PATCH] Ya hace predicciones y crea el ZIP --- testML/DictionaryToObjectConverter.cs | 38 +--- testML/DictionaryToObjectConverterClass.cs | 82 ++++++- testML/DictionaryToObjectConverterClass.tt | 31 ++- testML/Program.cs | 247 ++++++++++++++------- 4 files changed, 281 insertions(+), 117 deletions(-) diff --git a/testML/DictionaryToObjectConverter.cs b/testML/DictionaryToObjectConverter.cs index 41e2ab6..5164f62 100644 --- a/testML/DictionaryToObjectConverter.cs +++ b/testML/DictionaryToObjectConverter.cs @@ -16,9 +16,9 @@ namespace testML { public static class DictionaryToObjectConverter { - public static IEnumerable Convert(List> data, string toPredict, out Type classType, out DataViewSchema schema) + public static IEnumerable Convert(List> data, string toPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema) { - var schemaBuilder = new DataViewSchema.Builder(); + var schemaBuilder = new DataViewSchema.Builder(); var definition = new Dictionary(); @@ -32,12 +32,6 @@ namespace testML if (sampleValue != null) { var keyType = sampleValue.GetType(); - - if (key == toPredict) - { - keyType = typeof(float); - } - definition.Add(key, keyType); if (keyType == typeof(string)) @@ -101,42 +95,22 @@ namespace testML var dllAssembly = compilerResults.CompiledAssembly; classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName); + classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName + "Prediction"); Type listType = typeof(List<>); Type genericType = listType.MakeGenericType(classType); - var result =(IList) Activator.CreateInstance(genericType) ; + var result = (IList)Activator.CreateInstance(genericType); + - Dictionary translate = new Dictionary(); - translate.Add(string.Empty, 0); foreach (var inputData in data) - { - if (inputData.ContainsKey(toPredict) && inputData[toPredict] != null) - { - if (!translate.ContainsKey(inputData[toPredict] as string)) - { - var max = translate.Values.Max()+1; - translate.Add(inputData[toPredict] as string, max); - } - } - } - - - foreach (var inputData in data) { var outputData = (IDictionaryToObjectConverter)Activator.CreateInstance(classType); result.Add(outputData); foreach (var key in inputData.Keys) { - if (key == toPredict) - { - outputData[key] = translate[inputData[key] as string ?? string.Empty]; - } - else - { - outputData[key] = inputData[key]; - } + outputData[key] = inputData[key]; } } diff --git a/testML/DictionaryToObjectConverterClass.cs b/testML/DictionaryToObjectConverterClass.cs index f2d594d..8b76bd2 100644 --- a/testML/DictionaryToObjectConverterClass.cs +++ b/testML/DictionaryToObjectConverterClass.cs @@ -28,8 +28,14 @@ namespace testML /// public virtual string TransformText() { - this.Write("\r\nusing System;\r\nusing System.Text;\r\nusing Microsoft.ML.Data;\r\n\r\nnamespace Dictio" + - "naryToObjectConverterNamespace\r\n{\r\n\tpublic class "); + + #line 6 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + var toPredictType = Definition[ToPredict]; + + #line default + #line hidden + this.Write("using System;\r\nusing System.Text;\r\nusing Microsoft.ML.Data;\r\n\r\nnamespace Dictiona" + + "ryToObjectConverterNamespace\r\n{\r\n\tpublic class "); #line 13 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" this.Write(this.ToStringHelper.ToStringWithCulture(ClassName)); @@ -141,7 +147,77 @@ namespace testML #line default #line hidden - this.Write("\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n}"); + this.Write("\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n\r\n\tpublic class "); + + #line 54 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ClassName)); + + #line default + #line hidden + this.Write("Prediction: testML.IDictionaryToObjectConverter\r\n\t{\r\n\t\t[ColumnName(\"PredictedLabe" + + "l\")]\t\r\n\t\tpublic "); + + #line 57 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(toPredictType.FullName)); + + #line default + #line hidden + this.Write(" "); + + #line 57 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict)); + + #line default + #line hidden + this.Write(@" { get; set; } + + public object this[string propertyName] + { + get { return GetValue(propertyName); } + set { SetValue(propertyName, value); } + } + + public void SetValue(string propertyName, object value) + { + switch(propertyName) + { + case """); + + #line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict)); + + #line default + #line hidden + this.Write("\":\t"); + + #line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict)); + + #line default + #line hidden + this.Write(" = ("); + + #line 69 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(toPredictType.FullName)); + + #line default + #line hidden + this.Write(")value;\tbreak;\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tpublic object GetValue(string propertyName)\r\n\t\t{\r\n" + + "\t\t\tswitch(propertyName)\r\n\t\t\t{\r\n\t\t\t\tcase \""); + + #line 77 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict)); + + #line default + #line hidden + this.Write("\":\treturn "); + + #line 77 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ToPredict)); + + #line default + #line hidden + this.Write(";\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n}"); return this.GenerationEnvironment.ToString(); } } diff --git a/testML/DictionaryToObjectConverterClass.tt b/testML/DictionaryToObjectConverterClass.tt index 9a3abd6..9cb26bf 100644 --- a/testML/DictionaryToObjectConverterClass.tt +++ b/testML/DictionaryToObjectConverterClass.tt @@ -3,7 +3,7 @@ <#@ import namespace="System.Linq" #> <#@ import namespace="System.Text" #> <#@ import namespace="System.Collections.Generic" #> - +<# var toPredictType = Definition[ToPredict]; #> using System; using System.Text; using Microsoft.ML.Data; @@ -50,4 +50,33 @@ namespace DictionaryToObjectConverterNamespace return null; } } + + public class <#= ClassName #>Prediction: testML.IDictionaryToObjectConverter + { + [ColumnName("PredictedLabel")] + public <#= toPredictType.FullName #> <#= ToPredict #> { get; set; } + + public object this[string propertyName] + { + get { return GetValue(propertyName); } + set { SetValue(propertyName, value); } + } + + public void SetValue(string propertyName, object value) + { + switch(propertyName) + { + case "<#= ToPredict #>": <#= ToPredict #> = (<#= toPredictType.FullName #>)value; break; + } + } + + public object GetValue(string propertyName) + { + switch(propertyName) + { + case "<#= ToPredict #>": return <#= ToPredict #>; + } + return null; + } + } } \ No newline at end of file diff --git a/testML/Program.cs b/testML/Program.cs index 25ceb6a..546c671 100644 --- a/testML/Program.cs +++ b/testML/Program.cs @@ -2,6 +2,8 @@ using Microsoft.ML.AutoML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms.Text; +using Microsoft.SqlServer.Server; using NPOI.XSSF.UserModel; using System; using System.Collections.Generic; @@ -24,7 +26,8 @@ namespace testML static void Main(string[] args) { XSSFWorkbook wb; - using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) + //using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) + using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read)) { wb = new XSSFWorkbook(file); } @@ -40,7 +43,7 @@ namespace testML for (var r = 1; r < sheet.LastRowNum - 1; r++) { - if (r == 300) break; + //if (r == 50) break; Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1)); var row = sheet.GetRow(r); @@ -102,126 +105,208 @@ namespace testML tmpData.Add(rowData); } + + var columnToPredict = "DESCENDIENTE_S4i001"; + + var firstRow = tmpData[0] as IDictionary; + foreach (var key in firstRow.Keys.ToArray()) + { + var firstValue = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).FirstOrDefault(); + if (firstValue == null) + { + foreach (var item in tmpData) + { + if (item.ContainsKey(key)) + { + item.Remove(key); + } + } + } + } #endregion MLContext mlContext = new MLContext(); - var dataConverted = DictionaryToObjectConverter.Convert(tmpData, "DESCENDIENTE_S4i001", out Type classType, out DataViewSchema schema); + + + mlContext.Log += (_, e) => + { + if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; } + if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" CursorSplitter")) { return; } + if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Consolidate")) { return; } + if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Training")) { return; } + + if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.Equals("RangeFilter; Checking parameters")) { return; } + + + //if (e.Source.Equals("AutoMLExperiment")) + //{ + Console.WriteLine(e.RawMessage); + //} + }; + + var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema); + + tmpData = null; //Liberamos la memoria + var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault(); - var loadMethodObj = loadMethod.MakeGenericMethod(classType); var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null }); - - //var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema); - - //var data = new DictionaryView(tmpData, schema.ToSchema(), converter); - #region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión) - DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); + DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.5); IDataView trainData = dataSplit.TrainSet; IDataView testData = dataSplit.TestSet; #endregion + #region Preparamos los datos de entrada y salida - //var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100); - var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f); - //var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001"); - //IEstimator pipe = (IEstimator)pipeline; - - //pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label")); - //pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label")); - var firstRow = tmpData[0] as IDictionary; - - var columnInference = new ColumnInformation() + var columnNameAndTypes = new Dictionary(); + foreach (var item in (from x in firstRow.Keys + select new { Key = x, Type = (from y in dataConverted.Cast() where y.GetValue(x) != null select y.GetValue(x).GetType()).FirstOrDefault() }) + ) { - LabelColumnName = "DESCENDIENTE_S4i001" - }; - - foreach (var key in firstRow.Keys) - { - if (key == columnInference.LabelColumnName) - { - continue; - } - - if (key.Contains("_S4i") || key.Contains("_SNP")) - { - columnInference.CategoricalColumnNames.Add(key); - } + columnNameAndTypes.Add(item.Key, item.Type); } - mlContext.Log += (_, e) => { - if (e.Source.Equals("AutoMLExperiment")) - { - Console.WriteLine(e.RawMessage); - } - }; + var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes); + var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType); + Console.WriteLine("Training..."); + var _trainedModel = trainingPipeline.Fit(trainData); - SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference) - .Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName)); + mlContext.Model.Save(_trainedModel, data.Schema, columnToPredict + ".zip"); - AutoMLExperiment experiment = mlContext.Auto().CreateExperiment(); + var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault(); + var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType); + var _predEngine = createPredictionEngineMethodObj.Invoke(mlContext.Model, new object[] { _trainedModel, null, null, null }); - experiment - .SetPipeline(pipeline) - .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName) - .SetTrainingTimeInSeconds(10) - .SetDataset(trainData); + //Test + var predictMethod = _predEngine.GetType().GetMethods().Where(x => x.Name == "Predict" && x.GetParameters().Length == 1 && x.GetParameters()[0].ParameterType == classType).FirstOrDefault(); - var result = experiment.Run(); - - #endregion - - - //Entrenamos el modelo - //ITransformer model = pipe.Fit(trainData); - - #region Hacemos un test para medir el % de error - - // Use trained model to make inferences on test data - IDataView testDataPredictions = result.Model.Transform(testData); - - // Extract model metrics and get RSquared - RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions, labelColumnName: columnInference.LabelColumnName); - double rSquared = trainedModelMetrics.RSquared; - - Console.WriteLine("ModelMetrics: {0}", rSquared); - - #endregion - - - #region Ponemos a prueba haciendo algunas predicciones - - var predictionFunction = mlContext.Model.CreatePredictionEngine(result.Model); - - for (var c = 0; c < 25; c++) + var ok = 0; + var fail = 0; + foreach (var item in dataConverted.Cast()) { - var test = CreateRandomData(); - var expected = test.IntegerNumber; - test.IntegerNumber = 0; + var expected = item.GetValue(columnToPredict); + if (expected == null || string.IsNullOrEmpty(expected as string)) { continue; } - var p = predictionFunction.Predict(test); + item.SetValue(columnToPredict, null); + + var prediction = predictMethod.Invoke(_predEngine, new object[] { item }) as IDictionaryToObjectConverter; + + var predicted = prediction.GetValue(columnToPredict); + + if (expected is string a && predicted is string b) + { + Console.Write(item.GetValue("DESCENDIENTE") ?? string.Empty); + Console.Write(": "); + + Console.Write(string.Format("Expected: {0}\t\tPredicted: {1}", a, b)); + + if (string.Equals(a, b)) + { + ok++; + Console.WriteLine("\tOk"); + } + else + { + fail++; + Console.WriteLine("\tERROR!!!"); + } + + } - Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber); } + Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000"))); + + #endregion - + + /* + //Entrenamos el modelo + //ITransformer model = pipe.Fit(trainData); + + #region Hacemos un test para medir el % de error + + // Use trained model to make inferences on test data + IDataView testDataPredictions = result.Model.Transform(testData); + + // Extract model metrics and get RSquared + RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions, labelColumnName: columnInference.LabelColumnName); + double rSquared = trainedModelMetrics.RSquared; + + Console.WriteLine("ModelMetrics: {0}", rSquared); + + #endregion + + + #region Ponemos a prueba haciendo algunas predicciones + + var predictionFunction = mlContext.Model.CreatePredictionEngine(result.Model); + + for (var c = 0; c < 25; c++) + { + var test = CreateRandomData(); + var expected = test.IntegerNumber; + test.IntegerNumber = 0; + + var p = predictionFunction.Predict(test); + + Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber); + } + + #endregion + */ Console.WriteLine(); Console.WriteLine("Press enter to Exit"); Console.ReadLine(); } + + private static IEstimator ProcessData(MLContext mlContext, string predictColumnName, Dictionary columnNames) + { + IEstimator pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label"); + + + var featured = new List(); + + foreach (var key in columnNames.Keys) + { + if (key == predictColumnName) { continue; } + if (key.StartsWith("DESCENDIENTE_")) { continue; } + + var type = columnNames[key]; + if (type == typeof(string)) + { + pipeline = pipeline.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: key, outputColumnName: key)); + featured.Add(key); + } + } + + pipeline = pipeline.Append(mlContext.Transforms.Concatenate("Features", featured.ToArray())); + + return pipeline; + } + + public static IEstimator BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator pipeline, Type modelType, Type prodelPredictionType) + { + var trainingPipeline = pipeline.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features", maximumNumberOfIterations: 1000)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); + + return trainingPipeline; + } + + private static string FixColumnName(string columnName) { var result = new StringBuilder(columnName.Length);