using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using NPOI.XSSF.UserModel;
using System;
using System.Collections.Generic;
using System.Dynamic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Security.AccessControl;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Linq;

namespace testML
{
    /// <summary>
    /// Console program that reads rows from the first sheet of an Excel workbook,
    /// converts them into an <see cref="IDataView"/> and runs an AutoML regression
    /// experiment on them.
    /// </summary>
    internal class Program
    {
        /// <summary>Workbook that is opened when no path is passed on the command line.</summary>
        private const string DefaultWorkbookPath = @"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx";

        /// <summary>Reading stops as soon as the sheet row index reaches this value.</summary>
        private const int RowReadLimit = 30;

        /// <summary>Name of the column the regression experiment uses as its label.</summary>
        private const string LabelColumn = "DESCENDIENTE_S4i001";

        private static readonly Random rnd = new Random();

        /// <summary>
        /// Entry point: loads the workbook, builds the data view, splits it and runs
        /// the AutoML experiment, then waits for the user to press enter.
        /// </summary>
        /// <param name="args">
        /// Optional. When at least one argument is given, the first one is used as the
        /// workbook path; otherwise <see cref="DefaultWorkbookPath"/> is used.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// The sheet has no header row, no data row could be read, or the expected
        /// <c>LoadFromEnumerable</c> overload could not be found.
        /// </exception>
        static void Main(string[] args)
        {
            var workbookPath = args.Length > 0 ? args[0] : DefaultWorkbookPath;

            #region Prepare the training data
            // NOTE(review): the generic type arguments were missing from the reviewed copy of
            // this file; Dictionary<string, object> is assumed here and in the helpers below.
            // Confirm against the parameter type of DictionaryToObjectConverter.Convert.
            List<Dictionary<string, object>> tmpData = LoadTrainingRows(workbookPath);
            if (tmpData.Count == 0)
            {
                throw new InvalidOperationException("No training rows could be read from the workbook.");
            }
            #endregion

            MLContext mlContext = new MLContext();

            var dataConverted = DictionaryToObjectConverter.Convert(tmpData, out Type classType, out DataViewSchema schema);
            IDataView data = LoadFromEnumerable(mlContext, classType, dataConverted);
            //var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema);
            //var data = new DictionaryView(tmpData, schema.ToSchema(), converter);

            #region Split the data into a training set and a test set used to measure accuracy
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
            IDataView trainData = dataSplit.TrainSet;
            IDataView testData = dataSplit.TestSet;
            #endregion

            #region Prepare the input and output data
            //var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100);
            // Not referenced by the AutoML pipeline built below.
            var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f);

            //var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001");
            //IEstimator pipe = (IEstimator)pipeline;
            //pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label"));
            //pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label"));

            // Every row is built from the same header, so the first row's keys name all columns.
            ColumnInformation columnInference = BuildColumnInformation(tmpData[0].Keys);

            // Only echo the log messages emitted by the AutoML experiment itself.
            mlContext.Log += (_, e) =>
            {
                if (e.Source.Equals("AutoMLExperiment"))
                {
                    Console.WriteLine(e.RawMessage);
                }
            };

            SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference)
                .Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName));

            AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();
            experiment
                .SetPipeline(pipeline)
                .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName)
                .SetTrainingTimeInSeconds(60)
                .SetDataset(trainData);

            var result = experiment.Run();
            #endregion

            /*
            // Train the model
            ITransformer model = pipe.Fit(trainData);

            #region Run a test to measure the error percentage
            // Use trained model to make inferences on test data
            IDataView testDataPredictions = model.Transform(testData);

            // Extract model metrics and get RSquared
            RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
            double rSquared = trainedModelMetrics.RSquared;
            Console.WriteLine("ModelMetrics: {0}", rSquared);
            #endregion

            #region Try it out with a few predictions
            var predictionFunction = mlContext.Model.CreatePredictionEngine(model);
            for (var c = 0; c < 25; c++)
            {
                var test = CreateRandomData();
                var expected = test.IntegerNumber;
                test.IntegerNumber = 0;
                var p = predictionFunction.Predict(test);
                Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber);
            }
            #endregion
            */

            Console.WriteLine();
            Console.WriteLine("Press enter to Exit");
            Console.ReadLine();
        }

        /// <summary>
        /// Opens the workbook and turns the data rows of its first sheet into dictionaries
        /// keyed by column name. Row 0 is used as the header row.
        /// </summary>
        /// <param name="workbookPath">Path of the .xlsx file to open for reading.</param>
        /// <returns>One dictionary per row that was read; rows missing from the sheet are skipped.</returns>
        /// <exception cref="InvalidOperationException">The first sheet has no row 0.</exception>
        private static List<Dictionary<string, object>> LoadTrainingRows(string workbookPath)
        {
            XSSFWorkbook wb;
            using (FileStream file = new FileStream(workbookPath, FileMode.Open, FileAccess.Read))
            {
                wb = new XSSFWorkbook(file);
            }

            var sheet = wb.GetSheetAt(0);
            var headerRow = sheet.GetRow(0);
            if (headerRow == null)
            {
                throw new InvalidOperationException("The first sheet of the workbook has no header row.");
            }

            // Normalise the header names once instead of once per data row.
            // A null entry marks a column that has no header cell; such columns are skipped.
            var columnCount = Math.Max(0, (int)headerRow.LastCellNum);
            var columnNames = new string[columnCount];
            for (var c = 0; c < columnCount; c++)
            {
                var rawName = headerRow.GetCell(c)?.StringCellValue;
                columnNames[c] = rawName == null ? null : FixColumnName(rawName);
            }

            // NOTE(review): this bound is kept exactly as the original loop had it. If
            // LastRowNum is the zero-based index of the last row, "r < LastRowNum - 1"
            // leaves the final rows of the sheet unread - confirm whether that is intended.
            var totalRows = sheet.LastRowNum - 1;
            var upperBound = Math.Min(totalRows, RowReadLimit);

            var rows = new List<Dictionary<string, object>>();
            for (var r = 1; r < upperBound; r++)
            {
                Console.WriteLine($"{r} / {totalRows}");

                var row = sheet.GetRow(r);
                if (row == null)
                {
                    // No row object at this index; nothing to read.
                    continue;
                }

                rows.Add(ReadRow(row, columnNames));
            }

            return rows;
        }

        /// <summary>
        /// Converts one sheet row into a dictionary of column name to string value.
        /// </summary>
        /// <param name="row">The sheet row to read.</param>
        /// <param name="columnNames">
        /// Normalised header name for each column index; a null entry means the column is skipped.
        /// </param>
        /// <returns>The values of the row keyed by their final (possibly prefixed) column name.</returns>
        private static Dictionary<string, object> ReadRow(NPOI.SS.UserModel.IRow row, string[] columnNames)
        {
            var rowData = new Dictionary<string, object>();

            // The prefix is sticky: once one of the three section columns is seen, every
            // following column of the row is prefixed until the next section column.
            var prefix = string.Empty;

            for (var c = 0; c < columnNames.Length; c++)
            {
                var columnName = columnNames[c];
                if (columnName == null)
                {
                    continue;
                }

                // The section columns themselves keep their own unprefixed name.
                var usePrefix = true;
                switch (columnName)
                {
                    case "PMASCULINO":
                        prefix = "MASCULINO_";
                        usePrefix = false;
                        break;
                    case "PFEMENINO":
                        prefix = "FEMENINO_";
                        usePrefix = false;
                        break;
                    case "DESCENDIENTE":
                        prefix = "DESCENDIENTE_";
                        usePrefix = false;
                        break;
                }

                // Only numeric and string cells are read; any other cell type yields null.
                object value = null;
                var cell = row.GetCell(c);
                if (cell != null)
                {
                    switch (cell.CellType)
                    {
                        case NPOI.SS.UserModel.CellType.Numeric:
                            value = cell.NumericCellValue;
                            break;
                        case NPOI.SS.UserModel.CellType.String:
                            value = cell.StringCellValue;
                            break;
                    }
                }

                var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName;

                if (value is string text)
                {
                    // String values of S4i*/SNP* columns are stored as "<column>_<value>".
                    var needsValuePrefix = columnName.StartsWith("S4i", StringComparison.Ordinal)
                        || columnName.StartsWith("SNP", StringComparison.Ordinal);
                    var valuePrefix = needsValuePrefix ? columnName + "_" : string.Empty;
                    rowData.Add(finalColumnName, valuePrefix + text);
                }
                else
                {
                    // Missing or unsupported cells are stored as an empty string.
                    rowData.Add(finalColumnName, value?.ToString() ?? "");
                }
            }

            return rowData;
        }

        /// <summary>
        /// Calls <c>mlContext.Data.LoadFromEnumerable&lt;T&gt;</c> through reflection, because the
        /// row type is only known at run time.
        /// </summary>
        /// <param name="mlContext">Context whose data catalog performs the load.</param>
        /// <param name="rowType">Type used as the generic argument of the call.</param>
        /// <param name="rows">The rows to load, passed as the first argument of the call.</param>
        /// <returns>The data view returned by the call.</returns>
        /// <exception cref="InvalidOperationException">The expected overload was not found.</exception>
        private static IDataView LoadFromEnumerable(MLContext mlContext, Type rowType, object rows)
        {
            // Select the (data, SchemaDefinition) overload explicitly rather than whichever
            // generic overload reflection happens to list first, since null is passed as
            // the second argument.
            MethodInfo loadMethod = mlContext.Data.GetType()
                .GetMethods()
                .FirstOrDefault(IsSchemaDefinitionOverload);
            if (loadMethod == null)
            {
                throw new InvalidOperationException("LoadFromEnumerable(data, SchemaDefinition) was not found on the data catalog.");
            }

            var loadMethodObj = loadMethod.MakeGenericMethod(rowType);
            return (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { rows, null });
        }

        /// <summary>
        /// Tells whether a method is the generic <c>LoadFromEnumerable</c> definition whose
        /// second parameter is a <see cref="SchemaDefinition"/>.
        /// </summary>
        private static bool IsSchemaDefinitionOverload(MethodInfo method)
        {
            if (method.Name != "LoadFromEnumerable" || !method.IsGenericMethodDefinition)
            {
                return false;
            }

            var parameters = method.GetParameters();
            return parameters.Length == 2 && parameters[1].ParameterType == typeof(SchemaDefinition);
        }

        /// <summary>
        /// Builds the column information for the experiment: <see cref="LabelColumn"/> is the
        /// label, and every other column whose name contains "_S4i" or "_SNP" is categorical.
        /// </summary>
        /// <param name="columnNames">The column names of the loaded data.</param>
        private static ColumnInformation BuildColumnInformation(IEnumerable<string> columnNames)
        {
            var columnInference = new ColumnInformation() { LabelColumnName = LabelColumn };

            foreach (var key in columnNames)
            {
                if (key == columnInference.LabelColumnName)
                {
                    continue;
                }

                if (key.Contains("_S4i") || key.Contains("_SNP"))
                {
                    columnInference.CategoricalColumnNames.Add(key);
                }
            }

            return columnInference;
        }

        /// <summary>
        /// Reduces a header text to letters, digits and underscores.
        /// </summary>
        /// <param name="columnName">The raw header text; may be null or empty.</param>
        /// <returns>
        /// The characters of <paramref name="columnName"/> that are letters, numbers or '_',
        /// in their original order; an empty string for null or empty input.
        /// </returns>
        private static string FixColumnName(string columnName)
        {
            if (string.IsNullOrEmpty(columnName))
            {
                return string.Empty;
            }

            var result = new StringBuilder(columnName.Length);
            foreach (var c in columnName)
            {
                // The ordinal indicators are dropped explicitly because the letter check
                // below would otherwise keep them.
                if (c == 'º' || c == 'ª')
                {
                    continue;
                }

                if (char.IsLetter(c) || char.IsNumber(c) || (c == '_'))
                {
                    result.Append(c);
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Creates a <c>Data</c> instance with random field values plus values derived from them.
        /// In this file it is only referenced from the disabled code in <see cref="Main"/>.
        /// </summary>
        /// <returns>The populated instance.</returns>
        private static Data CreateRandomData()
        {
            var d = new Data()
            {
                Accession = rnd.Next(0, 99999999).ToString("00000000"),
                Enum1 = rnd.Next(1, 4),
                Enum2 = rnd.Next(1, 11),
                Enum3 = rnd.Next(1, 6),
                Enum4 = rnd.Next(1, 6),
                // StringTest = tags[rnd.Next(0, tags.Length)]
            };

            // Overwrites the random Enum4 assigned in the initializer above.
            d.Enum4 = d.Enum1 + d.Enum2;

            // Set some values that are related to each other (the neural network should
            // calibrate itself to learn this formula).
            // NOTE(review): StringTest is not assigned in this method (its initializer is
            // commented out), so the .Length reads below depend on Data supplying a
            // non-null default - confirm, otherwise they throw.
            // NOTE(review): if Enum1/Enum2 are integer typed, Enum2 / Enum1 is an integer
            // division - confirm against the Data declaration.
            d.IntegerNumber = (((d.Enum1 + d.Enum2) - (d.Enum3 + d.Enum4)) * 5.25f) + d.StringTest.Length;
            d.DecimalNumber = (d.Enum2 / d.Enum1) * (2.0f + (1.0f / d.StringTest.Length));

            if (d.StringTest == "Azul")
            {
                d.IntegerNumber += 10;
                d.OrigenResultNumber = 1;
            }

            if (d.StringTest == "Rojo")
            {
                d.IntegerNumber += 5f;
                d.OrigenResultNumber = 1;
            }

            return d;
        }
    }
}