1
0

Ya hace predicciones y crea el ZIP

This commit is contained in:
2023-01-18 14:20:53 +01:00
parent fe96cd41a4
commit 7f68e262f4
4 changed files with 281 additions and 117 deletions

View File

@@ -2,6 +2,8 @@
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;
using Microsoft.SqlServer.Server;
using NPOI.XSSF.UserModel;
using System;
using System.Collections.Generic;
@@ -24,7 +26,8 @@ namespace testML
static void Main(string[] args)
{
XSSFWorkbook wb;
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
//using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read))
{
wb = new XSSFWorkbook(file);
}
@@ -40,7 +43,7 @@ namespace testML
for (var r = 1; r < sheet.LastRowNum - 1; r++)
{
if (r == 300) break;
//if (r == 50) break;
Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1));
var row = sheet.GetRow(r);
@@ -102,126 +105,208 @@ namespace testML
tmpData.Add(rowData);
}
var columnToPredict = "DESCENDIENTE_S4i001";
var firstRow = tmpData[0] as IDictionary<string, object>;
foreach (var key in firstRow.Keys.ToArray())
{
var firstValue = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).FirstOrDefault();
if (firstValue == null)
{
foreach (var item in tmpData)
{
if (item.ContainsKey(key))
{
item.Remove(key);
}
}
}
}
#endregion
MLContext mlContext = new MLContext();
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, "DESCENDIENTE_S4i001", out Type classType, out DataViewSchema schema);
mlContext.Log += (_, e) =>
{
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; }
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" CursorSplitter")) { return; }
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Consolidate")) { return; }
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Training")) { return; }
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.Equals("RangeFilter; Checking parameters")) { return; }
//if (e.Source.Equals("AutoMLExperiment"))
//{
Console.WriteLine(e.RawMessage);
//}
};
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
tmpData = null; //Liberamos la memoria
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
var loadMethodObj = loadMethod.MakeGenericMethod(classType);
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
//var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema);
//var data = new DictionaryView<Expando>(tmpData, schema.ToSchema(), converter);
#region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión)
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.5);
IDataView trainData = dataSplit.TrainSet;
IDataView testData = dataSplit.TestSet;
#endregion
#region Preparamos los datos de entrada y salida
//var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100);
var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f);
//var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001");
//IEstimator<ITransformer> pipe = (IEstimator<ITransformer>)pipeline;
//pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label"));
//pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label"));
var firstRow = tmpData[0] as IDictionary<string, object>;
var columnInference = new ColumnInformation()
var columnNameAndTypes = new Dictionary<string, Type>();
foreach (var item in (from x in firstRow.Keys
select new { Key = x, Type = (from y in dataConverted.Cast<IDictionaryToObjectConverter>() where y.GetValue(x) != null select y.GetValue(x).GetType()).FirstOrDefault() })
)
{
LabelColumnName = "DESCENDIENTE_S4i001"
};
foreach (var key in firstRow.Keys)
{
if (key == columnInference.LabelColumnName)
{
continue;
}
if (key.Contains("_S4i") || key.Contains("_SNP"))
{
columnInference.CategoricalColumnNames.Add(key);
}
columnNameAndTypes.Add(item.Key, item.Type);
}
mlContext.Log += (_, e) => {
if (e.Source.Equals("AutoMLExperiment"))
{
Console.WriteLine(e.RawMessage);
}
};
var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes);
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
Console.WriteLine("Training...");
var _trainedModel = trainingPipeline.Fit(trainData);
SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference)
.Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName));
mlContext.Model.Save(_trainedModel, data.Schema, columnToPredict + ".zip");
AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();
var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault();
var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType);
var _predEngine = createPredictionEngineMethodObj.Invoke(mlContext.Model, new object[] { _trainedModel, null, null, null });
experiment
.SetPipeline(pipeline)
.SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName)
.SetTrainingTimeInSeconds(10)
.SetDataset(trainData);
//Test
var predictMethod = _predEngine.GetType().GetMethods().Where(x => x.Name == "Predict" && x.GetParameters().Length == 1 && x.GetParameters()[0].ParameterType == classType).FirstOrDefault();
var result = experiment.Run();
#endregion
//Entrenamos el modelo
//ITransformer model = pipe.Fit(trainData);
#region Hacemos un test para medir el % de error
// Use trained model to make inferences on test data
IDataView testDataPredictions = result.Model.Transform(testData);
// Extract model metrics and get RSquared
RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions, labelColumnName: columnInference.LabelColumnName);
double rSquared = trainedModelMetrics.RSquared;
Console.WriteLine("ModelMetrics: {0}", rSquared);
#endregion
#region Ponemos a prueba haciendo algunas predicciones
var predictionFunction = mlContext.Model.CreatePredictionEngine<Data, DataPrediction>(result.Model);
for (var c = 0; c < 25; c++)
var ok = 0;
var fail = 0;
foreach (var item in dataConverted.Cast<IDictionaryToObjectConverter>())
{
var test = CreateRandomData();
var expected = test.IntegerNumber;
test.IntegerNumber = 0;
var expected = item.GetValue(columnToPredict);
if (expected == null || string.IsNullOrEmpty(expected as string)) { continue; }
var p = predictionFunction.Predict(test);
item.SetValue(columnToPredict, null);
var prediction = predictMethod.Invoke(_predEngine, new object[] { item }) as IDictionaryToObjectConverter;
var predicted = prediction.GetValue(columnToPredict);
if (expected is string a && predicted is string b)
{
Console.Write(item.GetValue("DESCENDIENTE") ?? string.Empty);
Console.Write(": ");
Console.Write(string.Format("Expected: {0}\t\tPredicted: {1}", a, b));
if (string.Equals(a, b))
{
ok++;
Console.WriteLine("\tOk");
}
else
{
fail++;
Console.WriteLine("\tERROR!!!");
}
}
Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber);
}
Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000")));
#endregion
/*
//Entrenamos el modelo
//ITransformer model = pipe.Fit(trainData);
#region Hacemos un test para medir el % de error
// Use trained model to make inferences on test data
IDataView testDataPredictions = result.Model.Transform(testData);
// Extract model metrics and get RSquared
RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions, labelColumnName: columnInference.LabelColumnName);
double rSquared = trainedModelMetrics.RSquared;
Console.WriteLine("ModelMetrics: {0}", rSquared);
#endregion
#region Ponemos a prueba haciendo algunas predicciones
var predictionFunction = mlContext.Model.CreatePredictionEngine<Data, DataPrediction>(result.Model);
for (var c = 0; c < 25; c++)
{
var test = CreateRandomData();
var expected = test.IntegerNumber;
test.IntegerNumber = 0;
var p = predictionFunction.Predict(test);
Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber);
}
#endregion
*/
Console.WriteLine();
Console.WriteLine("Press enter to Exit");
Console.ReadLine();
}
/// <summary>
/// Builds the data-preparation part of the pipeline: maps the column to predict into a key-typed
/// "Label" column, featurizes every eligible text column in place, and concatenates the featurized
/// columns into a single "Features" column.
/// </summary>
/// <param name="mlContext">ML.NET context whose transform catalogs create the estimators.</param>
/// <param name="predictColumnName">Name of the column to predict; it is mapped to "Label" and never used as a feature.</param>
/// <param name="columnNames">Map of column name to CLR type. Only columns whose type is <see cref="string"/> are featurized; every other column (including those with a null type) is left out of "Features".</param>
/// <returns>The estimator chain ending with the "Features" concatenation.</returns>
/// <exception cref="ArgumentNullException"><paramref name="mlContext"/> or <paramref name="columnNames"/> is null.</exception>
private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames)
{
    if (mlContext == null) { throw new ArgumentNullException(nameof(mlContext)); }
    if (columnNames == null) { throw new ArgumentNullException(nameof(columnNames)); }

    IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label");

    var featured = new List<string>(columnNames.Count);
    foreach (var column in columnNames)
    {
        // The target itself must never leak into the features.
        if (column.Key == predictColumnName) { continue; }

        // Column names are identifiers, so compare ordinally instead of with the current culture.
        // NOTE(review): "DESCENDIENTE_" columns are presumably sibling target columns - confirm.
        if (column.Key.StartsWith("DESCENDIENTE_", StringComparison.Ordinal)) { continue; }

        // Only text columns are featurized; other types are ignored by this pipeline.
        if (column.Value != typeof(string)) { continue; }

        // Featurize in place (same input and output name) so the column can be concatenated below.
        pipeline = pipeline.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: column.Key, outputColumnName: column.Key));
        featured.Add(column.Key);
    }

    // NOTE(review): if no text column qualifies, "featured" is empty here - confirm how Concatenate reacts.
    pipeline = pipeline.Append(mlContext.Transforms.Concatenate("Features", featured.ToArray()));
    return pipeline;
}
/// <summary>
/// Completes <paramref name="pipeline"/> with a multiclass SDCA maximum-entropy trainer (reading "Label"
/// and "Features", at most 1000 iterations) followed by a key-to-value mapping of "PredictedLabel".
/// Despite the name, nothing is fitted here: the method only returns the estimator chain.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the trainer and the mapping transform.</param>
/// <param name="trainingDataView">Not used by this method.</param>
/// <param name="pipeline">Data-preparation estimator the trainer is appended to.</param>
/// <param name="modelType">Not used by this method.</param>
/// <param name="prodelPredictionType">Not used by this method.</param>
/// <returns>The full, still untrained, training pipeline.</returns>
public static IEstimator<ITransformer> BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> pipeline, Type modelType, Type prodelPredictionType)
{
    // Trainer that consumes the key-typed label and the concatenated feature vector.
    var sdcaTrainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features", maximumNumberOfIterations: 1000);

    // Converts the predicted key back into its original value.
    var predictedLabelToValue = mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");

    return pipeline.Append(sdcaTrainer).Append(predictedLabelToValue);
}
private static string FixColumnName(string columnName)
{
var result = new StringBuilder(columnName.Length);