Intentando Clasificar (sin funcionar)
This commit is contained in:
@@ -1,10 +1,16 @@
|
||||
using Microsoft.ML;
|
||||
using Microsoft.ML.AutoML;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.Trainers;
|
||||
using NPOI.XSSF.UserModel;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Dynamic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Security.AccessControl;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using System.Xml.Linq;
|
||||
@@ -13,27 +19,106 @@ namespace testML
|
||||
{
|
||||
internal class Program
|
||||
{
|
||||
static string[] tags = new string[] { "Rojo", "Amarillo", "Verde claro", "Verde oscuro", "Violeta", "Naranja", "Azul", "Blanco" };
|
||||
static Random rnd = new Random();
|
||||
|
||||
static void Main(string[] args)
|
||||
{
|
||||
XSSFWorkbook wb;
|
||||
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
|
||||
{
|
||||
wb = new XSSFWorkbook(file);
|
||||
}
|
||||
|
||||
var sheet = wb.GetSheetAt(0);
|
||||
|
||||
var headerRow = sheet.GetRow(0);
|
||||
|
||||
|
||||
#region Preparamos los datos de entrenamiento
|
||||
|
||||
var tmpData = new List<Data>();
|
||||
|
||||
for (var c = 0; c < 15000; c++)
|
||||
var tmpData = new List<Dictionary<string, object>>();
|
||||
|
||||
for (var r = 1; r < sheet.LastRowNum - 1; r++)
|
||||
{
|
||||
var d = CreateRandomData();
|
||||
tmpData.Add(d);
|
||||
if (r == 30) break;
|
||||
Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1));
|
||||
var row = sheet.GetRow(r);
|
||||
|
||||
var rowData = new Dictionary<string, object>();
|
||||
|
||||
string prefix = string.Empty;
|
||||
|
||||
for (var c = 0; c < headerRow.LastCellNum; c++)
|
||||
{
|
||||
var usePrefix = true;
|
||||
var columnName = headerRow.GetCell(c)?.StringCellValue;
|
||||
|
||||
columnName = FixColumnName(columnName);
|
||||
|
||||
object value = null;
|
||||
|
||||
if (columnName == "PMASCULINO")
|
||||
{
|
||||
prefix = "MASCULINO_";
|
||||
usePrefix = false;
|
||||
}
|
||||
if (columnName == "PFEMENINO")
|
||||
{
|
||||
prefix = "FEMENINO_";
|
||||
usePrefix = false;
|
||||
}
|
||||
if (columnName == "DESCENDIENTE")
|
||||
{
|
||||
prefix = "DESCENDIENTE_";
|
||||
usePrefix = false;
|
||||
}
|
||||
|
||||
|
||||
switch (row.GetCell(c)?.CellType)
|
||||
{
|
||||
case NPOI.SS.UserModel.CellType.Numeric: value = row.GetCell(c)?.NumericCellValue; break;
|
||||
case NPOI.SS.UserModel.CellType.String: value = row.GetCell(c)?.StringCellValue; break;
|
||||
}
|
||||
|
||||
string valuePrefix = string.Empty;
|
||||
if (columnName.StartsWith("S4i") || columnName.StartsWith("SNP"))
|
||||
{
|
||||
valuePrefix = columnName + "_";
|
||||
}
|
||||
|
||||
var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName;
|
||||
if (value is string)
|
||||
{
|
||||
rowData.Add(finalColumnName, valuePrefix + value);
|
||||
}
|
||||
else
|
||||
{
|
||||
rowData.Add(finalColumnName, value?.ToString() ?? "");
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
tmpData.Add(rowData);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
|
||||
|
||||
MLContext mlContext = new MLContext();
|
||||
var data = mlContext.Data.LoadFromEnumerable(tmpData);
|
||||
|
||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, out Type classType, out DataViewSchema schema);
|
||||
|
||||
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
|
||||
|
||||
var loadMethodObj = loadMethod.MakeGenericMethod(classType);
|
||||
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
|
||||
|
||||
|
||||
//var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema);
|
||||
|
||||
//var data = new DictionaryView<Expando>(tmpData, schema.ToSchema(), converter);
|
||||
|
||||
#region Cortamos los datos de entrenamiento en (Datos para entrenar y Datos para hacer el test de precisión)
|
||||
|
||||
@@ -45,20 +130,60 @@ namespace testML
|
||||
|
||||
#region Preparamos los datos de entrada y salida
|
||||
|
||||
var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations:1000);
|
||||
//var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f );
|
||||
//var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100);
|
||||
var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f);
|
||||
|
||||
var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "IntegerNumber")
|
||||
.Append(mlContext.Transforms.Text.NormalizeText("StringTest"))
|
||||
.Append(mlContext.Transforms.Text.FeaturizeText("StringTest"))
|
||||
.Append(mlContext.Transforms.Concatenate("Features", "Enum1", "Enum2", "Enum3", "Enum4", "StringTest"))
|
||||
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
|
||||
.Append(trainer);
|
||||
//var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001");
|
||||
//IEstimator<ITransformer> pipe = (IEstimator<ITransformer>)pipeline;
|
||||
|
||||
//pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label"));
|
||||
//pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label"));
|
||||
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||
|
||||
var columnInference = new ColumnInformation()
|
||||
{
|
||||
LabelColumnName = "DESCENDIENTE_S4i001"
|
||||
};
|
||||
|
||||
foreach (var key in firstRow.Keys)
|
||||
{
|
||||
if (key == columnInference.LabelColumnName)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (key.Contains("_S4i") || key.Contains("_SNP"))
|
||||
{
|
||||
columnInference.CategoricalColumnNames.Add(key);
|
||||
}
|
||||
}
|
||||
|
||||
mlContext.Log += (_, e) => {
|
||||
if (e.Source.Equals("AutoMLExperiment"))
|
||||
{
|
||||
Console.WriteLine(e.RawMessage);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference)
|
||||
.Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName));
|
||||
|
||||
AutoMLExperiment experiment = mlContext.Auto().CreateExperiment();
|
||||
|
||||
experiment
|
||||
.SetPipeline(pipeline)
|
||||
.SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName)
|
||||
.SetTrainingTimeInSeconds(60)
|
||||
.SetDataset(trainData);
|
||||
|
||||
var result = experiment.Run();
|
||||
|
||||
#endregion
|
||||
|
||||
/*
|
||||
//Entrenamos el modelo
|
||||
ITransformer model = pipeline.Fit(trainData);
|
||||
ITransformer model = pipe.Fit(trainData);
|
||||
|
||||
#region Hacemos un test para medir el % de error
|
||||
|
||||
@@ -85,19 +210,40 @@ namespace testML
|
||||
test.IntegerNumber = 0;
|
||||
|
||||
var p = predictionFunction.Predict(test);
|
||||
|
||||
Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected , expected- p.IntegerNumber);
|
||||
|
||||
Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
|
||||
*/
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Press enter to Exit");
|
||||
Console.ReadLine();
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
/// Sanitizes a spreadsheet header so it can be used as a column name:
/// keeps only letters, numeric characters and underscores, and drops the
/// ordinal indicators 'º' and 'ª'. All other characters (spaces,
/// punctuation, symbols) are removed.
/// </summary>
/// <param name="columnName">
/// Raw header text. May be null: the caller reads it with
/// <c>GetCell(c)?.StringCellValue</c>, which yields null for a missing cell.
/// </param>
/// <returns>
/// The sanitized name, or an empty string when <paramref name="columnName"/>
/// is null, empty, or contains no accepted characters.
/// </returns>
private static string FixColumnName(string columnName)
{
    // Guard against a missing header cell; without this the Length access
    // below throws NullReferenceException.
    if (string.IsNullOrEmpty(columnName))
    {
        return string.Empty;
    }

    var result = new StringBuilder(columnName.Length);

    foreach (var c in columnName)
    {
        // Ordinal indicators are rejected explicitly, before the letter
        // check, because char.IsLetter would otherwise accept them.
        if (c == 'º' || c == 'ª')
        {
            continue;
        }

        if (char.IsLetter(c) || char.IsNumber(c) || c == '_')
        {
            result.Append(c);
        }
    }

    return result.ToString();
}
|
||||
|
||||
private static Data CreateRandomData()
|
||||
{
|
||||
var d = new Data()
|
||||
@@ -107,12 +253,14 @@ namespace testML
|
||||
Enum2 = rnd.Next(1, 11),
|
||||
Enum3 = rnd.Next(1, 6),
|
||||
Enum4 = rnd.Next(1, 6),
|
||||
StringTest = tags[rnd.Next(0, tags.Length)]
|
||||
// StringTest = tags[rnd.Next(0, tags.Length)]
|
||||
};
|
||||
|
||||
d.Enum4 = d.Enum1 + d.Enum2;
|
||||
|
||||
// Ponemos algunos datos que tengan alguna relación (la red neuronal debería calibrarse para comprender esta formula)
|
||||
d.IntegerNumber = (((d.Enum1 + d.Enum2) - (d.Enum3 + d.Enum4)) * 5.25f) + d.StringTest.Length;
|
||||
|
||||
|
||||
d.DecimalNumber = (d.Enum2 / d.Enum1) * (2.0f + (1.0f / d.StringTest.Length));
|
||||
|
||||
if (d.StringTest == "Azul")
|
||||
@@ -133,4 +281,5 @@ namespace testML
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user