From 4ad973b516c2fa268f84f57c4ec3591a79611c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Maldonado=20S=C3=A1nchez?= Date: Tue, 17 Jan 2023 14:45:00 +0100 Subject: [PATCH] Intentando Clasificar (sin funcionar) --- testML/App.config | 28 ++ testML/DictionaryToObjectConverter.cs | 138 +++++++ testML/DictionaryToObjectConverterClass.cs | 423 +++++++++++++++++++++ testML/DictionaryToObjectConverterClass.tt | 53 +++ testML/Program.cs | 193 ++++++++-- testML/packages.config | 54 ++- testML/testML.csproj | 197 +++++++++- 7 files changed, 1050 insertions(+), 36 deletions(-) create mode 100644 testML/DictionaryToObjectConverter.cs create mode 100644 testML/DictionaryToObjectConverterClass.cs create mode 100644 testML/DictionaryToObjectConverterClass.tt diff --git a/testML/App.config b/testML/App.config index e77a99d..d3e3403 100644 --- a/testML/App.config +++ b/testML/App.config @@ -9,6 +9,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/testML/DictionaryToObjectConverter.cs b/testML/DictionaryToObjectConverter.cs new file mode 100644 index 0000000..2c337cc --- /dev/null +++ b/testML/DictionaryToObjectConverter.cs @@ -0,0 +1,138 @@ +using Microsoft.CSharp; +using Microsoft.ML; +using Microsoft.ML.Data; +using System; +using System.CodeDom.Compiler; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +namespace testML +{ + public static class DictionaryToObjectConverter + { + public static IEnumerable Convert(List> data, out Type classType, out DataViewSchema schema) + { + var schemaBuilder = new DataViewSchema.Builder(); + + var definition = new Dictionary(); + + var sample = data.FirstOrDefault(); + if (sample != null) + { + foreach (var key in sample.Keys) + { + //Buscamos el tipo + var sampleValue = (from x in data where x.ContainsKey(key) && x[key] != null select x[key]).FirstOrDefault(); + if (sampleValue != null) + { + var keyType = sampleValue.GetType(); + definition.Add(key, keyType); + + if (keyType == typeof(string)) + { + schemaBuilder.AddColumn(key, TextDataViewType.Instance); + } + else if (keyType == typeof(double)) + { + schemaBuilder.AddColumn(key, NumberDataViewType.Double); + } + else if (keyType == typeof(float)) + { + schemaBuilder.AddColumn(key, NumberDataViewType.Single); + } + else if (keyType == typeof(int)) + { + schemaBuilder.AddColumn(key, NumberDataViewType.Int32); + } + else if (keyType == typeof(long)) + { + schemaBuilder.AddColumn(key, NumberDataViewType.Int64); + } + } + } + } + + var converter = new DictionaryToObjectConverterClass() + { + ClassName = "OBJ" + Guid.NewGuid().ToString("N").ToUpper(), + Definition = definition + }; + + var classCode = converter.TransformText(); + + string exDir = System.Runtime.InteropServices.RuntimeEnvironment.GetRuntimeDirectory().Trim(Path.DirectorySeparatorChar); + CompilerParameters compilerParameters = new CompilerParameters + { + GenerateExecutable = false, + GenerateInMemory = true, + IncludeDebugInformation = true, + TreatWarningsAsErrors = false, + CompilerOptions = string.Format("/target:library /lib:{0}", exDir) + }; + + var mlDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + + foreach (var item in AppDomain.CurrentDomain.GetAssemblies()) + { + compilerParameters.ReferencedAssemblies.Add(item.Location); + } + + + // compilerParameters.ReferencedAssemblies.Add(Assembly.GetExecutingAssembly().Location); + + CompilerResults compilerResults = + new CSharpCodeProvider(new Dictionary { { "CompilerVersion", "v4.0" } }).CompileAssemblyFromSource(compilerParameters, + new string[] { classCode } + ); + + var dllAssembly = compilerResults.CompiledAssembly; + + classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName); + + Type listType = typeof(List<>); + Type genericType = listType.MakeGenericType(classType); + + var result =(IList) Activator.CreateInstance(genericType) ; + + foreach (var inputData in data) + { + var outputData = (IDictionaryToObjectConverter)Activator.CreateInstance(classType); + result.Add(outputData); + + foreach (var key in inputData.Keys) + { + outputData[key] = inputData[key]; + } + } + + schema = schemaBuilder.ToSchema(); + + return (IEnumerable)result; + + } + } + + public partial class DictionaryToObjectConverterClass + { + public string ClassName { get; set; } + public string ToPredict { get; set; } + public Dictionary Definition { get; set; } + } + + public interface IDictionaryToObjectConverter + { + object this[string propertyName] { get; set; } + + void SetValue(string propertyName, object value); + + object GetValue(string propertyName); + } +} + + diff --git a/testML/DictionaryToObjectConverterClass.cs b/testML/DictionaryToObjectConverterClass.cs new file mode 100644 index 0000000..f2d594d --- /dev/null +++ b/testML/DictionaryToObjectConverterClass.cs @@ -0,0 +1,423 @@ +// ------------------------------------------------------------------------------ +// +// Este código fue generado por una herramienta. +// Versión del runtime: 17.0.0.0 +// +// Los cambios en este archivo podrían causar un comportamiento incorrecto y se perderán si +// se vuelve a generar el código. +// +// ------------------------------------------------------------------------------ +namespace testML +{ + using System.Linq; + using System.Text; + using System.Collections.Generic; + using System; + + /// + /// Class to produce the template output + /// + + #line 1 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.TextTemplating", "17.0.0.0")] + public partial class DictionaryToObjectConverterClass : DictionaryToObjectConverterClassBase + { +#line hidden + /// + /// Create the template output + /// + public virtual string TransformText() + { + this.Write("\r\nusing System;\r\nusing System.Text;\r\nusing Microsoft.ML.Data;\r\n\r\nnamespace Dictio" + + "naryToObjectConverterNamespace\r\n{\r\n\tpublic class "); + + #line 13 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(ClassName)); + + #line default + #line hidden + this.Write(" : testML.IDictionaryToObjectConverter\r\n\t{\r\n\t\t"); + + #line 15 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + foreach (var key in Definition.Keys) + { + var type = Definition[key]; + + + #line default + #line hidden + this.Write("\r\n\t\t[ColumnName(\""); + + #line 20 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write("\")]\r\n\t\tpublic "); + + #line 21 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(type.FullName)); + + #line default + #line hidden + this.Write(" "); + + #line 21 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write(" { get; set; }\r\n\t\t"); + + #line 22 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + } //Fin loop + + #line default + #line hidden + this.Write("\r\n\t\tpublic object this[string propertyName]\r\n\t\t{\r\n\t\t\tget { return GetValue(proper" + + "tyName); }\r\n\t\t\tset { SetValue(propertyName, value); }\r\n\t\t}\r\n\r\n\t\tpublic void SetV" + + "alue(string propertyName, object value)\r\n\t\t{\r\n\t\t\tswitch(propertyName)\r\n\t\t\t{\r\n\t"); + + #line 34 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + foreach (var key in Definition.Keys) { + var type = Definition[key]; + + #line default + #line hidden + this.Write("\t\t\t\tcase \""); + + #line 36 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write("\":\t"); + + #line 36 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write(" = ("); + + #line 36 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(type.FullName)); + + #line default + #line hidden + this.Write(")value;\tbreak;\r\n\t"); + + #line 37 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + } + + #line default + #line hidden + this.Write("\t\t\t}\r\n\t\t}\r\n\r\n\t\tpublic object GetValue(string propertyName)\r\n\t\t{\r\n\t\t\tswitch(proper" + + "tyName)\r\n\t\t\t{\r\n\t"); + + #line 45 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + foreach (var key in Definition.Keys) { + + #line default + #line hidden + this.Write("\t\t\t\tcase \""); + + #line 46 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write("\":\treturn "); + + #line 46 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + this.Write(this.ToStringHelper.ToStringWithCulture(key)); + + #line default + #line hidden + this.Write(";\r\n\t"); + + #line 47 "C:\Users\miguel.maldonado\Documents\Subversion\TestML\testML\DictionaryToObjectConverterClass.tt" + } + + #line default + #line hidden + this.Write("\r\n\t\t\t}\r\n\t\t\treturn null;\r\n\t\t}\r\n\t}\r\n}"); + return this.GenerationEnvironment.ToString(); + } + } + + #line default + #line hidden + #region Base class + /// + /// Base class for this transformation + /// + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.TextTemplating", "17.0.0.0")] + public class DictionaryToObjectConverterClassBase + { + #region Fields + private global::System.Text.StringBuilder generationEnvironmentField; + private global::System.CodeDom.Compiler.CompilerErrorCollection errorsField; + private global::System.Collections.Generic.List indentLengthsField; + private string currentIndentField = ""; + private bool endsWithNewline; + private global::System.Collections.Generic.IDictionary sessionField; + #endregion + #region Properties + /// + /// The string builder that generation-time code is using to assemble generated output + /// + protected System.Text.StringBuilder GenerationEnvironment + { + get + { + if ((this.generationEnvironmentField == null)) + { + this.generationEnvironmentField = new global::System.Text.StringBuilder(); + } + return this.generationEnvironmentField; + } + set + { + this.generationEnvironmentField = value; + } + } + /// + /// The error collection for the generation process + /// + public System.CodeDom.Compiler.CompilerErrorCollection Errors + { + get + { + if ((this.errorsField == null)) + { + this.errorsField = new global::System.CodeDom.Compiler.CompilerErrorCollection(); + } + return this.errorsField; + } + } + /// + /// A list of the lengths of each indent that was added with PushIndent + /// + private System.Collections.Generic.List indentLengths + { + get + { + if ((this.indentLengthsField == null)) + { + this.indentLengthsField = new global::System.Collections.Generic.List(); + } + return this.indentLengthsField; + } + } + /// + /// Gets the current indent we use when adding lines to the output + /// + public string CurrentIndent + { + get + { + return this.currentIndentField; + } + } + /// + /// Current transformation session + /// + public virtual global::System.Collections.Generic.IDictionary Session + { + get + { + return this.sessionField; + } + set + { + this.sessionField = value; + } + } + #endregion + #region Transform-time helpers + /// + /// Write text directly into the generated output + /// + public void Write(string textToAppend) + { + if (string.IsNullOrEmpty(textToAppend)) + { + return; + } + // If we're starting off, or if the previous text ended with a newline, + // we have to append the current indent first. + if (((this.GenerationEnvironment.Length == 0) + || this.endsWithNewline)) + { + this.GenerationEnvironment.Append(this.currentIndentField); + this.endsWithNewline = false; + } + // Check if the current text ends with a newline + if (textToAppend.EndsWith(global::System.Environment.NewLine, global::System.StringComparison.CurrentCulture)) + { + this.endsWithNewline = true; + } + // This is an optimization. If the current indent is "", then we don't have to do any + // of the more complex stuff further down. + if ((this.currentIndentField.Length == 0)) + { + this.GenerationEnvironment.Append(textToAppend); + return; + } + // Everywhere there is a newline in the text, add an indent after it + textToAppend = textToAppend.Replace(global::System.Environment.NewLine, (global::System.Environment.NewLine + this.currentIndentField)); + // If the text ends with a newline, then we should strip off the indent added at the very end + // because the appropriate indent will be added when the next time Write() is called + if (this.endsWithNewline) + { + this.GenerationEnvironment.Append(textToAppend, 0, (textToAppend.Length - this.currentIndentField.Length)); + } + else + { + this.GenerationEnvironment.Append(textToAppend); + } + } + /// + /// Write text directly into the generated output + /// + public void WriteLine(string textToAppend) + { + this.Write(textToAppend); + this.GenerationEnvironment.AppendLine(); + this.endsWithNewline = true; + } + /// + /// Write formatted text directly into the generated output + /// + public void Write(string format, params object[] args) + { + this.Write(string.Format(global::System.Globalization.CultureInfo.CurrentCulture, format, args)); + } + /// + /// Write formatted text directly into the generated output + /// + public void WriteLine(string format, params object[] args) + { + this.WriteLine(string.Format(global::System.Globalization.CultureInfo.CurrentCulture, format, args)); + } + /// + /// Raise an error + /// + public void Error(string message) + { + System.CodeDom.Compiler.CompilerError error = new global::System.CodeDom.Compiler.CompilerError(); + error.ErrorText = message; + this.Errors.Add(error); + } + /// + /// Raise a warning + /// + public void Warning(string message) + { + System.CodeDom.Compiler.CompilerError error = new global::System.CodeDom.Compiler.CompilerError(); + error.ErrorText = message; + error.IsWarning = true; + this.Errors.Add(error); + } + /// + /// Increase the indent + /// + public void PushIndent(string indent) + { + if ((indent == null)) + { + throw new global::System.ArgumentNullException("indent"); + } + this.currentIndentField = (this.currentIndentField + indent); + this.indentLengths.Add(indent.Length); + } + /// + /// Remove the last indent that was added with PushIndent + /// + public string PopIndent() + { + string returnValue = ""; + if ((this.indentLengths.Count > 0)) + { + int indentLength = this.indentLengths[(this.indentLengths.Count - 1)]; + this.indentLengths.RemoveAt((this.indentLengths.Count - 1)); + if ((indentLength > 0)) + { + returnValue = this.currentIndentField.Substring((this.currentIndentField.Length - indentLength)); + this.currentIndentField = this.currentIndentField.Remove((this.currentIndentField.Length - indentLength)); + } + } + return returnValue; + } + /// + /// Remove any indentation + /// + public void ClearIndent() + { + this.indentLengths.Clear(); + this.currentIndentField = ""; + } + #endregion + #region ToString Helpers + /// + /// Utility class to produce culture-oriented representation of an object as a string. + /// + public class ToStringInstanceHelper + { + private System.IFormatProvider formatProviderField = global::System.Globalization.CultureInfo.InvariantCulture; + /// + /// Gets or sets format provider to be used by ToStringWithCulture method. + /// + public System.IFormatProvider FormatProvider + { + get + { + return this.formatProviderField ; + } + set + { + if ((value != null)) + { + this.formatProviderField = value; + } + } + } + /// + /// This is called from the compile/run appdomain to convert objects within an expression block to a string + /// + public string ToStringWithCulture(object objectToConvert) + { + if ((objectToConvert == null)) + { + throw new global::System.ArgumentNullException("objectToConvert"); + } + System.Type t = objectToConvert.GetType(); + System.Reflection.MethodInfo method = t.GetMethod("ToString", new System.Type[] { + typeof(System.IFormatProvider)}); + if ((method == null)) + { + return objectToConvert.ToString(); + } + else + { + return ((string)(method.Invoke(objectToConvert, new object[] { + this.formatProviderField }))); + } + } + } + private ToStringInstanceHelper toStringHelperField = new ToStringInstanceHelper(); + /// + /// Helper to produce culture-oriented representation of an object as a string + /// + public ToStringInstanceHelper ToStringHelper + { + get + { + return this.toStringHelperField; + } + } + #endregion + } + #endregion +} diff --git a/testML/DictionaryToObjectConverterClass.tt b/testML/DictionaryToObjectConverterClass.tt new file mode 100644 index 0000000..9a3abd6 --- /dev/null +++ b/testML/DictionaryToObjectConverterClass.tt @@ -0,0 +1,53 @@ +<#@ template language="C#" #> +<#@ assembly name="System.Core" #> +<#@ import namespace="System.Linq" #> +<#@ import namespace="System.Text" #> +<#@ import namespace="System.Collections.Generic" #> + +using System; +using System.Text; +using Microsoft.ML.Data; + +namespace DictionaryToObjectConverterNamespace +{ + public class <#= ClassName #> : testML.IDictionaryToObjectConverter + { + <# foreach (var key in Definition.Keys) + { + var type = Definition[key]; + #> + + [ColumnName("<#= key #>")] + public <#= type.FullName #> <#= key #> { get; set; } + <# } //Fin loop #> + + public object this[string propertyName] + { + get { return GetValue(propertyName); } + set { SetValue(propertyName, value); } + } + + public void SetValue(string propertyName, object value) + { + switch(propertyName) + { + <# foreach (var key in Definition.Keys) { + var type = Definition[key]; #> + case "<#= key #>": <#= key #> = (<#= type.FullName #>)value; break; + <# } #> + } + } + + public object GetValue(string propertyName) + { + switch(propertyName) + { + <# foreach (var key in Definition.Keys) { #> + case "<#= key #>": return <#= key #>; + <# } #> + + } + return null; + } + } +} \ No newline at end of file diff --git a/testML/Program.cs b/testML/Program.cs index 4207f9e..dc03b5e 100644 --- a/testML/Program.cs +++ b/testML/Program.cs @@ -1,10 +1,16 @@ using Microsoft.ML; +using Microsoft.ML.AutoML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; +using NPOI.XSSF.UserModel; using System; using System.Collections.Generic; +using System.Dynamic; +using System.Globalization; +using System.IO; using System.Linq; using System.Reflection; +using System.Security.AccessControl; using System.Text; using System.Threading.Tasks; using System.Xml.Linq; @@ -13,27 +19,106 @@ namespace testML { internal class Program { - static string[] tags = new string[] { "Rojo", "Amarillo", "Verde claro", "Verde oscuro", "Violeta", "Naranja", "Azul", "Blanco" }; static Random rnd = new Random(); static void Main(string[] args) { + XSSFWorkbook wb; + using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) + { + wb = new XSSFWorkbook(file); + } + + var sheet = wb.GetSheetAt(0); + + var headerRow = sheet.GetRow(0); + #region Preparamos los datos de entrenamiento - var tmpData = new List(); - - for (var c = 0; c < 15000; c++) + var tmpData = new List>(); + + for (var r = 1; r < sheet.LastRowNum - 1; r++) { - var d = CreateRandomData(); - tmpData.Add(d); + if (r == 30) break; + Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1)); + var row = sheet.GetRow(r); + + var rowData = new Dictionary(); + + string prefix = string.Empty; + + for (var c = 0; c < headerRow.LastCellNum; c++) + { + var usePrefix = true; + var columnName = headerRow.GetCell(c)?.StringCellValue; + + columnName = FixColumnName(columnName); + + object value = null; + + if (columnName == "PMASCULINO") + { + prefix = "MASCULINO_"; + usePrefix = false; + } + if (columnName == "PFEMENINO") + { + prefix = "FEMENINO_"; + usePrefix = false; + } + if (columnName == "DESCENDIENTE") + { + prefix = "DESCENDIENTE_"; + usePrefix = false; + } + + + switch (row.GetCell(c)?.CellType) + { + case NPOI.SS.UserModel.CellType.Numeric: value = row.GetCell(c)?.NumericCellValue; break; + case NPOI.SS.UserModel.CellType.String: value = row.GetCell(c)?.StringCellValue; break; + } + + string valuePrefix = string.Empty; + if (columnName.StartsWith("S4i") || columnName.StartsWith("SNP")) + { + valuePrefix = columnName + "_"; + } + + var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName; + if (value is string) + { + rowData.Add(finalColumnName, valuePrefix + value); + } + else + { + rowData.Add(finalColumnName, value?.ToString() ?? ""); + } + + + + } + tmpData.Add(rowData); } #endregion + MLContext mlContext = new MLContext(); - var data = mlContext.Data.LoadFromEnumerable(tmpData); + + var dataConverted = DictionaryToObjectConverter.Convert(tmpData, out Type classType, out DataViewSchema schema); + + var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault(); + + var loadMethodObj = loadMethod.MakeGenericMethod(classType); + var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null }); + + + //var data = mlContext.Data.LoadFromEnumerable(dataConverted, schema); + + //var data = new DictionaryView(tmpData, schema.ToSchema(), converter); #region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión) @@ -45,20 +130,60 @@ namespace testML #region Preparamos los datos de entrada y salida - var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations:1000); - //var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f ); + //var trainer = mlContext.Regression.Trainers.Sdca(maximumNumberOfIterations: 100); + var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 100, learningRate: 0.01f); - var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "IntegerNumber") - .Append(mlContext.Transforms.Text.NormalizeText("StringTest")) - .Append(mlContext.Transforms.Text.FeaturizeText("StringTest")) - .Append(mlContext.Transforms.Concatenate("Features", "Enum1", "Enum2", "Enum3", "Enum4", "StringTest")) - .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(trainer); + //var pipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: "DESCENDIENTE_S4i001"); + //IEstimator pipe = (IEstimator)pipeline; + + //pipe = pipe.Append(mlContext.Transforms.Text.NormalizeText("Label")); + //pipe = pipe.Append(mlContext.Transforms.Text.FeaturizeText("Label")); + var firstRow = tmpData[0] as IDictionary; + + var columnInference = new ColumnInformation() + { + LabelColumnName = "DESCENDIENTE_S4i001" + }; + + foreach (var key in firstRow.Keys) + { + if (key == columnInference.LabelColumnName) + { + continue; + } + + if (key.Contains("_S4i") || key.Contains("_SNP")) + { + columnInference.CategoricalColumnNames.Add(key); + } + } + + mlContext.Log += (_, e) => { + if (e.Source.Equals("AutoMLExperiment")) + { + Console.WriteLine(e.RawMessage); + } + }; + + + SweepablePipeline pipeline = mlContext.Auto().Featurizer(data, columnInference) + .Append(mlContext.Auto().Regression(labelColumnName: columnInference.LabelColumnName)); + + AutoMLExperiment experiment = mlContext.Auto().CreateExperiment(); + + experiment + .SetPipeline(pipeline) + .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.LabelColumnName) + .SetTrainingTimeInSeconds(60) + .SetDataset(trainData); + + var result = experiment.Run(); #endregion + /* //Entrenamos el modelo - ITransformer model = pipeline.Fit(trainData); + ITransformer model = pipe.Fit(trainData); #region Hacemos un test para medir el % de error @@ -85,19 +210,40 @@ namespace testML test.IntegerNumber = 0; var p = predictionFunction.Predict(test); - - Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected , expected- p.IntegerNumber); + + Console.WriteLine("Found: {0:#,##0.00}\tExpected: {1:#,##0.00}\t\tDiff: {2:#,##0.00}", p.IntegerNumber, expected, expected - p.IntegerNumber); } #endregion - - + */ Console.WriteLine(); Console.WriteLine("Press enter to Exit"); Console.ReadLine(); } + private static string FixColumnName(string columnName) + { + var result = new StringBuilder(columnName.Length); + + foreach (var c in columnName) + { + if (c == 'º' || c == 'ª') + { + continue; + } + + if (char.IsLetter(c) || + char.IsNumber(c) || + (c == '_')) + { + result.Append(c); + } + } + + return result.ToString(); + } + private static Data CreateRandomData() { var d = new Data() @@ -107,12 +253,14 @@ namespace testML Enum2 = rnd.Next(1, 11), Enum3 = rnd.Next(1, 6), Enum4 = rnd.Next(1, 6), - StringTest = tags[rnd.Next(0, tags.Length)] + // StringTest = tags[rnd.Next(0, tags.Length)] }; + d.Enum4 = d.Enum1 + d.Enum2; + // Ponemos algunos datos que tengan alguna relación (la red neuronal debería calibrarse para comprender esta formula) d.IntegerNumber = (((d.Enum1 + d.Enum2) - (d.Enum3 + d.Enum4)) * 5.25f) + d.StringTest.Length; - + d.DecimalNumber = (d.Enum2 / d.Enum1) * (2.0f + (1.0f / d.StringTest.Length)); if (d.StringTest == "Azul") @@ -133,4 +281,5 @@ namespace testML } + } diff --git a/testML/packages.config b/testML/packages.config index 1116d1d..37132f8 100644 --- a/testML/packages.config +++ b/testML/packages.config @@ -1,16 +1,60 @@  + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + - - - + + + + - + + + + + + + + + + \ No newline at end of file diff --git a/testML/testML.csproj b/testML/testML.csproj index b5a7157..1bc970e 100644 --- a/testML/testML.csproj +++ b/testML/testML.csproj @@ -1,5 +1,15 @@  + + + + + + + + + + @@ -40,9 +50,33 @@ testML.Program + + ..\packages\Google.Protobuf.3.21.9\lib\net45\Google.Protobuf.dll + + + ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll + + + ..\packages\Microsoft.Bcl.AsyncInterfaces.6.0.0\lib\net461\Microsoft.Bcl.AsyncInterfaces.dll + + + ..\packages\Microsoft.CodeAnalysis.Common.3.9.0\lib\netstandard2.0\Microsoft.CodeAnalysis.dll + + + ..\packages\Microsoft.CodeAnalysis.CSharp.3.9.0\lib\netstandard2.0\Microsoft.CodeAnalysis.CSharp.dll + + + ..\packages\Microsoft.Extensions.DependencyInjection.6.0.0\lib\net461\Microsoft.Extensions.DependencyInjection.dll + + + ..\packages\Microsoft.Extensions.DependencyInjection.Abstractions.6.0.0\lib\net461\Microsoft.Extensions.DependencyInjection.Abstractions.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.dll + + ..\packages\Microsoft.ML.AutoML.0.20.0\lib\netstandard2.0\Microsoft.ML.AutoML.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.Core.dll @@ -55,41 +89,134 @@ ..\packages\Microsoft.ML.DataView.2.0.0\lib\netstandard2.0\Microsoft.ML.DataView.dll + + ..\packages\Microsoft.ML.DnnImageFeaturizer.AlexNet.0.20.0\lib\netstandard2.0\Microsoft.ML.DnnImageFeaturizer.AlexNet.dll + + + ..\packages\Microsoft.ML.DnnImageFeaturizer.ResNet101.0.20.0\lib\netstandard2.0\Microsoft.ML.DnnImageFeaturizer.ResNet101.dll + + + ..\packages\Microsoft.ML.DnnImageFeaturizer.ResNet18.0.20.0\lib\netstandard2.0\Microsoft.ML.DnnImageFeaturizer.ResNet18.dll + + + ..\packages\Microsoft.ML.DnnImageFeaturizer.ResNet50.0.20.0\lib\netstandard2.0\Microsoft.ML.DnnImageFeaturizer.ResNet50.dll + + + ..\packages\Microsoft.ML.FastTree.2.0.0\lib\netstandard2.0\Microsoft.ML.FastTree.dll + + + ..\packages\Microsoft.ML.ImageAnalytics.2.0.0\lib\netstandard2.0\Microsoft.ML.ImageAnalytics.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.KMeansClustering.dll + + ..\packages\Microsoft.ML.LightGbm.2.0.0\lib\netstandard2.0\Microsoft.ML.LightGbm.dll + + + ..\packages\Microsoft.ML.Mkl.Components.2.0.0\lib\netstandard2.0\Microsoft.ML.Mkl.Components.dll + + + ..\packages\Microsoft.ML.OnnxRuntime.Managed.1.10.0\lib\netstandard2.0\Microsoft.ML.OnnxRuntime.dll + + + ..\packages\Microsoft.ML.OnnxTransformer.2.0.0\lib\netstandard2.0\Microsoft.ML.OnnxTransformer.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.PCA.dll + + ..\packages\Microsoft.ML.Recommender.0.20.0\lib\netstandard2.0\Microsoft.ML.Recommender.dll + + + ..\packages\Microsoft.ML.AutoML.0.20.0\lib\netstandard2.0\Microsoft.ML.SearchSpace.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.StandardTrainers.dll + + ..\packages\Microsoft.ML.TensorFlow.2.0.0\lib\netstandard2.0\Microsoft.ML.TensorFlow.dll + + + ..\packages\Microsoft.ML.TimeSeries.2.0.0\lib\netstandard2.0\Microsoft.ML.TimeSeries.dll + + + ..\packages\Microsoft.ML.Tokenizers.0.20.0\lib\netstandard2.0\Microsoft.ML.Tokenizers.dll + + + ..\packages\Microsoft.ML.TorchSharp.0.20.0\lib\netstandard2.0\Microsoft.ML.TorchSharp.dll + ..\packages\Microsoft.ML.2.0.0\lib\netstandard2.0\Microsoft.ML.Transforms.dll + + ..\packages\Microsoft.ML.Vision.2.0.0\lib\netstandard2.0\Microsoft.ML.Vision.dll + ..\packages\Newtonsoft.Json.13.0.1\lib\net45\Newtonsoft.Json.dll + + ..\packages\NPOI.Excel.2.1.1\lib\NPOI.dll + + + ..\packages\NPOI.Excel.2.1.1\lib\NPOI.OOXML.dll + + + ..\packages\NPOI.Excel.2.1.1\lib\NPOI.OpenXml4Net.dll + + + ..\packages\NPOI.Excel.2.1.1\lib\NPOI.OpenXmlFormats.dll + + + ..\packages\NumSharp.Lite.0.1.8\lib\netstandard2.0\NumSharp.Lite.dll + + + ..\packages\Protobuf.Text.0.4.0\lib\netstandard2.0\Protobuf.Text.dll + + + ..\packages\SkiaSharp.2.88.3\lib\net462\SkiaSharp.dll + - - ..\packages\System.Buffers.4.4.0\lib\netstandard2.0\System.Buffers.dll + + ..\packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll ..\packages\System.CodeDom.4.5.0\lib\net461\System.CodeDom.dll - - ..\packages\System.Collections.Immutable.1.5.0\lib\netstandard2.0\System.Collections.Immutable.dll + + ..\packages\System.Collections.Immutable.5.0.0\lib\net461\System.Collections.Immutable.dll + + ..\packages\System.IO.FileSystem.AccessControl.4.5.0\lib\net461\System.IO.FileSystem.AccessControl.dll + - ..\packages\System.Memory.4.5.3\lib\netstandard2.0\System.Memory.dll + ..\packages\System.Memory.4.5.4\lib\net461\System.Memory.dll - - ..\packages\System.Numerics.Vectors.4.4.0\lib\net46\System.Numerics.Vectors.dll + + ..\packages\System.Numerics.Vectors.4.5.0\lib\net46\System.Numerics.Vectors.dll - - ..\packages\System.Runtime.CompilerServices.Unsafe.4.5.3\lib\net461\System.Runtime.CompilerServices.Unsafe.dll + + ..\packages\System.Reflection.Metadata.5.0.0\lib\net461\System.Reflection.Metadata.dll + + + ..\packages\System.Runtime.CompilerServices.Unsafe.6.0.0\lib\net461\System.Runtime.CompilerServices.Unsafe.dll + + + ..\packages\System.Security.AccessControl.4.5.0\lib\net461\System.Security.AccessControl.dll + + + ..\packages\System.Security.Principal.Windows.4.5.0\lib\net461\System.Security.Principal.Windows.dll + + + ..\packages\System.Text.Encoding.CodePages.4.5.1\lib\net461\System.Text.Encoding.CodePages.dll + + + ..\packages\System.Text.Encodings.Web.6.0.0\lib\net461\System.Text.Encodings.Web.dll + + + ..\packages\System.Text.Json.6.0.1\lib\net461\System.Text.Json.dll ..\packages\System.Threading.Channels.4.7.1\lib\net461\System.Threading.Channels.dll @@ -97,22 +224,50 @@ ..\packages\System.Threading.Tasks.Extensions.4.5.4\lib\net461\System.Threading.Tasks.Extensions.dll + + ..\packages\System.ValueTuple.4.5.0\lib\net47\System.ValueTuple.dll + + + ..\packages\TensorFlow.NET.0.20.1\lib\netstandard2.0\TensorFlow.NET.dll + + + ..\packages\TorchSharp.0.99.0\lib\netstandard2.0\TorchSharp.dll + + + + True + True + DictionaryToObjectConverterClass.tt + + + + TextTemplatingFilePreprocessor + DictionaryToObjectConverterClass.cs + + + + + + + + + @@ -121,6 +276,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file