From 8b128ceb46f458fde2b8dc8a456fe0d6c34e3410 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Maldonado=20S=C3=A1nchez?= Date: Thu, 19 Jan 2023 10:55:47 +0100 Subject: [PATCH] Adaptado para que agrupe los datos --- testML/DictionaryToObjectConverter.cs | 88 +++++++++++++++++---------- testML/Program.cs | 39 +++++++++--- 2 files changed, 85 insertions(+), 42 deletions(-) diff --git a/testML/DictionaryToObjectConverter.cs b/testML/DictionaryToObjectConverter.cs index 5164f62..02cb9b3 100644 --- a/testML/DictionaryToObjectConverter.cs +++ b/testML/DictionaryToObjectConverter.cs @@ -10,14 +10,18 @@ using System.Linq; using System.Reflection; using System.Runtime.CompilerServices; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; namespace testML { public static class DictionaryToObjectConverter { - public static IEnumerable Convert(List> data, string toPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema) + public static IEnumerable Convert(List> data, string toPredict, string objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema) { + var regexCR = new Regex(@"_CR\d+"); + var currentCR = regexCR.Match(toPredict).Groups[0].Value; + var schemaBuilder = new DataViewSchema.Builder(); var definition = new Dictionary(); @@ -27,6 +31,11 @@ namespace testML { foreach (var key in sample.Keys) { + if (!key.Contains(currentCR)) + { + continue; + } + //Buscamos el tipo var sampleValue = (from x in data where x.ContainsKey(key) && x[key] != null select x[key]).FirstOrDefault(); if (sampleValue != null) @@ -58,44 +67,57 @@ namespace testML } } - var converter = new DictionaryToObjectConverterClass() + var className = "OBJ"+ Path.GetFileNameWithoutExtension(objectFilename).Replace(".", "_"); + + Assembly dllAssembly = null; + if (File.Exists(Path.Combine(Environment.CurrentDirectory, objectFilename))) { - ClassName = "OBJ" + Guid.NewGuid().ToString("N").ToUpper(), - ToPredict = toPredict, - Definition = definition - }; - - var classCode = converter.TransformText(); - - string exDir = System.Runtime.InteropServices.RuntimeEnvironment.GetRuntimeDirectory().Trim(Path.DirectorySeparatorChar); - CompilerParameters compilerParameters = new CompilerParameters + dllAssembly = Assembly.LoadFrom(Path.Combine(Environment.CurrentDirectory, objectFilename)); + } + else { - GenerateExecutable = false, - GenerateInMemory = true, - IncludeDebugInformation = true, - TreatWarningsAsErrors = false, - CompilerOptions = string.Format("/target:library /lib:{0}", exDir) - }; + var converter = new DictionaryToObjectConverterClass() + { + ClassName = className, + ToPredict = toPredict, + Definition = definition + }; - var mlDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + var classCode = converter.TransformText(); - foreach (var item in AppDomain.CurrentDomain.GetAssemblies()) - { - compilerParameters.ReferencedAssemblies.Add(item.Location); + File.WriteAllText(Path.Combine(Environment.CurrentDirectory, objectFilename + ".cs"), classCode, Encoding.UTF8); + + string exDir = System.Runtime.InteropServices.RuntimeEnvironment.GetRuntimeDirectory().Trim(Path.DirectorySeparatorChar); + CompilerParameters compilerParameters = new CompilerParameters + { + GenerateExecutable = false, + GenerateInMemory = false, + IncludeDebugInformation = true, + TreatWarningsAsErrors = false, + OutputAssembly = Path.Combine(Environment.CurrentDirectory, objectFilename), + CompilerOptions = string.Format("/target:library /lib:{0}", exDir) + }; + + var mlDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + + foreach (var item in AppDomain.CurrentDomain.GetAssemblies()) + { + compilerParameters.ReferencedAssemblies.Add(item.Location); + } + + + // compilerParameters.ReferencedAssemblies.Add(Assembly.GetExecutingAssembly().Location); + + CompilerResults compilerResults = + new CSharpCodeProvider(new Dictionary { { "CompilerVersion", "v4.0" } }).CompileAssemblyFromSource(compilerParameters, + new string[] { classCode } + ); + + dllAssembly = compilerResults.CompiledAssembly; } - - // compilerParameters.ReferencedAssemblies.Add(Assembly.GetExecutingAssembly().Location); - - CompilerResults compilerResults = - new CSharpCodeProvider(new Dictionary { { "CompilerVersion", "v4.0" } }).CompileAssemblyFromSource(compilerParameters, - new string[] { classCode } - ); - - var dllAssembly = compilerResults.CompiledAssembly; - - classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName); - classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName + "Prediction"); + classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + className); + classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + className + "Prediction"); Type listType = typeof(List<>); Type genericType = listType.MakeGenericType(classType); diff --git a/testML/Program.cs b/testML/Program.cs index 6c48a35..17306b1 100644 --- a/testML/Program.cs +++ b/testML/Program.cs @@ -16,6 +16,7 @@ using System.Reflection; using System.Security.AccessControl; using System.Security.Cryptography; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Xml.Linq; using static TorchSharp.torch.utils; @@ -30,21 +31,23 @@ namespace testML { XSSFWorkbook wb; //using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) - using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read)) + using (FileStream file = new FileStream(@"entrenar_IA (1).xlsx", FileMode.Open, FileAccess.Read)) { wb = new XSSFWorkbook(file); } var sheet = wb.GetSheetAt(0); - var headerRow = sheet.GetRow(0); + var CRRow = sheet.GetRow(0); + var headerRow = sheet.GetRow(1); + #region Preparamos los datos de entrenamiento var tmpData = new List>(); - for (var r = 1; r < sheet.LastRowNum - 1; r++) + for (var r = headerRow.RowNum + 1; r < sheet.LastRowNum - 1; r++) { //if (r == 50) break; Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1)); @@ -58,6 +61,7 @@ namespace testML { var usePrefix = true; var columnName = headerRow.GetCell(c)?.StringCellValue; + var crCell = CRRow.GetCell(c)?.NumericCellValue; columnName = FixColumnName(columnName); @@ -93,6 +97,12 @@ namespace testML } var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName; + + if (crCell != null) + { + finalColumnName = finalColumnName + "_CR" + crCell.Value.ToString(); + } + if (value is string) { rowData.Add(finalColumnName, valuePrefix + value); @@ -120,7 +130,7 @@ namespace testML item.Remove(key); } } - } + } } foreach (var key in firstRow.Keys) @@ -135,12 +145,12 @@ namespace testML try { var sw = new Stopwatch(); - sw.Start(); + sw.Start(); MakePrediction(tmpData, key); sw.Stop(); - + Console.WriteLine("Elapsed: " + sw.Elapsed.ToString()); GC.Collect(); @@ -164,11 +174,21 @@ namespace testML private static void MakePrediction(List> tmpData, string columnToPredict) { + var regexCR = new Regex(@"_CR\d+"); + + var currentCR = regexCR.Match(columnToPredict).Groups[0].Value; + + var firstRow = tmpData[0] as IDictionary; var hashKey = new StringBuilder(); foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x)) { + if(!key.Contains(currentCR)) + { + continue; + } + if (hashKey.Length > 0) { hashKey.Append("+"); } hashKey.Append(key); } @@ -177,6 +197,7 @@ namespace testML var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray()); var modelFilename = columnToPredict + "." + hash + ".zip"; + var objectFilename = columnToPredict + "." + hash + ".dll"; #endregion @@ -185,7 +206,6 @@ namespace testML MLContext mlContext = new MLContext(); - mlContext.Log += (_, e) => { if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; } @@ -203,7 +223,7 @@ namespace testML }; - var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema); + var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema); ITransformer _trainedModel; if (!File.Exists(modelFilename)) @@ -234,8 +254,9 @@ namespace testML var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes); var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType); - + Console.WriteLine("Training..."); + _trainedModel = trainingPipeline.Fit(trainData); mlContext.Model.Save(_trainedModel, data.Schema, modelFilename);