1
0

Adaptado para que agrupe los datos

This commit is contained in:
2023-01-19 10:55:47 +01:00
parent 105e2e471e
commit 8b128ceb46
2 changed files with 85 additions and 42 deletions

View File

@@ -10,14 +10,18 @@ using System.Linq;
using System.Reflection; using System.Reflection;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Text; using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
namespace testML namespace testML
{ {
public static class DictionaryToObjectConverter public static class DictionaryToObjectConverter
{ {
public static IEnumerable<object> Convert(List<Dictionary<string, object>> data, string toPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema) public static IEnumerable<object> Convert(List<Dictionary<string, object>> data, string toPredict, string objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema)
{ {
var regexCR = new Regex(@"_CR\d+");
var currentCR = regexCR.Match(toPredict).Groups[0].Value;
var schemaBuilder = new DataViewSchema.Builder(); var schemaBuilder = new DataViewSchema.Builder();
var definition = new Dictionary<string, Type>(); var definition = new Dictionary<string, Type>();
@@ -27,6 +31,11 @@ namespace testML
{ {
foreach (var key in sample.Keys) foreach (var key in sample.Keys)
{ {
if (!key.Contains(currentCR))
{
continue;
}
//Buscamos el tipo //Buscamos el tipo
var sampleValue = (from x in data where x.ContainsKey(key) && x[key] != null select x[key]).FirstOrDefault(); var sampleValue = (from x in data where x.ContainsKey(key) && x[key] != null select x[key]).FirstOrDefault();
if (sampleValue != null) if (sampleValue != null)
@@ -58,22 +67,34 @@ namespace testML
} }
} }
var className = "OBJ"+ Path.GetFileNameWithoutExtension(objectFilename).Replace(".", "_");
Assembly dllAssembly = null;
if (File.Exists(Path.Combine(Environment.CurrentDirectory, objectFilename)))
{
dllAssembly = Assembly.LoadFrom(Path.Combine(Environment.CurrentDirectory, objectFilename));
}
else
{
var converter = new DictionaryToObjectConverterClass() var converter = new DictionaryToObjectConverterClass()
{ {
ClassName = "OBJ" + Guid.NewGuid().ToString("N").ToUpper(), ClassName = className,
ToPredict = toPredict, ToPredict = toPredict,
Definition = definition Definition = definition
}; };
var classCode = converter.TransformText(); var classCode = converter.TransformText();
File.WriteAllText(Path.Combine(Environment.CurrentDirectory, objectFilename + ".cs"), classCode, Encoding.UTF8);
string exDir = System.Runtime.InteropServices.RuntimeEnvironment.GetRuntimeDirectory().Trim(Path.DirectorySeparatorChar); string exDir = System.Runtime.InteropServices.RuntimeEnvironment.GetRuntimeDirectory().Trim(Path.DirectorySeparatorChar);
CompilerParameters compilerParameters = new CompilerParameters CompilerParameters compilerParameters = new CompilerParameters
{ {
GenerateExecutable = false, GenerateExecutable = false,
GenerateInMemory = true, GenerateInMemory = false,
IncludeDebugInformation = true, IncludeDebugInformation = true,
TreatWarningsAsErrors = false, TreatWarningsAsErrors = false,
OutputAssembly = Path.Combine(Environment.CurrentDirectory, objectFilename),
CompilerOptions = string.Format("/target:library /lib:{0}", exDir) CompilerOptions = string.Format("/target:library /lib:{0}", exDir)
}; };
@@ -92,10 +113,11 @@ namespace testML
new string[] { classCode } new string[] { classCode }
); );
var dllAssembly = compilerResults.CompiledAssembly; dllAssembly = compilerResults.CompiledAssembly;
}
classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName); classType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + className);
classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + converter.ClassName + "Prediction"); classPredictionType = dllAssembly.GetType("DictionaryToObjectConverterNamespace." + className + "Prediction");
Type listType = typeof(List<>); Type listType = typeof(List<>);
Type genericType = listType.MakeGenericType(classType); Type genericType = listType.MakeGenericType(classType);

View File

@@ -16,6 +16,7 @@ using System.Reflection;
using System.Security.AccessControl; using System.Security.AccessControl;
using System.Security.Cryptography; using System.Security.Cryptography;
using System.Text; using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Xml.Linq; using System.Xml.Linq;
using static TorchSharp.torch.utils; using static TorchSharp.torch.utils;
@@ -30,21 +31,23 @@ namespace testML
{ {
XSSFWorkbook wb; XSSFWorkbook wb;
//using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) //using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read)) using (FileStream file = new FileStream(@"entrenar_IA (1).xlsx", FileMode.Open, FileAccess.Read))
{ {
wb = new XSSFWorkbook(file); wb = new XSSFWorkbook(file);
} }
var sheet = wb.GetSheetAt(0); var sheet = wb.GetSheetAt(0);
var headerRow = sheet.GetRow(0); var CRRow = sheet.GetRow(0);
var headerRow = sheet.GetRow(1);
#region Preparamos los datos de entrenamiento #region Preparamos los datos de entrenamiento
var tmpData = new List<Dictionary<string, object>>(); var tmpData = new List<Dictionary<string, object>>();
for (var r = 1; r < sheet.LastRowNum - 1; r++) for (var r = headerRow.RowNum + 1; r < sheet.LastRowNum - 1; r++)
{ {
//if (r == 50) break; //if (r == 50) break;
Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1)); Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1));
@@ -58,6 +61,7 @@ namespace testML
{ {
var usePrefix = true; var usePrefix = true;
var columnName = headerRow.GetCell(c)?.StringCellValue; var columnName = headerRow.GetCell(c)?.StringCellValue;
var crCell = CRRow.GetCell(c)?.NumericCellValue;
columnName = FixColumnName(columnName); columnName = FixColumnName(columnName);
@@ -93,6 +97,12 @@ namespace testML
} }
var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName; var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName;
if (crCell != null)
{
finalColumnName = finalColumnName + "_CR" + crCell.Value.ToString();
}
if (value is string) if (value is string)
{ {
rowData.Add(finalColumnName, valuePrefix + value); rowData.Add(finalColumnName, valuePrefix + value);
@@ -164,11 +174,21 @@ namespace testML
private static void MakePrediction(List<Dictionary<string, object>> tmpData, string columnToPredict) private static void MakePrediction(List<Dictionary<string, object>> tmpData, string columnToPredict)
{ {
var regexCR = new Regex(@"_CR\d+");
var currentCR = regexCR.Match(columnToPredict).Groups[0].Value;
var firstRow = tmpData[0] as IDictionary<string, object>; var firstRow = tmpData[0] as IDictionary<string, object>;
var hashKey = new StringBuilder(); var hashKey = new StringBuilder();
foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x)) foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x))
{ {
if(!key.Contains(currentCR))
{
continue;
}
if (hashKey.Length > 0) { hashKey.Append("+"); } if (hashKey.Length > 0) { hashKey.Append("+"); }
hashKey.Append(key); hashKey.Append(key);
} }
@@ -177,6 +197,7 @@ namespace testML
var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray()); var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray());
var modelFilename = columnToPredict + "." + hash + ".zip"; var modelFilename = columnToPredict + "." + hash + ".zip";
var objectFilename = columnToPredict + "." + hash + ".dll";
#endregion #endregion
@@ -185,7 +206,6 @@ namespace testML
MLContext mlContext = new MLContext(); MLContext mlContext = new MLContext();
mlContext.Log += (_, e) => mlContext.Log += (_, e) =>
{ {
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; } if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; }
@@ -203,7 +223,7 @@ namespace testML
}; };
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema); var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema);
ITransformer _trainedModel; ITransformer _trainedModel;
if (!File.Exists(modelFilename)) if (!File.Exists(modelFilename))
@@ -236,6 +256,7 @@ namespace testML
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType); var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
Console.WriteLine("Training..."); Console.WriteLine("Training...");
_trainedModel = trainingPipeline.Fit(trainData); _trainedModel = trainingPipeline.Fit(trainData);
mlContext.Model.Save(_trainedModel, data.Schema, modelFilename); mlContext.Model.Save(_trainedModel, data.Schema, modelFilename);