1
0

Ejecutando con el conjunto completo

This commit is contained in:
2023-01-18 15:20:57 +01:00
parent 7f68e262f4
commit 105e2e471e

View File

@@ -7,15 +7,18 @@ using Microsoft.SqlServer.Server;
using NPOI.XSSF.UserModel; using NPOI.XSSF.UserModel;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.Dynamic; using System.Dynamic;
using System.Globalization; using System.Globalization;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Reflection; using System.Reflection;
using System.Security.AccessControl; using System.Security.AccessControl;
using System.Security.Cryptography;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Xml.Linq; using System.Xml.Linq;
using static TorchSharp.torch.utils;
namespace testML namespace testML
{ {
@@ -98,20 +101,16 @@ namespace testML
{ {
rowData.Add(finalColumnName, value?.ToString() ?? ""); rowData.Add(finalColumnName, value?.ToString() ?? "");
} }
} }
tmpData.Add(rowData); tmpData.Add(rowData);
} }
//Eliminamos las columnas en blanco
var columnToPredict = "DESCENDIENTE_S4i001";
var firstRow = tmpData[0] as IDictionary<string, object>; var firstRow = tmpData[0] as IDictionary<string, object>;
foreach (var key in firstRow.Keys.ToArray()) foreach (var key in firstRow.Keys.ToArray())
{ {
var firstValue = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).FirstOrDefault(); var values = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]);
var firstValue = values.FirstOrDefault();
if (firstValue == null) if (firstValue == null)
{ {
foreach (var item in tmpData) foreach (var item in tmpData)
@@ -123,8 +122,64 @@ namespace testML
} }
} }
} }
#endregion
foreach (var key in firstRow.Keys)
{
if (key.StartsWith("DESCENDIENTE_S4i") ||
key.StartsWith("DESCENDIENTE_SNP"))
{
var values = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).Distinct().ToArray();
if (values.Length > 1)
{
try
{
var sw = new Stopwatch();
sw.Start();
MakePrediction(tmpData, key);
sw.Stop();
Console.WriteLine("Elapsed: " + sw.Elapsed.ToString());
GC.Collect();
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
}
}
else
{
}
}
}
Console.WriteLine();
Console.WriteLine("Press enter to Exit");
Console.ReadLine();
}
private static void MakePrediction(List<Dictionary<string, object>> tmpData, string columnToPredict)
{
var firstRow = tmpData[0] as IDictionary<string, object>;
var hashKey = new StringBuilder();
foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x))
{
if (hashKey.Length > 0) { hashKey.Append("+"); }
hashKey.Append(key);
}
var md5 = MD5.Create();
var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray());
var modelFilename = columnToPredict + "." + hash + ".zip";
#endregion
MLContext mlContext = new MLContext(); MLContext mlContext = new MLContext();
@@ -147,11 +202,12 @@ namespace testML
//} //}
}; };
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema); var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
tmpData = null; //Liberamos la memoria ITransformer _trainedModel;
if (!File.Exists(modelFilename))
{
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault(); var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
var loadMethodObj = loadMethod.MakeGenericMethod(classType); var loadMethodObj = loadMethod.MakeGenericMethod(classType);
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null }); var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
@@ -180,9 +236,14 @@ namespace testML
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType); var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
Console.WriteLine("Training..."); Console.WriteLine("Training...");
var _trainedModel = trainingPipeline.Fit(trainData); _trainedModel = trainingPipeline.Fit(trainData);
mlContext.Model.Save(_trainedModel, data.Schema, columnToPredict + ".zip"); mlContext.Model.Save(_trainedModel, data.Schema, modelFilename);
}
else
{
_trainedModel = mlContext.Model.Load(modelFilename, out schema);
}
var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault(); var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault();
var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType); var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType);
@@ -228,7 +289,6 @@ namespace testML
Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000"))); Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000")));
#endregion #endregion
/* /*
@@ -266,13 +326,8 @@ namespace testML
#endregion #endregion
*/ */
Console.WriteLine();
Console.WriteLine("Press enter to Exit");
Console.ReadLine();
} }
private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames) private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames)
{ {
IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label"); IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label");