Ejecutando con el conjunto completo
This commit is contained in:
@@ -7,15 +7,18 @@ using Microsoft.SqlServer.Server;
|
|||||||
using NPOI.XSSF.UserModel;
|
using NPOI.XSSF.UserModel;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Diagnostics;
|
||||||
using System.Dynamic;
|
using System.Dynamic;
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Reflection;
|
using System.Reflection;
|
||||||
using System.Security.AccessControl;
|
using System.Security.AccessControl;
|
||||||
|
using System.Security.Cryptography;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using System.Xml.Linq;
|
using System.Xml.Linq;
|
||||||
|
using static TorchSharp.torch.utils;
|
||||||
|
|
||||||
namespace testML
|
namespace testML
|
||||||
{
|
{
|
||||||
@@ -98,20 +101,16 @@ namespace testML
|
|||||||
{
|
{
|
||||||
rowData.Add(finalColumnName, value?.ToString() ?? "");
|
rowData.Add(finalColumnName, value?.ToString() ?? "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
tmpData.Add(rowData);
|
tmpData.Add(rowData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Eliminamos las columnas en blanco
|
||||||
var columnToPredict = "DESCENDIENTE_S4i001";
|
|
||||||
|
|
||||||
var firstRow = tmpData[0] as IDictionary<string, object>;
|
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||||
foreach (var key in firstRow.Keys.ToArray())
|
foreach (var key in firstRow.Keys.ToArray())
|
||||||
{
|
{
|
||||||
var firstValue = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).FirstOrDefault();
|
var values = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]);
|
||||||
|
var firstValue = values.FirstOrDefault();
|
||||||
if (firstValue == null)
|
if (firstValue == null)
|
||||||
{
|
{
|
||||||
foreach (var item in tmpData)
|
foreach (var item in tmpData)
|
||||||
@@ -123,8 +122,64 @@ namespace testML
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endregion
|
|
||||||
|
|
||||||
|
foreach (var key in firstRow.Keys)
|
||||||
|
{
|
||||||
|
if (key.StartsWith("DESCENDIENTE_S4i") ||
|
||||||
|
key.StartsWith("DESCENDIENTE_SNP"))
|
||||||
|
{
|
||||||
|
var values = (from x in tmpData where x.ContainsKey(key) && x[key] != null && !string.IsNullOrEmpty(x[key] as string) select x[key]).Distinct().ToArray();
|
||||||
|
if (values.Length > 1)
|
||||||
|
{
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var sw = new Stopwatch();
|
||||||
|
sw.Start();
|
||||||
|
|
||||||
|
MakePrediction(tmpData, key);
|
||||||
|
|
||||||
|
sw.Stop();
|
||||||
|
|
||||||
|
Console.WriteLine("Elapsed: " + sw.Elapsed.ToString());
|
||||||
|
|
||||||
|
GC.Collect();
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
Console.WriteLine(ex.ToString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.WriteLine("Press enter to Exit");
|
||||||
|
Console.ReadLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void MakePrediction(List<Dictionary<string, object>> tmpData, string columnToPredict)
|
||||||
|
{
|
||||||
|
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||||
|
|
||||||
|
var hashKey = new StringBuilder();
|
||||||
|
foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x))
|
||||||
|
{
|
||||||
|
if (hashKey.Length > 0) { hashKey.Append("+"); }
|
||||||
|
hashKey.Append(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
var md5 = MD5.Create();
|
||||||
|
var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray());
|
||||||
|
|
||||||
|
var modelFilename = columnToPredict + "." + hash + ".zip";
|
||||||
|
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
|
||||||
MLContext mlContext = new MLContext();
|
MLContext mlContext = new MLContext();
|
||||||
@@ -147,42 +202,48 @@ namespace testML
|
|||||||
//}
|
//}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
|
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
|
||||||
|
|
||||||
tmpData = null; //Liberamos la memoria
|
ITransformer _trainedModel;
|
||||||
|
if (!File.Exists(modelFilename))
|
||||||
|
|
||||||
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
|
|
||||||
var loadMethodObj = loadMethod.MakeGenericMethod(classType);
|
|
||||||
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
|
|
||||||
|
|
||||||
#region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión)
|
|
||||||
|
|
||||||
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.5);
|
|
||||||
IDataView trainData = dataSplit.TrainSet;
|
|
||||||
IDataView testData = dataSplit.TestSet;
|
|
||||||
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
|
|
||||||
#region Preparamos los datos de entrada y salida
|
|
||||||
|
|
||||||
|
|
||||||
var columnNameAndTypes = new Dictionary<string, Type>();
|
|
||||||
foreach (var item in (from x in firstRow.Keys
|
|
||||||
select new { Key = x, Type = (from y in dataConverted.Cast<IDictionaryToObjectConverter>() where y.GetValue(x) != null select y.GetValue(x).GetType()).FirstOrDefault() })
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
columnNameAndTypes.Add(item.Key, item.Type);
|
var loadMethod = mlContext.Data.GetType().GetMethods().Where(x => x.Name == "LoadFromEnumerable" && x.IsGenericMethodDefinition).FirstOrDefault();
|
||||||
|
var loadMethodObj = loadMethod.MakeGenericMethod(classType);
|
||||||
|
var data = (IDataView)loadMethodObj.Invoke(mlContext.Data, new object[] { dataConverted, null });
|
||||||
|
|
||||||
|
#region Cortamos los datos de entrenamiento en (Datos para entenar y Datos para hacer el test de precisión)
|
||||||
|
|
||||||
|
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.5);
|
||||||
|
IDataView trainData = dataSplit.TrainSet;
|
||||||
|
IDataView testData = dataSplit.TestSet;
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
|
||||||
|
#region Preparamos los datos de entrada y salida
|
||||||
|
|
||||||
|
|
||||||
|
var columnNameAndTypes = new Dictionary<string, Type>();
|
||||||
|
foreach (var item in (from x in firstRow.Keys
|
||||||
|
select new { Key = x, Type = (from y in dataConverted.Cast<IDictionaryToObjectConverter>() where y.GetValue(x) != null select y.GetValue(x).GetType()).FirstOrDefault() })
|
||||||
|
)
|
||||||
|
{
|
||||||
|
columnNameAndTypes.Add(item.Key, item.Type);
|
||||||
|
}
|
||||||
|
|
||||||
|
var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes);
|
||||||
|
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
|
||||||
|
|
||||||
|
Console.WriteLine("Training...");
|
||||||
|
_trainedModel = trainingPipeline.Fit(trainData);
|
||||||
|
|
||||||
|
mlContext.Model.Save(_trainedModel, data.Schema, modelFilename);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_trainedModel = mlContext.Model.Load(modelFilename, out schema);
|
||||||
}
|
}
|
||||||
|
|
||||||
var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes);
|
|
||||||
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
|
|
||||||
|
|
||||||
Console.WriteLine("Training...");
|
|
||||||
var _trainedModel = trainingPipeline.Fit(trainData);
|
|
||||||
|
|
||||||
mlContext.Model.Save(_trainedModel, data.Schema, columnToPredict + ".zip");
|
|
||||||
|
|
||||||
var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault();
|
var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault();
|
||||||
var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType);
|
var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(classType, classPredictionType);
|
||||||
@@ -228,7 +289,6 @@ namespace testML
|
|||||||
|
|
||||||
Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000")));
|
Console.WriteLine(string.Format("Ok: {0}, Fail: {1}, Percent: {2}%", ok, fail, (((double)ok / (double)(ok + fail)) * 100.0).ToString("##0.0000")));
|
||||||
|
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -266,13 +326,8 @@ namespace testML
|
|||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
*/
|
*/
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine("Press enter to Exit");
|
|
||||||
Console.ReadLine();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames)
|
private static IEstimator<ITransformer> ProcessData(MLContext mlContext, string predictColumnName, Dictionary<string, Type> columnNames)
|
||||||
{
|
{
|
||||||
IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label");
|
IEstimator<ITransformer> pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: predictColumnName, outputColumnName: "Label");
|
||||||
@@ -300,7 +355,7 @@ namespace testML
|
|||||||
|
|
||||||
public static IEstimator<ITransformer> BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> pipeline, Type modelType, Type prodelPredictionType)
|
public static IEstimator<ITransformer> BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> pipeline, Type modelType, Type prodelPredictionType)
|
||||||
{
|
{
|
||||||
var trainingPipeline = pipeline.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features", maximumNumberOfIterations: 1000))
|
var trainingPipeline = pipeline.Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features", maximumNumberOfIterations: 1000))
|
||||||
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
|
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
|
||||||
|
|
||||||
return trainingPipeline;
|
return trainingPipeline;
|
||||||
|
|||||||
Reference in New Issue
Block a user