Adaptado para que agrupe los datos
This commit is contained in:
@@ -16,6 +16,7 @@ using System.Reflection;
|
||||
using System.Security.AccessControl;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using System.Xml.Linq;
|
||||
using static TorchSharp.torch.utils;
|
||||
@@ -30,21 +31,23 @@ namespace testML
|
||||
{
|
||||
XSSFWorkbook wb;
|
||||
//using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read))
|
||||
using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IA.xlsx", FileMode.Open, FileAccess.Read))
|
||||
using (FileStream file = new FileStream(@"entrenar_IA (1).xlsx", FileMode.Open, FileAccess.Read))
|
||||
{
|
||||
wb = new XSSFWorkbook(file);
|
||||
}
|
||||
|
||||
var sheet = wb.GetSheetAt(0);
|
||||
|
||||
var headerRow = sheet.GetRow(0);
|
||||
var CRRow = sheet.GetRow(0);
|
||||
var headerRow = sheet.GetRow(1);
|
||||
|
||||
|
||||
|
||||
#region Preparamos los datos de entrenamiento
|
||||
|
||||
var tmpData = new List<Dictionary<string, object>>();
|
||||
|
||||
for (var r = 1; r < sheet.LastRowNum - 1; r++)
|
||||
for (var r = headerRow.RowNum + 1; r < sheet.LastRowNum - 1; r++)
|
||||
{
|
||||
//if (r == 50) break;
|
||||
Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1));
|
||||
@@ -58,6 +61,7 @@ namespace testML
|
||||
{
|
||||
var usePrefix = true;
|
||||
var columnName = headerRow.GetCell(c)?.StringCellValue;
|
||||
var crCell = CRRow.GetCell(c)?.NumericCellValue;
|
||||
|
||||
columnName = FixColumnName(columnName);
|
||||
|
||||
@@ -93,6 +97,12 @@ namespace testML
|
||||
}
|
||||
|
||||
var finalColumnName = (usePrefix ? prefix : string.Empty) + columnName;
|
||||
|
||||
if (crCell != null)
|
||||
{
|
||||
finalColumnName = finalColumnName + "_CR" + crCell.Value.ToString();
|
||||
}
|
||||
|
||||
if (value is string)
|
||||
{
|
||||
rowData.Add(finalColumnName, valuePrefix + value);
|
||||
@@ -120,7 +130,7 @@ namespace testML
|
||||
item.Remove(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var key in firstRow.Keys)
|
||||
@@ -135,12 +145,12 @@ namespace testML
|
||||
try
|
||||
{
|
||||
var sw = new Stopwatch();
|
||||
sw.Start();
|
||||
sw.Start();
|
||||
|
||||
MakePrediction(tmpData, key);
|
||||
|
||||
sw.Stop();
|
||||
|
||||
|
||||
Console.WriteLine("Elapsed: " + sw.Elapsed.ToString());
|
||||
|
||||
GC.Collect();
|
||||
@@ -164,11 +174,21 @@ namespace testML
|
||||
|
||||
private static void MakePrediction(List<Dictionary<string, object>> tmpData, string columnToPredict)
|
||||
{
|
||||
var regexCR = new Regex(@"_CR\d+");
|
||||
|
||||
var currentCR = regexCR.Match(columnToPredict).Groups[0].Value;
|
||||
|
||||
|
||||
var firstRow = tmpData[0] as IDictionary<string, object>;
|
||||
|
||||
var hashKey = new StringBuilder();
|
||||
foreach (var key in firstRow.Keys.Where(x => !x.StartsWith("DESCENDIENTE_") && (x.Contains("_S4i") || x.Contains("_SNP"))).OrderBy(x => x))
|
||||
{
|
||||
if(!key.Contains(currentCR))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (hashKey.Length > 0) { hashKey.Append("+"); }
|
||||
hashKey.Append(key);
|
||||
}
|
||||
@@ -177,6 +197,7 @@ namespace testML
|
||||
var hash = string.Join("", md5.ComputeHash(new MemoryStream(new UTF8Encoding(false).GetBytes(hashKey.ToString()))).Select(x => x.ToString("X2").ToUpper()).ToArray());
|
||||
|
||||
var modelFilename = columnToPredict + "." + hash + ".zip";
|
||||
var objectFilename = columnToPredict + "." + hash + ".dll";
|
||||
|
||||
|
||||
#endregion
|
||||
@@ -185,7 +206,6 @@ namespace testML
|
||||
MLContext mlContext = new MLContext();
|
||||
|
||||
|
||||
|
||||
mlContext.Log += (_, e) =>
|
||||
{
|
||||
if (e.Kind == Microsoft.ML.Runtime.ChannelMessageKind.Trace && e.Source.EndsWith(" Cursor")) { return; }
|
||||
@@ -203,7 +223,7 @@ namespace testML
|
||||
};
|
||||
|
||||
|
||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, out Type classType, out Type classPredictionType, out DataViewSchema schema);
|
||||
var dataConverted = DictionaryToObjectConverter.Convert(tmpData, columnToPredict, objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema);
|
||||
|
||||
ITransformer _trainedModel;
|
||||
if (!File.Exists(modelFilename))
|
||||
@@ -234,8 +254,9 @@ namespace testML
|
||||
|
||||
var pipeline = ProcessData(mlContext, columnToPredict, columnNameAndTypes);
|
||||
var trainingPipeline = BuildAndTrainModel(mlContext, trainData, pipeline, classType, classPredictionType);
|
||||
|
||||
|
||||
Console.WriteLine("Training...");
|
||||
|
||||
_trainedModel = trainingPipeline.Fit(trainData);
|
||||
|
||||
mlContext.Model.Save(_trainedModel, data.Schema, modelFilename);
|
||||
|
||||
Reference in New Issue
Block a user