diff --git a/testML/App.config b/testML/App.config index d3e3403..f1950d6 100644 --- a/testML/App.config +++ b/testML/App.config @@ -37,6 +37,10 @@ + + + + diff --git a/testML/DictionaryToObjectConverter.cs b/testML/DictionaryToObjectConverter.cs index 02cb9b3..39b979a 100644 --- a/testML/DictionaryToObjectConverter.cs +++ b/testML/DictionaryToObjectConverter.cs @@ -20,7 +20,7 @@ namespace testML public static IEnumerable Convert(List> data, string toPredict, string objectFilename, out Type classType, out Type classPredictionType, out DataViewSchema schema) { var regexCR = new Regex(@"_CR\d+"); - var currentCR = regexCR.Match(toPredict).Groups[0].Value; + var currentCR = regexCR.Match(toPredict ?? string.Empty).Groups[0].Value; var schemaBuilder = new DataViewSchema.Builder(); @@ -31,7 +31,7 @@ namespace testML { foreach (var key in sample.Keys) { - if (!key.Contains(currentCR)) + if (currentCR != null && !key.Contains(currentCR)) { continue; } @@ -67,7 +67,7 @@ namespace testML } } - var className = "OBJ"+ Path.GetFileNameWithoutExtension(objectFilename).Replace(".", "_"); + var className = "OBJ" + Path.GetFileNameWithoutExtension(objectFilename).Replace(".", "_"); Assembly dllAssembly = null; if (File.Exists(Path.Combine(Environment.CurrentDirectory, objectFilename))) @@ -79,7 +79,7 @@ namespace testML var converter = new DictionaryToObjectConverterClass() { ClassName = className, - ToPredict = toPredict, + ToPredict = toPredict ?? "Empty", Definition = definition }; diff --git a/testML/Program.cs b/testML/Program.cs index 17306b1..2513abe 100644 --- a/testML/Program.cs +++ b/testML/Program.cs @@ -31,7 +31,7 @@ namespace testML { XSSFWorkbook wb; //using (FileStream file = new FileStream(@"C:\Users\miguel.maldonado\Downloads\entrenar_IAMenos.xlsx", FileMode.Open, FileAccess.Read)) - using (FileStream file = new FileStream(@"entrenar_IA (1).xlsx", FileMode.Open, FileAccess.Read)) + using (FileStream file = new FileStream(@"C:\Users\miki_\Downloads\entrenar_IA_2.xlsx", FileMode.Open, FileAccess.Read)) { wb = new XSSFWorkbook(file); } @@ -49,7 +49,7 @@ namespace testML for (var r = headerRow.RowNum + 1; r < sheet.LastRowNum - 1; r++) { - //if (r == 50) break; + //if (r == 100) break; Console.WriteLine(string.Format("{0} / {1}", r, sheet.LastRowNum - 1)); var row = sheet.GetRow(r); @@ -63,6 +63,9 @@ namespace testML var columnName = headerRow.GetCell(c)?.StringCellValue; var crCell = CRRow.GetCell(c)?.NumericCellValue; + if (CRRow.GetCell(c)?.CellType == NPOI.SS.UserModel.CellType.Blank) { crCell = null; } + + columnName = FixColumnName(columnName); object value = null; @@ -133,7 +136,9 @@ namespace testML } } - foreach (var key in firstRow.Keys) + S4i_Simulador.S4i_SimularCruces.Run(tmpData); + + /*foreach (var key in firstRow.Keys) { if (key.StartsWith("DESCENDIENTE_S4i") || key.StartsWith("DESCENDIENTE_SNP")) @@ -165,7 +170,7 @@ namespace testML } } - } + }*/ Console.WriteLine(); Console.WriteLine("Press enter to Exit"); diff --git a/testML/S4i_SimularCruces.cs b/testML/S4i_SimularCruces.cs new file mode 100644 index 0000000..b0de15c --- /dev/null +++ b/testML/S4i_SimularCruces.cs @@ -0,0 +1,404 @@ +using FFSoft.SQLiteUtilities; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Operations; +using Microsoft.ML; +using NPOI.SS.Formula.Functions; +using System; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.IO; +using System.Linq; +using System.Net; +using System.Reflection; +using System.Reflection.PortableExecutable; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using Tensorboard; + +namespace testML.S4i_Simulador +{ + internal class S4i_SimularCruces + { + static Regex removeCr = new Regex(@"(.+)_CR\d+$", RegexOptions.IgnoreCase | RegexOptions.Compiled); + static Regex columnByModelName = new Regex(@"DESCENDIENTE_(.+_CR\d+)\.", RegexOptions.IgnoreCase | RegexOptions.Compiled); + + static List> sourceData; + static Dictionary allColumns; + static Dictionary modelosIA; + static MLContext mlContext; + + public static void Run(List> sourceData) + { + mlContext = new MLContext(); + modelosIA = LoadModels(); + + S4i_SimularCruces.sourceData = sourceData; + + var all = (from x in ObtenerIndividuos() select x).ToList(); + { + allColumns = new Dictionary(); + + // Buscamos el nombre de todos los atributos + foreach (var row in all) + { + foreach (var col in row.Values.Keys) + { + if (!allColumns.ContainsKey(col)) + { + var m = removeCr.Match(col); + if (!m.Success) { continue; } + + allColumns.Add(col, m.Groups[1].Value); + } + } + } + } + + // Añadimos todos los atributos que falten + foreach (var row in all) + { + foreach (var col in allColumns.Keys) + { + if (!row.Values.ContainsKey(col)) + { + row.Values.Add(col, null); + } + } + } + + //foreach(var column in modelosIA.Keys) + //{ + // if(allColumns.ContainsKey(column)) + // { + // allColumns.Remove(column); + // } + //} + + var allCodes = (from x in all select x.Code).Distinct().ToList(); + + var individuos = (from x in allCodes select all.FirstOrDefault(y => y.Code == x)).ToArray(); + all = null; + + var outputDB = "Crosses.sqlite"; + + if (!File.Exists(outputDB)) + { + SQLiteConnector.CreateDatabase(outputDB); + } + + using (var db = new SQLiteConnector(outputDB)) + { + db.Execute("PRAGMA journal_mode=wal"); + db.Execute("PRAGMA wal_autocheckpoint=1000;"); + db.Execute("VACUUM"); + #region Crear tabla de los individuos + + db.Execute("DROP TABLE IF EXISTS Individuo;"); + var sql = new StringBuilder(); + sql.AppendLine(@"CREATE TABLE Individuo (Code TEXT PRIMARY KEY UNIQUE NOT NULL"); + + foreach (var col in allColumns.Values) + { + sql.Append(", "); + sql.Append(col); + sql.Append(" TEXT"); + } + sql.AppendLine(");"); + db.Execute(sql.ToString()); + + db.BeginTransaction(); + foreach (var i in individuos) + { + db.Execute("INSERT INTO Individuo(Code) VALUES({0})", i.Code); + + + var cmd = db.GetCommand(); + sql.Clear(); + sql.Append("UPDATE Individuo SET "); + var addComa = false; + foreach (var col in allColumns.Keys) + { + var name = allColumns[col]; + + if (addComa) + { + sql.Append(", "); + } + else + { + addComa = true; + } + + + sql.Append(name); + sql.Append(" = "); + + var param = "@p" + name; + sql.Append(param); + + var v = i.Values[col]; + if (string.IsNullOrEmpty(v as string)) { v = null; } + + cmd[param] = v?.ToString().Substring(name.Length + 1); + } + sql.Append(" WHERE Code = @pCode"); + cmd["@pCode"] = i.Code; + + cmd.SQL = sql.ToString(); + db.Execute(cmd); + } + db.EndTransaction(true); + #endregion + + + #region Creamos la tabla de la simulación + + db.Execute("DROP TABLE IF EXISTS Cruce;"); + sql.Clear(); + sql.AppendLine(@"CREATE TABLE Cruce (Id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, Female TEXT NOT NULL, Male TEXT NOT NULL"); + + foreach (var col in allColumns.Values) + { + sql.Append(", "); + sql.Append(col); + sql.Append(" TEXT"); + } + sql.AppendLine(");"); + db.Execute(sql.ToString()); + + // Le hacemos un índice + db.Execute(@"CREATE UNIQUE INDEX IX_Cruce_FemaleMale ON Cruce ( + Female COLLATE BINARY ASC, + Male COLLATE BINARY ASC +);"); + + #endregion + + db.BeginTransaction(); + //ELiminamos columnas + foreach (var col in allColumns.Keys) + { + if (!modelosIA.ContainsKey(col)) + { + db.Execute("ALTER TABLE Cruce DROP COLUMN " + allColumns[col] + ";"); + } + } + db.EndTransaction(true); + + + var limit = individuos.Length * individuos.Length; + var current = 0; + foreach (var asFemale in individuos) + { + FillModel("FEMENINO_", asFemale); + + foreach (var asMale in individuos) + { + current++; + + if (asFemale.Code == asMale.Code) { continue; } // Ignoramos las autofecundaciones + + // Si existe el curce en un sentido o en el otro lo ignoramos + if (db.Exists("SELECT * FROM Cruce WHERE (Female = {0} AND Male = {1}) OR (Female = {1} AND Male = {0})", asFemale.Code, asMale.Code)) + { + continue; + } + + FillModel("MASCULINO_", asMale); + + Console.WriteLine("{2}/{3} {4}%: Simulating {0} x {1}", + asFemale.Code, + asMale.Code, + current.ToString("#,##0"), + limit.ToString("#,##0"), + ((current / (double)limit) * 100.0).ToString("#,##0.00")); + SimulateCross(db, asFemale, asMale); + } + } + + } + + + } + + private static void SimulateCross(SQLiteConnector db, Individuo asFemale, Individuo asMale) + { + var cmd = db.GetCommand(); + var sql = new StringBuilder(); + sql.Append("INSERT INTO Cruce(Female, Male"); + foreach (var model in modelosIA.Values) + { + sql.Append(", " + allColumns[model.Name]); + } + sql.Append(") VALUES(@pFemale, @pMale"); + + cmd["@pFemale"] = asFemale.Code; + cmd["@pMale"] = asMale.Code; + foreach (var model in modelosIA.Values) + { + sql.Append(", @p" + allColumns[model.Name]); + } + sql.Append(");"); + + + var tasks = new Task[modelosIA.Values.Count]; + var c = 0; + foreach (var model in modelosIA.Values) + { + tasks[c] = MakePrediction(model); + c++; + } + Task.WaitAll(tasks); + + foreach (var model in modelosIA.Values) + { + var colDB = allColumns[model.Name]; + var value = model.Prediction.GetValue("DESCENDIENTE_" + model.Name) as string; + if (value != null && value.StartsWith(colDB)) + { + value = value.Substring(colDB.Length + 1); + } + + cmd["@p" + colDB] = value; + } + cmd.SQL = sql.ToString(); + db.Execute(cmd); + + } + + private static Task MakePrediction(ModelosIA model) + { + return Task.Run(() => + { + model.Prediction = (IDictionaryToObjectConverter)model.PredictionEngineMethod.Invoke(model.PredictionEngine, model.PredictionEngineMethodParameter); + }); + } + + private static void FillModel(string prefix, Individuo individuo) + { + foreach (var model in modelosIA.Values) + { + foreach (var col in allColumns.Keys) + { + model.Data.SetValue(prefix + col, individuo.Values[col]); + } + } + } + + private static Dictionary LoadModels() + { + var count = 0; + var result = new Dictionary(); + var tasks = new List(); + foreach (var filename in Directory.GetFiles("Modelos", "*.zip")) + { + count++; + tasks.Add(LoadModels(result, filename)); + } + + Task.WaitAll(tasks.ToArray()); + + return result; + } + + private static Task LoadModels(Dictionary result, string filename) + { + return Task.Run(() => + { + //if (count >= 5) break; + var name = Path.GetFileNameWithoutExtension(filename); + + var matchName = columnByModelName.Match(name); + + Console.WriteLine("Loading model for: " + matchName.Groups[1].Value); + + var model = new ModelosIA(); + model.Name = matchName.Groups[1].Value; + model.Model = mlContext.Model.Load(filename, out DataViewSchema schema); + model.Schema = schema; + + var dll = Path.Combine(Path.GetDirectoryName(filename), name + ".dll"); + if (File.Exists(dll)) + { + var a = Assembly.LoadFrom(dll); + var obj = "OBJ" + name.Replace(".", "_"); + + var dataType = a.GetType("DictionaryToObjectConverterNamespace." + obj); + var predictionType = a.GetType("DictionaryToObjectConverterNamespace." + obj + "Prediction"); + + model.Data = (IDictionaryToObjectConverter)Activator.CreateInstance(dataType); + + var createPredictionEngineMethod = mlContext.Model.GetType().GetMethods().Where(x => x.Name == "CreatePredictionEngine" && x.IsGenericMethodDefinition).FirstOrDefault(); + var createPredictionEngineMethodObj = createPredictionEngineMethod.MakeGenericMethod(dataType, predictionType); + model.PredictionEngine = createPredictionEngineMethodObj.Invoke(mlContext.Model, new object[] { model.Model, null, null, null }); + + //Test + model.PredictionEngineMethod = model.PredictionEngine.GetType().GetMethods().Where(x => x.Name == "Predict" && x.GetParameters().Length == 1 && x.GetParameters()[0].ParameterType == dataType).FirstOrDefault(); + model.PredictionEngineMethodParameter = new object[] { model.Data }; + + } + + lock (result) + { + result.Add(model.Name, model); + } + + Console.WriteLine("Model for: " + matchName.Groups[1].Value + " loaded!"); + + }); + } + + private static IEnumerable ObtenerIndividuos() + { + foreach (var item in sourceData) + { + yield return ObtenerIndividuo(item, "PMASCULINO", "MASCULINO_"); + yield return ObtenerIndividuo(item, "PFEMENINO", "FEMENINO_"); + yield return ObtenerIndividuo(item, "DESCENDIENTE", "DESCENDIENTE_"); + } + } + + private static Individuo ObtenerIndividuo(Dictionary item, string name, string dataPrefix) + { + var result = new Individuo() + { + Code = item[name].ToString().Trim(), + Values = new Dictionary() + + }; + + foreach (var key in item.Keys) + { + if (key.StartsWith(dataPrefix)) + { + result.Values.Add(key.Substring(dataPrefix.Length), item[key]); + } + } + + return result; + } + } + + public class Individuo + { + public string Code { get; set; } + + public Dictionary Values { get; set; } + } + + public class ModelosIA + { + public string Name { get; set; } + + public ITransformer Model { get; set; } + public DataViewSchema Schema { get; set; } + public IDictionaryToObjectConverter Data { get; set; } + public IDictionaryToObjectConverter Prediction { get; set; } + + public object PredictionEngine { get; set; } + public MethodInfo PredictionEngineMethod { get; set; } + public object[] PredictionEngineMethodParameter { get; set; } + } + +} diff --git a/testML/packages.config b/testML/packages.config index 37132f8..131b3c3 100644 --- a/testML/packages.config +++ b/testML/packages.config @@ -33,14 +33,16 @@ - + + + diff --git a/testML/testML.csproj b/testML/testML.csproj index 1bc970e..adf89fa 100644 --- a/testML/testML.csproj +++ b/testML/testML.csproj @@ -49,7 +49,16 @@ testML.Program + + true + false + false + false + + + ..\..\..\..\..\..\Desarrollo\SQLiteUtilities\SQLiteUtilities\bin\Debug\FFSoft.SQLiteUtilities.dll + ..\packages\Google.Protobuf.3.21.9\lib\net45\Google.Protobuf.dll @@ -187,6 +196,9 @@ ..\packages\System.Collections.Immutable.5.0.0\lib\net461\System.Collections.Immutable.dll + + ..\packages\Stub.System.Data.SQLite.Core.NetFramework.1.0.117.0\lib\net46\System.Data.SQLite.dll + ..\packages\System.IO.FileSystem.AccessControl.4.5.0\lib\net461\System.IO.FileSystem.AccessControl.dll @@ -250,6 +262,7 @@ True DictionaryToObjectConverterClass.tt + @@ -293,6 +306,7 @@ + @@ -302,4 +316,5 @@ + \ No newline at end of file