From c6923ed84ffca4433f75c46c6db20703d5499e97 Mon Sep 17 00:00:00 2001 From: Mike Phares Date: Wed, 9 Mar 2022 20:02:07 -0700 Subject: [PATCH] Added _ColorCollections --- Adaptation/.vscode/launch.json | 2 +- .../FileHandlers/DEP08SIASM/ProcessData.cs | 5 - Adaptation/FileHandlers/jpeg/FileRead.cs | 35 ++- Adaptation/FileHandlers/jpeg/ProcessData.cs | 280 +++++++++++++++--- DEP08SIASM.csproj | 1 - 5 files changed, 268 insertions(+), 55 deletions(-) delete mode 100644 Adaptation/FileHandlers/DEP08SIASM/ProcessData.cs diff --git a/Adaptation/.vscode/launch.json b/Adaptation/.vscode/launch.json index 3a8a092..0978537 100644 --- a/Adaptation/.vscode/launch.json +++ b/Adaptation/.vscode/launch.json @@ -4,7 +4,7 @@ "name": ".NET Core Attach", "type": "coreclr", "request": "attach", - "processId": 13700 + "processId": 9360 } ] } diff --git a/Adaptation/FileHandlers/DEP08SIASM/ProcessData.cs b/Adaptation/FileHandlers/DEP08SIASM/ProcessData.cs deleted file mode 100644 index b7a8fa1..0000000 --- a/Adaptation/FileHandlers/DEP08SIASM/ProcessData.cs +++ /dev/null @@ -1,5 +0,0 @@ -namespace Adaptation.FileHandlers.DEP08SIASM; - -public class ProcessData -{ -} \ No newline at end of file diff --git a/Adaptation/FileHandlers/jpeg/FileRead.cs b/Adaptation/FileHandlers/jpeg/FileRead.cs index 767e420..994a6be 100644 --- a/Adaptation/FileHandlers/jpeg/FileRead.cs +++ b/Adaptation/FileHandlers/jpeg/FileRead.cs @@ -4,6 +4,7 @@ using Adaptation.Shared; using Adaptation.Shared.Methods; using System; using System.Collections.Generic; +using System.Drawing; using System.IO; using System.Linq; using System.Reflection; @@ -14,9 +15,16 @@ namespace Adaptation.FileHandlers.jpeg; public class FileRead : Shared.FileRead, IFileRead { - protected string _LastText; + protected int _EndX; + protected int _EndY; + protected int _StartX; + protected int _StartY; + protected int[] _Bounds; protected long _LastChange; + protected string _LastText; + protected string _TessDataDirectory; protected readonly Dictionary _Reactors; + protected readonly List<(string, Color[])> _ColorCollections; public FileRead(ISMTP smtp, Dictionary fileParameter, string cellInstanceName, string cellInstanceConnectionName, FileConnectorConfiguration fileConnectorConfiguration, string equipmentTypeName, string parameterizedModelObjectDefinitionType, IList modelObjectParameters, string equipmentDictionaryName, Dictionary> dummyRuns, bool useCyclicalForDescription, bool isEAFHosted) : base(new Description(), true, smtp, fileParameter, cellInstanceName, cellInstanceConnectionName, fileConnectorConfiguration, equipmentTypeName, parameterizedModelObjectDefinitionType, modelObjectParameters, equipmentDictionaryName, dummyRuns, useCyclicalForDescription, isEAFHosted) @@ -44,11 +52,11 @@ public class FileRead : Shared.FileRead, IFileRead string x86 = Path.Combine(entryAssemblyLocationDirectory, "x86"); if (!Directory.Exists(x86)) _ = Directory.CreateDirectory(x86); - string tessdata = Path.Combine(entryAssemblyLocationDirectory, "tessdata"); - if (!Directory.Exists(tessdata)) - _ = Directory.CreateDirectory(tessdata); + _TessDataDirectory = Path.Combine(entryAssemblyLocationDirectory, "tessdata"); + if (!Directory.Exists(_TessDataDirectory)) + _ = Directory.CreateDirectory(_TessDataDirectory); string pdfttfSource = Path.Combine(entryAssemblyLocationDirectory, "pdf.ttf"); - string pdfttfDestination = Path.Combine(tessdata, Path.GetFileName(pdfttfSource)); + string pdfttfDestination = Path.Combine(_TessDataDirectory, Path.GetFileName(pdfttfSource)); if (File.Exists(pdfttfSource) && !File.Exists(pdfttfDestination)) File.Copy(pdfttfSource, pdfttfDestination); string tesseract41dllSource = Path.Combine(entryAssemblyLocationDirectory, "tesseract41.dll"); @@ -56,13 +64,24 @@ public class FileRead : Shared.FileRead, IFileRead if (File.Exists(tesseract41dllSource) && !File.Exists(tesseract41dllDestination)) File.Copy(tesseract41dllSource, tesseract41dllDestination); string engtraineddataSource = Path.Combine(entryAssemblyLocationDirectory, "eng.traineddata"); - string engtraineddataDestination = Path.Combine(tessdata, Path.GetFileName(engtraineddataSource)); + string engtraineddataDestination = Path.Combine(_TessDataDirectory, Path.GetFileName(engtraineddataSource)); if (File.Exists(engtraineddataSource) && !File.Exists(engtraineddataDestination)) File.Copy(engtraineddataSource, engtraineddataDestination); string leptonica1800dllSource = Path.Combine(entryAssemblyLocationDirectory, "leptonica-1.80.0.dll"); string leptonica1800dllDestination = Path.Combine(x86, Path.GetFileName(leptonica1800dllSource)); if (File.Exists(leptonica1800dllSource) && !File.Exists(leptonica1800dllDestination)) File.Copy(leptonica1800dllSource, leptonica1800dllDestination); + ModelObjectParameterDefinition[] images = GetProperties(cellInstanceConnectionName, modelObjectParameters, "Image."); + string startX = GetPropertyValue(cellInstanceConnectionName, images, "Image.StartX"); + string startY = GetPropertyValue(cellInstanceConnectionName, images, "Image.StartY"); + string endX = GetPropertyValue(cellInstanceConnectionName, images, "Image.EndX"); + string endY = GetPropertyValue(cellInstanceConnectionName, images, "Image.EndY"); + _StartX = int.Parse(startX); + _StartY = int.Parse(startY); + _EndX = int.Parse(endX); + _EndY = int.Parse(endY); + string masterImageDirectory = GetPropertyValue(cellInstanceConnectionName, modelObjectParameters, "Path.Memory.Master.Images"); + _ColorCollections = ProcessData.GetColorCollections(_StartX, _StartY, _EndX, _EndY, masterImageDirectory); } void IFileRead.Move(Tuple> extractResults, Exception exception) => Move(extractResults, exception); @@ -141,7 +160,7 @@ public class FileRead : Shared.FileRead, IFileRead results.Item4.Add(new FileInfo(reportFullPath)); else { - IProcessData iProcessData = new ProcessData(this, _Logistics, results.Item4); + IProcessData iProcessData = new ProcessData(this, _Logistics, results.Item4, _TessDataDirectory, _StartX, _StartY, _EndX, _EndY, _ColorCollections); if (iProcessData is ProcessData _) { if (!iProcessData.Details.Any()) @@ -156,7 +175,7 @@ public class FileRead : Shared.FileRead, IFileRead throw new Exception(string.Concat("C) No Data - ", dateTime.Ticks)); if (_LastText != _Logistics.MID || _LastChange < DateTime.Now.AddMinutes(-30).Ticks) { - _LastText = _Logistics.MesEntity; + _LastText = _Logistics.MID; _LastChange = DateTime.Now.Ticks; results = iProcessData.GetResults(this, _Logistics, results.Item4); } diff --git a/Adaptation/FileHandlers/jpeg/ProcessData.cs b/Adaptation/FileHandlers/jpeg/ProcessData.cs index 35b4dfe..45e1472 100644 --- a/Adaptation/FileHandlers/jpeg/ProcessData.cs +++ b/Adaptation/FileHandlers/jpeg/ProcessData.cs @@ -8,6 +8,7 @@ using System.Drawing; using System.IO; using System.Linq; using System.Text.Json; +using System.Text.RegularExpressions; using Tesseract; namespace Adaptation.FileHandlers.jpeg; @@ -24,7 +25,7 @@ public class ProcessData : IProcessData List Shared.Properties.IProcessData.Details => _Details; - public ProcessData(IFileRead fileRead, Logistics logistics, List fileInfoCollection) + public ProcessData(IFileRead fileRead, Logistics logistics, List fileInfoCollection, string tessDataDirectory, int startX, int startY, int endX, int endY, List<(string, Color[])> colorCollections) { if (logistics is null) { } @@ -33,7 +34,7 @@ public class ProcessData : IProcessData _Details = new List(); MesEntity = logistics.MesEntity; _Log = LogManager.GetLogger(typeof(ProcessData)); - Parse(fileRead, fileInfoCollection); + Parse(fileRead, fileInfoCollection, tessDataDirectory, startX, startY, endX, endY, colorCollections); } private static string Get(string value, bool useSplitForMID) @@ -96,33 +97,79 @@ public class ProcessData : IProcessData return results; } +#nullable enable #pragma warning disable CA1416 - private static (MemoryStream memoryStream, Color[]) Get(IFileRead fileRead, int thresHold, int startX, int startY, int endX, int endY) + private static Color[] Get(string reportFullPath, int startX, int startY, int endX, int endY) { Color color; List colors = new(); - MemoryStream memoryStream = new(); - int middle = (int)(endY - startY * .5); - Bitmap selectedBitmap = new(endX - startX, endY - startY); - using Bitmap bitmap = Image.FromFile(fileRead.ReportFullPath) as Bitmap; - System.Drawing.Imaging.ImageFormat imageFormat = System.Drawing.Imaging.ImageFormat.Png; + using Bitmap? bitmap = Image.FromFile(reportFullPath) as Bitmap; + if (bitmap is null) + throw new Exception($"Couldn't load image from <{reportFullPath}>"); for (int x = startX; x < endX; x++) { for (int y = startY; y < endY; y++) { color = bitmap.GetPixel(x, y); - if (y == middle) - colors.Add(color); - if (color.R > thresHold || color.G > thresHold || color.B > thresHold) - selectedBitmap.SetPixel(x - startX, y - startY, Color.Black); + colors.Add(color); + } + } + return colors.ToArray(); + } + + private static System.Drawing.Imaging.ImageFormat Get(string extension) + { + System.Drawing.Imaging.ImageFormat imageFormat = extension switch + { + ".bmp" => System.Drawing.Imaging.ImageFormat.Bmp, + ".gif" => System.Drawing.Imaging.ImageFormat.Gif, + ".jpeg" => System.Drawing.Imaging.ImageFormat.Jpeg, + ".jpg" => System.Drawing.Imaging.ImageFormat.Jpeg, + ".png" => System.Drawing.Imaging.ImageFormat.Png, + ".tiff" => System.Drawing.Imaging.ImageFormat.Tiff, + _ => throw new Exception("Extension not mapped"), + }; + return imageFormat; + } + + private static (Color[], int, int, MemoryStream) Get(string reportFullPath, string extension, int startX, int startY, int endX, int endY) + { + Color color; + List colors = new(); + MemoryStream memoryStream = new(); + Bitmap selectedBitmap = new(endX - startX, endY - startY); + System.Drawing.Imaging.ImageFormat imageFormat = Get(extension); + using Bitmap? bitmap = Image.FromFile(reportFullPath) as Bitmap; + if (bitmap is null) + throw new Exception($"Couldn't load image from <{reportFullPath}>"); + for (int x = startX; x < endX; x++) + { + for (int y = startY; y < endY; y++) + { + color = bitmap.GetPixel(x, y); + colors.Add(color); + selectedBitmap.SetPixel(x - startX, y - startY, color); } } selectedBitmap.Save(memoryStream, imageFormat); - return new(memoryStream, colors.ToArray()); + return new(colors.ToArray(), endX - startX, endY - startY, memoryStream); } - private static void SaveToFile(MemoryStream memoryStream, string extension, string saveFileName) + private static string Get(string saveFileName, string extension, string extra) + { + string result; + string? directoryName = Path.GetDirectoryName(saveFileName); + if (string.IsNullOrEmpty(directoryName)) + throw new Exception("Couldn't get directoryName!"); + string? fileNameWithoutExtension = Path.GetFileNameWithoutExtension(saveFileName); + if (string.IsNullOrEmpty(fileNameWithoutExtension)) + throw new Exception("Couldn't get fileNameWithoutExtension!"); + result = Path.Combine(directoryName, $"{fileNameWithoutExtension} - {extra}{extension}"); + return result; + } + + private static void SaveToFile(string extension, string saveFileName, MemoryStream memoryStream) { System.Drawing.Imaging.ImageFormat imageFormat = extension switch { @@ -138,44 +185,197 @@ public class ProcessData : IProcessData bitmap.Save(saveFileName, imageFormat); } + private static byte[] Get(bool development, string extension, string saveFileName, Color[] sourceColors, int width, int height, int thresHold, bool saveToFile) + { + int i = 0; + Color color; + MemoryStream memoryStream = new(); + Bitmap selectedBitmap = new(width, height); + System.Drawing.Imaging.ImageFormat imageFormat = Get(extension); + string newSaveFileName = Get(saveFileName, extension, thresHold.ToString("000")); + for (int x = 0; x < width; x++) + { + for (int y = 0; y < height; y++) + { + color = sourceColors[i]; + if (color.R > thresHold || color.G > thresHold || color.B > thresHold) + selectedBitmap.SetPixel(x, y, Color.Black); + i += 1; + } + } + selectedBitmap.Save(memoryStream, imageFormat); + if (development && saveToFile) + SaveToFile(extension, newSaveFileName, memoryStream); + byte[] bytes = memoryStream.GetBuffer(); + return bytes; + } + #pragma warning restore CA1416 - private static (MemoryStream, Color[]) Get(IFileRead fileRead, int thresHold) => Get(fileRead, thresHold, 330, 16, 400, 32); - - private void Parse(IFileRead fileRead, List fileInfoCollection) + private static void SaveText(int red, int green, string text, string textFileName, List<(string File, int TotalDelta)> totalDeltaCollection, int readAt, string reading, int score) { - int thresHold = 76; - (MemoryStream memoryStream, Color[] colors) = Get(fileRead, thresHold); - byte[] bytes = memoryStream.GetBuffer(); - using TesseractEngine engine = new(string.Empty, "eng", EngineMode.Default); - using Pix img = Pix.LoadFromMemory(bytes); - using Page page = engine.Process(img); - string text = page.GetText().Trim(); - if (!string.IsNullOrEmpty(text)) - _Log.Debug(text); - else + string format = "00000"; + List lines = new() { red.ToString(format), green.ToString(format), text }; + foreach ((string file, int totalDelta) in totalDeltaCollection) { - int red = 0; - int green = 0; - _Log.Debug("Looking by color"); - string extension = ".png"; - string saveFileName = Path.ChangeExtension(fileRead.ReportFullPath, extension); - SaveToFile(memoryStream, extension, saveFileName); - foreach (Color color in colors) + lines.Add(file); + lines.Add(totalDelta.ToString(format)); + } + lines.Add(readAt.ToString(format)); + lines.Add(reading); + lines.Add(score.ToString(format)); + File.WriteAllLines(textFileName, lines); + } + + internal static List<(string, Color[])> GetColorCollections(int startX, int startY, int endX, int endY, string masterImageDirectory) + { + List<(string, Color[])> results = new(); + string[] files = Directory.GetFiles(masterImageDirectory, "*.jpeg", SearchOption.TopDirectoryOnly); + foreach (string file in files) + results.Add(new(file, Get(file, startX, startY, endX, endY))); + return results; + } + + private void Parse(IFileRead fileRead, List fileInfoCollection, string tessDataDirectory, int startX, int startY, int endX, int endY, List<(string, Color[])> colorCollections) + { + Pix pix; + int delta; + Page page; + int readAt; + Color color; + int red = 0; + string text; + byte[] bytes; + int green = 0; + int totalDelta; + string textFileName; + string closestMatchFile; + const int thresHold = 70; + string closestMatchFileName; + List readings = new(); + const int upperThresHold = 153; + Regex regex = new(@"[^a-zA-Z]"); + const string extension = ".tiff"; + string[] closestMatchFileNameSplit; + char[] closestMatchFileNameDistinct; + List<(string File, int TotalDelta)> totalDeltaCollection = new(); + string saveFileName = Path.ChangeExtension(fileRead.ReportFullPath, extension); + (Color[] sourceColors, int width, int height, MemoryStream memoryStream) = Get(fileRead.ReportFullPath, extension, startX, startY, endX, endY); + foreach ((string file, Color[] colors) in colorCollections) + { + totalDelta = 0; + if (colors.Length != sourceColors.Length) + continue; + for (int i = 0; i < sourceColors.Length; i++) { + color = sourceColors[i]; if (color.R > thresHold) red += 1; if (color.G > thresHold) green += 1; + delta = color.R - colors[i].R; + if (delta > 0) + totalDelta += delta; + else + totalDelta += delta * -1; + delta = color.G - colors[i].G; + if (delta > 0) + totalDelta += delta; + else + totalDelta += delta * -1; } - if (red > green) - text = "Red*"; - else - text = "Green*"; - fileInfoCollection.Add(new FileInfo(saveFileName)); + totalDeltaCollection.Add(new(file, totalDelta)); } - if (memoryStream is not null) + totalDeltaCollection = (from l in totalDeltaCollection orderby l.TotalDelta select l).ToList(); + bytes = memoryStream.GetBuffer(); + closestMatchFile = totalDeltaCollection[0].File; + closestMatchFileNameSplit = Path.GetFileNameWithoutExtension(closestMatchFile).Split('-'); + closestMatchFileName = closestMatchFileNameSplit.Last().TrimStart(); + closestMatchFileNameDistinct = closestMatchFileName.Distinct().ToArray(); + using TesseractEngine tesseractEngine = new(tessDataDirectory, "eng", EngineMode.Default); + pix = Pix.LoadTiffFromMemory(bytes); + page = tesseractEngine.Process(pix); + text = page.GetText().Trim(); + pix.Dispose(); + page.Dispose(); + if (!fileRead.IsEAFHosted) + { + fileInfoCollection.Add(new FileInfo(saveFileName)); + SaveToFile(extension, saveFileName, memoryStream); + } + if (!string.IsNullOrEmpty(text)) + { + text = regex.Replace(text, string.Empty); + readings.Add(text); + } + if (text == closestMatchFileName) + { + readAt = thresHold; + _Log.Info(text); + textFileName = Get(saveFileName, ".txt", $"{thresHold:000} - {text}"); + if (!fileRead.IsEAFHosted) + { + fileInfoCollection.Add(new FileInfo(textFileName)); + SaveText(red, green, text, textFileName, totalDeltaCollection, readAt, text, int.MaxValue); + } + } + else + { + readAt = -1; memoryStream.Dispose(); + for (int i = thresHold; i < upperThresHold; i += 10) + { + bytes = Get(!fileRead.IsEAFHosted, extension, saveFileName, sourceColors, width, height, i, i == thresHold); + pix = Pix.LoadTiffFromMemory(bytes); + page = tesseractEngine.Process(pix); + text = page.GetText().Trim(); + pix.Dispose(); + page.Dispose(); + if (!string.IsNullOrEmpty(text)) + { + text = regex.Replace(text, string.Empty); + readings.Add(text); + } + if (text == closestMatchFileName) + { + readAt = i; + _Log.Info(text); + textFileName = Get(saveFileName, ".txt", $"{i:000} - {text}"); + if (!fileRead.IsEAFHosted) + { + fileInfoCollection.Add(new FileInfo(textFileName)); + SaveText(red, green, text, textFileName, totalDeltaCollection, readAt, text, int.MaxValue); + } + break; + } + } + } + if (readAt < thresHold) + { + int score; + char[] readingDistinct; + text = closestMatchFileName; + List<(string Reading, int Score)> readingEvaluations = new(); + foreach (string reading in readings) + { + score = 0; + readingDistinct = reading.Distinct().ToArray(); + for (int i = 0; i < closestMatchFileNameDistinct.Length; i++) + { + if (!readingDistinct.Contains(closestMatchFileNameDistinct[i])) + continue; + score += 1; + } + readingEvaluations.Add(new(reading, score)); + } + readingEvaluations = (from l in readingEvaluations orderby l.Score descending select l).ToList(); + textFileName = Get(saveFileName, ".txt", $"{readAt:000} - {readingEvaluations[0].Reading} - {text}"); + if (!fileRead.IsEAFHosted) + { + fileInfoCollection.Add(new FileInfo(textFileName)); + SaveText(red, green, text, textFileName, totalDeltaCollection, readAt, readingEvaluations[0].Reading, readingEvaluations[0].Score); + } + } _Details.Add(text); } diff --git a/DEP08SIASM.csproj b/DEP08SIASM.csproj index df318ce..516a66e 100644 --- a/DEP08SIASM.csproj +++ b/DEP08SIASM.csproj @@ -86,7 +86,6 @@ -