From 9ceed5b5a4ec2e3b1b70365a3b8597d81e88688d Mon Sep 17 00:00:00 2001 From: Mike Phares Date: Wed, 23 Feb 2022 18:13:30 -0700 Subject: [PATCH] Cropped Image --- Adaptation/FileHandlers/jpeg/ProcessData.cs | 77 +++++++++------------ 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/Adaptation/FileHandlers/jpeg/ProcessData.cs b/Adaptation/FileHandlers/jpeg/ProcessData.cs index 184b3c2..5613b21 100644 --- a/Adaptation/FileHandlers/jpeg/ProcessData.cs +++ b/Adaptation/FileHandlers/jpeg/ProcessData.cs @@ -7,6 +7,8 @@ using log4net; using System; using System.Collections.Generic; using System.Data; +using System.Drawing; +using System.Drawing.Imaging; using System.Globalization; using System.IO; using System.Linq; @@ -33,8 +35,6 @@ public class ProcessData : IProcessData { if (logistics is null) { } - if (fileInfoCollection is null) - { } JobID = logistics.JobID; fileInfoCollection.Clear(); _Details = new List(); @@ -103,51 +103,42 @@ public class ProcessData : IProcessData return results; } +#pragma warning disable CA1416 + + private static byte[] GetBytes(IFileRead fileRead, int endY, int endX, int startY, int startX, int outputQuality) + { + byte[] results; + Point startPoint = new(startX, startY); + using MemoryStream memoryStream = new(); + EncoderParameters encoderParameters = new(1); + System.Drawing.Imaging.ImageFormat imageFormat = System.Drawing.Imaging.ImageFormat.Jpeg; + ImageCodecInfo imageCodecInfo = (from l in ImageCodecInfo.GetImageEncoders() where l.FormatID == imageFormat.Guid select l).First(); + encoderParameters.Param[0] = new EncoderParameter(System.Drawing.Imaging.Encoder.Quality, outputQuality); + Rectangle rectangle = new(startPoint, new Size(endX - startX, endY - startY)); + using Bitmap bitmap = Image.FromFile(fileRead.ReportFullPath) as Bitmap; + using Bitmap clonedBitmap = bitmap.Clone(rectangle, bitmap.PixelFormat); + //clonedBitmap.Save(Path.ChangeExtension(fileRead.ReportFullPath, ".png"), System.Drawing.Imaging.ImageFormat.Png); + clonedBitmap.Save(memoryStream, imageCodecInfo, encoderParameters); + results = memoryStream.GetBuffer(); + return results; + } + +#pragma warning restore CA1416 + + private static byte[] GetBytes(IFileRead fileRead) => GetBytes(fileRead, 68, 1687, 32, 1094, 95); + private void Parse(IFileRead fileRead) { - List blocks = new(); - StringBuilder stringBuilder = new(); + string text; using TesseractEngine engine = new(string.Empty, "eng", EngineMode.Default); - using Pix img = Pix.LoadFromFile(fileRead.ReportFullPath); + //using Pix img = Pix.LoadFromFile(fileRead.ReportFullPath); + byte[] bytes = GetBytes(fileRead); + using Pix img = Pix.LoadFromMemory(bytes); using Page page = engine.Process(img); - string text = page.GetText(); - _Log.Debug(string.Format("Mean confidence: {0}", page.GetMeanConfidence())); - _Log.Debug(string.Format("Text (GetText): \r\n{0}", text)); - _Log.Debug("Text (iterator):"); - using ResultIterator iter = page.GetIterator(); - iter.Begin(); - do - { - do - { - do - { - do - { - _ = stringBuilder.Append(iter.GetText(PageIteratorLevel.Word)).Append(' '); - if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word) && stringBuilder.Length > 0) - { - blocks.Add(stringBuilder.ToString()); - _ = stringBuilder.Clear(); - } - } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word)); - if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine)) - _ = stringBuilder.AppendLine(); - } while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine)); - } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para)); - } while (iter.Next(PageIteratorLevel.Block)); - if (stringBuilder.Length > 0) - blocks.Add(stringBuilder.ToString()); - if (!blocks.Any()) - _Details.Add(text); - else - { - blocks = (from l in blocks where l.Split(':').Length == 3 select l).ToList(); - if (!blocks.Any()) - _Details.Add(text); - else - _Details.Add(blocks[0]); - } + text = page.GetText(); + if (string.IsNullOrEmpty(text)) + throw new Exception("OCR Failure!"); + _Details.Add(text); } } \ No newline at end of file