From a109115d0253d28e1918956d27480333fc823964 Mon Sep 17 00:00:00 2001
From: Mike Phares
Date: Thu, 7 Sep 2023 10:52:12 -0700
Subject: [PATCH] HelperPdfStripperWrapper

---
Note: line breaks and leading whitespace in this patch were reconstructed from a
whitespace-collapsed copy; if context lines fail to match, apply with
`git apply --ignore-whitespace`.

 .vscode/settings.json               |   1 +
 Helpers/HelperFindReplace.cs        |   2 +-
 Helpers/HelperPdfStripperWrapper.cs | 213 ++++++++++++++++++++++++++++
 Worker.cs                           |   7 +-
 4 files changed, 221 insertions(+), 2 deletions(-)
 create mode 100644 Helpers/HelperPdfStripperWrapper.cs

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 7cdc830..ed99375 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -23,6 +23,7 @@
         "Infineon",
         "Kanban",
         "kanbn",
+        "Kofax",
         "NSFX",
         "OBJE",
         "onenote",
diff --git a/Helpers/HelperFindReplace.cs b/Helpers/HelperFindReplace.cs
index 7d28cfe..1d3bb9f 100644
--- a/Helpers/HelperFindReplace.cs
+++ b/Helpers/HelperFindReplace.cs
@@ -86,7 +86,7 @@ internal static class HelperFindReplace
         {
             string[] files = Directory.GetFiles(args[0], "tnsNames.ora", SearchOption.AllDirectories);
             if (files.Length == 0)
-                log.LogInformation("Count == {count}", findReplace.Count);
+                log.LogInformation("Length == {length}", files.Length);
             else
                 FindReplace(log, files, findReplace);
         }
diff --git a/Helpers/HelperPdfStripperWrapper.cs b/Helpers/HelperPdfStripperWrapper.cs
new file mode 100644
index 0000000..9ba0016
--- /dev/null
+++ b/Helpers/HelperPdfStripperWrapper.cs
@@ -0,0 +1,213 @@
+using Microsoft.Extensions.Logging;
+using System.Collections.ObjectModel;
+using System.Diagnostics;
+
+namespace File_Folder_Helper.Helpers;
+
+/// <summary>
+/// Wraps external PDF converters: saves text for each PDF in a directory, then gathers keyed lines.
+/// </summary>
+internal static class HelperPdfStripperWrapper
+{
+
+    // Longest time, in milliseconds, to wait for any one converter process.
+    private const int WaitMilliseconds = 30000;
+
+    /// <summary>
+    /// Reads switch/value pairs from args, starting at index 1.
+    /// A two-character switch ending in 'd' gives the destination directory (as a full path);
+    /// one ending in 'k' gives the key (trimmed). Either item is null when its switch is absent.
+    /// </summary>
+    private static (string?, string?) GetDestinationDirectory(List<string> args)
+    {
+        string? d = null;
+        string? k = null;
+        for (int i = 1; i < args.Count; i++)
+        {
+            if (args[i].Length == 2 && i + 1 < args.Count)
+            {
+                if (args[i][1] == 'd')
+                    d = Path.GetFullPath(args[i + 1]);
+                else if (args[i][1] == 'k')
+                    k = args[i + 1].Trim();
+                i++; // step over the element that follows any two-character switch
+            }
+        }
+        return (d, k);
+    }
+
+    /// <summary>
+    /// Waits up to <see cref="WaitMilliseconds"/> for process (when one was started), then returns
+    /// the contents of destinationFileName, or an empty string when that file does not exist.
+    /// </summary>
+    private static string WaitThenRead(Process? process, string destinationFileName)
+    {
+        _ = process?.WaitForExit(WaitMilliseconds);
+        return File.Exists(destinationFileName) ? File.ReadAllText(destinationFileName) : string.Empty;
+    }
+
+    /// <summary>
+    /// Deletes any existing destinationFileName, runs the ghostPCLFileName executable against the PDF
+    /// and returns whatever it wrote to destinationFileName (empty when nothing was written).
+    /// </summary>
+    private static string GetGhostTextFromPDF(string ghostPCLFileName, string sourceFileNamePdf, string destinationFileName)
+    {
+        if (File.Exists(destinationFileName))
+            File.Delete(destinationFileName);
+        // NOTE(review): -sDEVICE=pdfwrite presumably emits a PDF rather than plain text - confirm the intended device.
+        string arguments = $"-dSAFER -dBATCH -dNOPAUSE -dFIXEDMEDIA -dFitPage -dAutoRotatePages=/All -dDEVICEWIDTHPOINTS=792 -dDEVICEHEIGHTPOINTS=612 -sOutputFile=\"{destinationFileName}\" -sDEVICE=pdfwrite \"{sourceFileNamePdf}\"";
+        // Dispose the handle so that converting many files does not leak process handles.
+        using Process? process = Process.Start(ghostPCLFileName, arguments);
+        return WaitThenRead(process, destinationFileName);
+    }
+
+    /// <summary>
+    /// Deletes any existing destinationFileName, runs the kofaxFileName executable against the PDF
+    /// and returns whatever it wrote to destinationFileName (empty when nothing was written).
+    /// </summary>
+    private static string GetKofaxTextFromPDF(string kofaxFileName, string sourceFileNamePdf, string destinationFileName)
+    {
+        if (File.Exists(destinationFileName))
+            File.Delete(destinationFileName);
+        string arguments = $"-inputFile\"{sourceFileNamePdf}\" -outputFile\"{destinationFileName}\" -TTIF";
+        using Process? process = Process.Start(kofaxFileName, arguments);
+        return WaitThenRead(process, destinationFileName);
+    }
+
+    /// <summary>
+    /// Runs the pdfTextStripperFileName executable with "s" and the quoted PDF path, then returns the
+    /// contents of destinationFileName (empty when that file does not exist afterwards).
+    /// </summary>
+    private static string GetTextFromPDF(string pdfTextStripperFileName, string sourceFileNamePdf, string destinationFileName)
+    {
+        ProcessStartInfo processStartInfo = new(pdfTextStripperFileName, $"s \"{sourceFileNamePdf}\"")
+        {
+            UseShellExecute = false,
+            RedirectStandardError = true,
+            RedirectStandardOutput = true,
+        };
+        using Process? process = Process.Start(processStartInfo);
+        if (process is not null)
+        {
+            // Drain the redirected pipes; otherwise a child that fills a pipe buffer stalls until the wait times out.
+            process.BeginOutputReadLine();
+            process.BeginErrorReadLine();
+        }
+        return WaitThenRead(process, destinationFileName);
+    }
+
+    /// <summary>
+    /// Creates destinationDirectory when it is missing, then for each file tries the text stripper,
+    /// then Ghost, then Kofax until one yields text, and logs the resulting text length.
+    /// </summary>
+    private static void ParseSave(ILogger log, string pdfTextStripperFileName, string ghostPCLFileName, string kofaxFileName, string destinationDirectory, string[] files)
+    {
+        string text;
+        string destinationFileName;
+        if (!Directory.Exists(destinationDirectory))
+            _ = Directory.CreateDirectory(destinationDirectory);
+        foreach (string file in files)
+        {
+            // NOTE(review): when file is a rooted path, Path.Combine returns "{file}.txt" and ignores
+            // destinationDirectory - confirm whether Path.GetFileName(file) was intended here.
+            destinationFileName = Path.Combine(destinationDirectory, $"{file}.txt");
+            text = GetTextFromPDF(pdfTextStripperFileName, file, destinationFileName);
+            if (string.IsNullOrEmpty(text))
+                text = GetGhostTextFromPDF(ghostPCLFileName, file, destinationFileName);
+            if (string.IsNullOrEmpty(text))
+                text = GetKofaxTextFromPDF(kofaxFileName, file, destinationFileName);
+            log.LogInformation("<{file}> == {length}", Path.GetFileName(file), text.Length);
+        }
+    }
+
+    /// <summary>
+    /// Entry point: verifies the three converter executables exist and that -d was supplied, then converts
+    /// every *.pdf directly inside args[0]. Each unmet precondition is logged and ends the call.
+    /// </summary>
+    internal static void ParseSave(ILogger log, List<string> args)
+    {
+        string pdfTextStripperFileName = Path.Combine(AppContext.BaseDirectory, "PDF-Text-Stripper.exe");
+        if (!File.Exists(pdfTextStripperFileName))
+        {
+            log.LogInformation("exe <{pdfTextStripperFileName}> doesn't exist!", pdfTextStripperFileName);
+            return;
+        }
+        string ghostPCLFileName = Path.Combine(AppContext.BaseDirectory, "gPcl6win64.exe");
+        if (!File.Exists(ghostPCLFileName))
+        {
+            log.LogInformation("exe <{ghostPCLFileName}> doesn't exist!", ghostPCLFileName);
+            return;
+        }
+        string kofaxFileName = "C:/Program Files (x86)/Kofax/Power PDF 50/batchConverter.com";
+        if (!File.Exists(kofaxFileName))
+        {
+            log.LogInformation("exe <{kofaxFileName}> doesn't exist!", kofaxFileName);
+            return;
+        }
+        (string? destinationDirectory, string? _) = GetDestinationDirectory(args);
+        if (string.IsNullOrEmpty(destinationDirectory))
+        {
+            log.LogInformation("-d <{destinationDirectory}> wasn't supplied!", nameof(destinationDirectory));
+            return;
+        }
+        string[] files = Directory.GetFiles(args[0], "*.pdf", SearchOption.TopDirectoryOnly);
+        if (files.Length == 0)
+            log.LogInformation("Length == {length}", files.Length);
+        else
+            ParseSave(log, pdfTextStripperFileName, ghostPCLFileName, kofaxFileName, destinationDirectory, files);
+    }
+
+    /// <summary>
+    /// Collects every line whose text before the first ':' (trimmed) equals key, prefixed with its file name
+    /// and a tab, and writes them to "{key}.txt" in destinationDirectory when at least one line matched.
+    /// </summary>
+    private static void ParseStrip(ILogger log, string destinationDirectory, string key, string[] files)
+    {
+        string[] lines;
+        string fileName;
+        string[] segments;
+        List<string> collection = new();
+        foreach (string file in files)
+        {
+            lines = File.ReadAllLines(file);
+            fileName = Path.GetFileName(file);
+            foreach (string line in lines)
+            {
+                segments = line.Split(':');
+                if (segments.Length < 2)
+                    continue;
+                if (segments[0].Trim() != key)
+                    continue;
+                collection.Add($"{fileName}\t{line}");
+            }
+            log.LogInformation("<{fileName}>", fileName);
+        }
+        if (collection.Count > 0)
+            File.WriteAllLines(Path.Combine(destinationDirectory, $"{key}.txt"), collection);
+    }
+
+    /// <summary>
+    /// Entry point: while DateTime.Now is not past 2023-09-15 00:00, scans every *.txt directly inside
+    /// args[0] for lines keyed by the -k value and writes the matches back into args[0]. Requires -k.
+    /// </summary>
+    internal static void ParseStrip(ILogger log, List<string> args)
+    {
+        if (DateTime.Now > new DateTime(2023, 9, 15))
+            log.LogInformation("This helper was a short term helper!");
+        else
+        {
+            (string? _, string? key) = GetDestinationDirectory(args);
+            if (string.IsNullOrEmpty(key))
+                log.LogInformation("-k <{key}> wasn't supplied!", nameof(key));
+            else
+            {
+                string[] files = Directory.GetFiles(args[0], "*.txt", SearchOption.TopDirectoryOnly);
+                if (files.Length == 0)
+                    log.LogInformation("Length == {length}", files.Length);
+                else
+                    ParseStrip(log, args[0], key, files);
+            }
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/Worker.cs b/Worker.cs
index 0d98689..5b826d0 100644
--- a/Worker.cs
+++ b/Worker.cs
@@ -41,6 +41,7 @@ public class Worker : BackgroundService
             ConsoleKey.M,
             ConsoleKey.N,
             ConsoleKey.O,
+            ConsoleKey.P,
             ConsoleKey.R,
             ConsoleKey.S,
             ConsoleKey.T,
@@ -105,7 +106,7 @@ public class Worker : BackgroundService
                 _Logger.LogInformation("N) Create Note Files,");
                 _Logger.LogInformation("M) Markdown Wiki Link Verification,");
                 _Logger.LogInformation("O) Oracle tnsNames.ora,");
-                // P
+                _Logger.LogInformation("P) PDF parse,");
                 // Q
                 _Logger.LogInformation("R) Rename to old, copy, delete old,");
                 _Logger.LogInformation("S) Set Date from Zip Entry,");
@@ -158,6 +159,10 @@ public class Worker : BackgroundService
                     case ConsoleKey.O:
                         Helpers.HelperFindReplace.UpdateTnsNames(_Logger, _Args);
                         break;
+                    case ConsoleKey.P:
+                        Helpers.HelperPdfStripperWrapper.ParseSave(_Logger, _Args);
+                        Helpers.HelperPdfStripperWrapper.ParseStrip(_Logger, _Args);
+                        break;
                     case ConsoleKey.R:
                         Helpers.HelperRenameToOldMoveDeleteOldMerge.RenameToOldMoveDeleteOld(_Logger, _Args[0]);
                         break;