HelperPdfStripperWrapper

This commit is contained in:
Mike Phares 2023-09-07 10:52:12 -07:00
parent 71062e4b69
commit a109115d02
4 changed files with 182 additions and 2 deletions

View File

@ -23,6 +23,7 @@
"Infineon", "Infineon",
"Kanban", "Kanban",
"kanbn", "kanbn",
"Kofax",
"NSFX", "NSFX",
"OBJE", "OBJE",
"onenote", "onenote",

View File

@ -86,7 +86,7 @@ internal static class HelperFindReplace
{ {
string[] files = Directory.GetFiles(args[0], "tnsNames.ora", SearchOption.AllDirectories); string[] files = Directory.GetFiles(args[0], "tnsNames.ora", SearchOption.AllDirectories);
if (files.Length == 0) if (files.Length == 0)
log.LogInformation("Count == {count}", findReplace.Count); log.LogInformation("Length == {length}", files.Length);
else else
FindReplace(log, files, findReplace); FindReplace(log, files, findReplace);
} }

View File

@ -0,0 +1,174 @@
using Microsoft.Extensions.Logging;
using System.Collections.ObjectModel;
using System.Diagnostics;
namespace File_Folder_Helper.Helpers;
/// <summary>
/// Wraps three external command-line converters (PDF-Text-Stripper, GhostPCL and
/// Kofax Power PDF) to produce one output file per source PDF, and collects
/// "key: value" lines out of the resulting text files.
/// </summary>
internal static class HelperPdfStripperWrapper
{
    /// <summary>Maximum time, in milliseconds, to wait for one external converter run.</summary>
    private const int ProcessTimeoutMilliseconds = 30000;

    /// <summary>
    /// Scans <paramref name="args"/> (starting at index 1) for two-character switches
    /// followed by a value: a switch ending in 'd' supplies the destination directory
    /// and a switch ending in 'k' supplies the key.
    /// </summary>
    /// <param name="args">Command-line style arguments; index 0 is not inspected here.</param>
    /// <returns>
    /// The full path of the destination directory and the trimmed key, each
    /// <c>null</c> when its switch was not found.
    /// </returns>
    private static (string?, string?) GetDestinationDirectory(List<string> args)
    {
        string? destinationDirectory = null;
        string? key = null;
        for (int i = 1; i < args.Count; i++)
        {
            // Only a two-character token that still has a following value is treated as a switch.
            if (args[i].Length != 2 || i + 1 >= args.Count)
                continue;
            // Only the second character is inspected; the first one (e.g. '-') is not validated.
            switch (args[i][1])
            {
                case 'd':
                    destinationDirectory = Path.GetFullPath(args[i + 1]);
                    break;
                case 'k':
                    key = args[i + 1].Trim();
                    break;
            }
            // The value that follows a two-character token is consumed even for unknown switches.
            i++;
        }
        return (destinationDirectory, key);
    }

    /// <summary>
    /// Starts the described process, waits up to <see cref="ProcessTimeoutMilliseconds"/>
    /// for it to exit and releases the process handle.
    /// </summary>
    /// <param name="processStartInfo">Executable, arguments and redirection settings.</param>
    private static void RunProcess(ProcessStartInfo processStartInfo)
    {
        using Process? process = Process.Start(processStartInfo);
        if (process is null)
            return;
        // Redirected pipes must be drained: a child that fills an unread pipe blocks
        // and would otherwise consume the whole timeout. The data itself is discarded.
        if (processStartInfo.RedirectStandardOutput)
            process.BeginOutputReadLine();
        if (processStartInfo.RedirectStandardError)
            process.BeginErrorReadLine();
        // Best effort: a timeout is not treated as an error, the caller simply
        // inspects whatever output file exists afterwards.
        _ = process.WaitForExit(ProcessTimeoutMilliseconds);
    }

    /// <summary>Returns the content of <paramref name="fileName"/>, or an empty string when it does not exist.</summary>
    private static string ReadAllTextOrEmpty(string fileName) =>
        File.Exists(fileName) ? File.ReadAllText(fileName) : string.Empty;

    /// <summary>
    /// Runs GhostPCL on <paramref name="sourceFileNamePdf"/> with
    /// <paramref name="destinationFileName"/> as output file and returns that file's content.
    /// </summary>
    /// <param name="ghostPCLFileName">Path of the GhostPCL executable.</param>
    /// <param name="sourceFileNamePdf">PDF to convert.</param>
    /// <param name="destinationFileName">Output file; deleted first when it already exists.</param>
    /// <returns>The content of the output file, or an empty string when none was produced.</returns>
    private static string GetGhostTextFromPDF(string ghostPCLFileName, string sourceFileNamePdf, string destinationFileName)
    {
        if (File.Exists(destinationFileName))
            File.Delete(destinationFileName);
        // NOTE(review): -sDEVICE=pdfwrite makes the output a PDF, so the "text" read back is
        // presumably that PDF's raw content - confirm this is intended.
        string arguments = $"-dSAFER -dBATCH -dNOPAUSE -dFIXEDMEDIA -dFitPage -dAutoRotatePages=/All -dDEVICEWIDTHPOINTS=792 -dDEVICEHEIGHTPOINTS=612 -sOutputFile=\"{destinationFileName}\" -sDEVICE=pdfwrite \"{sourceFileNamePdf}\"";
        RunProcess(new ProcessStartInfo(ghostPCLFileName, arguments));
        return ReadAllTextOrEmpty(destinationFileName);
    }

    /// <summary>
    /// Runs the Kofax batch converter on <paramref name="sourceFileNamePdf"/> with
    /// <paramref name="destinationFileName"/> as output file and returns that file's content.
    /// </summary>
    /// <param name="kofaxFileName">Path of the Kofax batch converter executable.</param>
    /// <param name="sourceFileNamePdf">PDF to convert.</param>
    /// <param name="destinationFileName">Output file; deleted first when it already exists.</param>
    /// <returns>The content of the output file, or an empty string when none was produced.</returns>
    private static string GetKofaxTextFromPDF(string kofaxFileName, string sourceFileNamePdf, string destinationFileName)
    {
        if (File.Exists(destinationFileName))
            File.Delete(destinationFileName);
        string arguments = $"-inputFile\"{sourceFileNamePdf}\" -outputFile\"{destinationFileName}\" -TTIF";
        RunProcess(new ProcessStartInfo(kofaxFileName, arguments));
        return ReadAllTextOrEmpty(destinationFileName);
    }

    /// <summary>
    /// Runs PDF-Text-Stripper on <paramref name="sourceFileNamePdf"/> and returns the content of
    /// <paramref name="destinationFileName"/> when that file exists afterwards.
    /// </summary>
    /// <param name="pdfTextStripperFileName">Path of the PDF-Text-Stripper executable.</param>
    /// <param name="sourceFileNamePdf">PDF to convert.</param>
    /// <param name="destinationFileName">File expected to hold the extracted text.</param>
    /// <returns>The content of the text file, or an empty string when it does not exist.</returns>
    private static string GetTextFromPDF(string pdfTextStripperFileName, string sourceFileNamePdf, string destinationFileName)
    {
        // The tool is not told where to write; presumably it creates "<source>.txt"
        // next to the PDF - TODO confirm against the tool's behaviour.
        ProcessStartInfo processStartInfo = new(pdfTextStripperFileName, $"s \"{sourceFileNamePdf}\"")
        {
            UseShellExecute = false,
            RedirectStandardError = true,
            RedirectStandardOutput = true,
        };
        RunProcess(processStartInfo);
        return ReadAllTextOrEmpty(destinationFileName);
    }

    /// <summary>
    /// Converts every file in <paramref name="files"/>, trying PDF-Text-Stripper first, then
    /// GhostPCL, then Kofax, and logs the length of the text obtained for each file.
    /// </summary>
    /// <param name="log">Logger receiving one entry per file.</param>
    /// <param name="pdfTextStripperFileName">Path of the PDF-Text-Stripper executable.</param>
    /// <param name="ghostPCLFileName">Path of the GhostPCL executable.</param>
    /// <param name="kofaxFileName">Path of the Kofax batch converter executable.</param>
    /// <param name="destinationDirectory">Directory to create when missing and to combine output names with.</param>
    /// <param name="files">Source PDF paths.</param>
    private static void ParseSave(ILogger log, string pdfTextStripperFileName, string ghostPCLFileName, string kofaxFileName, string destinationDirectory, string[] files)
    {
        if (!Directory.Exists(destinationDirectory))
            _ = Directory.CreateDirectory(destinationDirectory);
        foreach (string file in files)
        {
            // NOTE(review): file is the path returned by Directory.GetFiles, not just a name. When it
            // is absolute, Path.Combine discards destinationDirectory and the result is "<file>.txt"
            // beside the source PDF - confirm whether that is intended before changing it.
            string destinationFileName = Path.Combine(destinationDirectory, $"{file}.txt");
            string text = GetTextFromPDF(pdfTextStripperFileName, file, destinationFileName);
            if (string.IsNullOrEmpty(text))
                text = GetGhostTextFromPDF(ghostPCLFileName, file, destinationFileName);
            if (string.IsNullOrEmpty(text))
                text = GetKofaxTextFromPDF(kofaxFileName, file, destinationFileName);
            log.LogInformation("<{file}> == {length}", Path.GetFileName(file), text.Length);
        }
    }

    /// <summary>
    /// Entry point: verifies that the three converters exist and that a destination directory was
    /// supplied, then converts every "*.pdf" file found directly inside <c>args[0]</c>.
    /// Each unmet precondition is logged and ends the call without doing any work.
    /// </summary>
    /// <param name="log">Logger for progress and for unmet preconditions.</param>
    /// <param name="args">Index 0 is the source directory; later entries carry the switches.</param>
    internal static void ParseSave(ILogger log, List<string> args)
    {
        string pdfTextStripperFileName = Path.Combine(AppContext.BaseDirectory, "PDF-Text-Stripper.exe");
        if (!File.Exists(pdfTextStripperFileName))
        {
            log.LogInformation("exe <{pdfTextStripperFileName}> doesn't exist!", pdfTextStripperFileName);
            return;
        }
        string ghostPCLFileName = Path.Combine(AppContext.BaseDirectory, "gPcl6win64.exe");
        if (!File.Exists(ghostPCLFileName))
        {
            log.LogInformation("exe <{ghostPCLFileName}> doesn't exist!", ghostPCLFileName);
            return;
        }
        string kofaxFileName = "C:/Program Files (x86)/Kofax/Power PDF 50/batchConverter.com";
        if (!File.Exists(kofaxFileName))
        {
            log.LogInformation("exe <{kofaxFileName}> doesn't exist!", kofaxFileName);
            return;
        }
        (string? destinationDirectory, _) = GetDestinationDirectory(args);
        if (string.IsNullOrEmpty(destinationDirectory))
        {
            log.LogInformation("-d <{destinationDirectory}> wasn't supplied!", nameof(destinationDirectory));
            return;
        }
        string[] files = Directory.GetFiles(args[0], "*.pdf", SearchOption.TopDirectoryOnly);
        if (files.Length == 0)
        {
            log.LogInformation("Length == {length}", files.Length);
            return;
        }
        ParseSave(log, pdfTextStripperFileName, ghostPCLFileName, kofaxFileName, destinationDirectory, files);
    }

    /// <summary>
    /// Collects every line whose text before the first ':' equals <paramref name="key"/> (after
    /// trimming) and writes the matches, prefixed with the file name and a tab, to
    /// "<paramref name="key"/>.txt" inside <paramref name="destinationDirectory"/>.
    /// Nothing is written when no line matches.
    /// </summary>
    /// <param name="log">Logger receiving one entry per processed file.</param>
    /// <param name="destinationDirectory">Directory that receives the result file.</param>
    /// <param name="key">Exact, case-sensitive key to look for.</param>
    /// <param name="files">Text files to scan.</param>
    private static void ParseStrip(ILogger log, string destinationDirectory, string key, string[] files)
    {
        List<string> collection = new();
        foreach (string file in files)
        {
            string fileName = Path.GetFileName(file);
            foreach (string line in File.ReadAllLines(file))
            {
                // Lines without a ':' carry no key and are skipped.
                int separator = line.IndexOf(':');
                if (separator < 0)
                    continue;
                if (line[..separator].Trim() != key)
                    continue;
                collection.Add($"{fileName}\t{line}");
            }
            log.LogInformation("<{fileName}>", fileName);
        }
        if (collection.Count > 0)
            File.WriteAllLines(Path.Combine(destinationDirectory, $"{key}.txt"), collection);
    }

    /// <summary>
    /// Entry point: scans every "*.txt" file directly inside <c>args[0]</c> for lines carrying the
    /// key supplied in <paramref name="args"/>. Once the local date is past 2023-09-15 the call
    /// only logs that this helper was short term.
    /// </summary>
    /// <param name="log">Logger for progress and for unmet preconditions.</param>
    /// <param name="args">Index 0 is the directory to scan; later entries carry the switches.</param>
    internal static void ParseStrip(ILogger log, List<string> args)
    {
        if (DateTime.Now > new DateTime(2023, 9, 15))
        {
            log.LogInformation("This helper was a short term helper!");
            return;
        }
        // NOTE(review): the -d value is parsed but not used here; the result file is written to
        // args[0] - confirm whether the destination directory was meant instead.
        (_, string? key) = GetDestinationDirectory(args);
        if (string.IsNullOrEmpty(key))
        {
            log.LogInformation("-k <{key}> wasn't supplied!", nameof(key));
            return;
        }
        string[] files = Directory.GetFiles(args[0], "*.txt", SearchOption.TopDirectoryOnly);
        if (files.Length == 0)
        {
            log.LogInformation("Length == {length}", files.Length);
            return;
        }
        ParseStrip(log, args[0], key, files);
    }
}

View File

@ -41,6 +41,7 @@ public class Worker : BackgroundService
ConsoleKey.M, ConsoleKey.M,
ConsoleKey.N, ConsoleKey.N,
ConsoleKey.O, ConsoleKey.O,
ConsoleKey.P,
ConsoleKey.R, ConsoleKey.R,
ConsoleKey.S, ConsoleKey.S,
ConsoleKey.T, ConsoleKey.T,
@ -105,7 +106,7 @@ public class Worker : BackgroundService
_Logger.LogInformation("N) Create Note Files,"); _Logger.LogInformation("N) Create Note Files,");
_Logger.LogInformation("M) Markdown Wiki Link Verification,"); _Logger.LogInformation("M) Markdown Wiki Link Verification,");
_Logger.LogInformation("O) Oracle tnsNames.ora,"); _Logger.LogInformation("O) Oracle tnsNames.ora,");
// P _Logger.LogInformation("P) PDF parse,");
// Q // Q
_Logger.LogInformation("R) Rename to old, copy, delete old,"); _Logger.LogInformation("R) Rename to old, copy, delete old,");
_Logger.LogInformation("S) Set Date from Zip Entry,"); _Logger.LogInformation("S) Set Date from Zip Entry,");
@ -158,6 +159,10 @@ public class Worker : BackgroundService
case ConsoleKey.O: case ConsoleKey.O:
Helpers.HelperFindReplace.UpdateTnsNames(_Logger, _Args); Helpers.HelperFindReplace.UpdateTnsNames(_Logger, _Args);
break; break;
case ConsoleKey.P:
Helpers.HelperPdfStripperWrapper.ParseSave(_Logger, _Args);
Helpers.HelperPdfStripperWrapper.ParseStrip(_Logger, _Args);
break;
case ConsoleKey.R: case ConsoleKey.R:
Helpers.HelperRenameToOldMoveDeleteOldMerge.RenameToOldMoveDeleteOld(_Logger, _Args[0]); Helpers.HelperRenameToOldMoveDeleteOldMerge.RenameToOldMoveDeleteOld(_Logger, _Args[0]);
break; break;