using Adaptation.Shared;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
using System.Linq;

namespace Adaptation.FileHandlers.pcl;

/// <summary>
/// Converts a PCL report into a PDF and splits that PDF into per-page PDF/text pairs.
/// </summary>
internal class Convert
{

    /// <summary>
    /// Maximum time, in milliseconds, an external tool is given to finish.
    /// </summary>
    private const int ProcessTimeoutMilliseconds = 30000;

    /// <summary>
    /// Waits for <paramref name="process"/> to exit and terminates it when it is still
    /// running after <see cref="ProcessTimeoutMilliseconds"/>.
    /// </summary>
    /// <param name="process">A started process.</param>
    /// <returns><c>true</c> when the process exited within the timeout; otherwise <c>false</c>.</returns>
    private static bool WaitForExitOrKill(Process process)
    {
        if (process.WaitForExit(ProcessTimeoutMilliseconds))
            return true;
        try
        {
            process.Kill();
            // Kill is asynchronous; give the process a bounded amount of time to release its files.
            _ = process.WaitForExit(ProcessTimeoutMilliseconds);
        }
        catch (InvalidOperationException)
        {
            // The process exited on its own between the timeout and the kill request.
        }
        return false;
    }

    /// <summary>
    /// Convert the raw data file to parsable file format - in this case from PCL to PDF.
    /// The conversion is skipped when the PDF already exists next to the report.
    /// </summary>
    /// <param name="ghostPCLFileName">Executable that is started to perform the conversion.</param>
    /// <param name="logistics">Supplies <c>ReportFullPath</c>, the file to be converted to PDF.</param>
    /// <returns>The report path with its extension changed to <c>.pdf</c>.</returns>
    /// <exception cref="TimeoutException">The converter did not exit within the timeout.</exception>
    /// <exception cref="Exception">The converter exited but the PDF file does not exist.</exception>
    private static string ConvertSourceFileToPdf(string ghostPCLFileName, Logistics logistics)
    {
        string result = Path.ChangeExtension(logistics.ReportFullPath, ".pdf");
        if (File.Exists(result))
            return result;
        string arguments = $"-dSAFER -dBATCH -dNOPAUSE -sOutputFile=\"{result}\" -sDEVICE=pdfwrite \"{logistics.ReportFullPath}\"";
        using Process process = Process.Start(ghostPCLFileName, arguments)
            ?? throw new InvalidOperationException($"Unable to start <{ghostPCLFileName}>");
        if (!WaitForExitOrKill(process))
        {
            // A half-written PDF would pass the File.Exists check above on the next call,
            // so remove it (best effort) before reporting the timeout.
            try
            {
                File.Delete(result);
            }
            catch (IOException)
            {
                // Best effort only; the timeout below is the error worth reporting.
            }
            catch (UnauthorizedAccessException)
            {
                // Best effort only; the timeout below is the error worth reporting.
            }
            throw new TimeoutException($"<{ghostPCLFileName}> did not finish within {ProcessTimeoutMilliseconds} ms");
        }
        if (!File.Exists(result))
            throw new Exception("PDF file wasn't created");
        return result;
    }

    /// <summary>
    /// Runs the external text stripper against <paramref name="sourceFileNamePdf"/> and collects
    /// every PDF in <paramref name="sourcePath"/> (other than the source PDF itself) that has a
    /// text file of the same name beside it.
    /// </summary>
    /// <param name="pdfTextStripperFileName">Executable that is started with the arguments <c>s "&lt;pdf&gt;"</c>.</param>
    /// <param name="sourcePath">Directory that is scanned for <c>*.pdf</c> and <c>*.txt</c> files.</param>
    /// <param name="sourceFileNamePdf">Full path of the PDF handed to the executable.</param>
    /// <returns>PDF file name mapped to the content of the matching text file.</returns>
    /// <exception cref="TimeoutException">The executable did not exit within the timeout.</exception>
    private static Dictionary<string, string> PortableDocumentFormatSplit(string pdfTextStripperFileName, string sourcePath, string sourceFileNamePdf)
    {
        Dictionary<string, string> results = new();
        ProcessStartInfo processStartInfo = new(pdfTextStripperFileName, $"s \"{sourceFileNamePdf}\"")
        {
            UseShellExecute = false,
            RedirectStandardError = true,
            RedirectStandardOutput = true,
        };
        using (Process process = Process.Start(processStartInfo)
            ?? throw new InvalidOperationException($"Unable to start <{pdfTextStripperFileName}>"))
        {
            // Redirected streams must be drained; otherwise the child blocks as soon as a pipe
            // buffer fills up and never exits. The output itself is not needed, so it is discarded.
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
            if (!WaitForExitOrKill(process))
                throw new TimeoutException($"<{pdfTextStripperFileName}> did not finish within {ProcessTimeoutMilliseconds} ms");
        }
        string[] pdfFiles = Directory.GetFiles(sourcePath, "*.pdf", SearchOption.TopDirectoryOnly);
        string[] textFiles = Directory.GetFiles(sourcePath, "*.txt", SearchOption.TopDirectoryOnly);
        foreach (string pdfFile in pdfFiles)
        {
            if (pdfFile == sourceFileNamePdf)
                continue;
            string checkFile = Path.ChangeExtension(pdfFile, ".txt");
            if (!textFiles.Contains(checkFile))
                continue;
            results.Add(pdfFile, File.ReadAllText(checkFile));
        }
        return results;
    }

    /// <summary>
    /// Converts the report to PDF and returns the text of each page keyed by the page's PDF file name.
    /// Page files that already exist on disk (<c>{report}_*.txt</c> with a matching <c>.pdf</c>) are
    /// reused; otherwise the PDF is split with PDFBox, and when those calls raise
    /// <see cref="MissingMethodException"/> the external text stripper is used instead.
    /// </summary>
    /// <param name="logistics">Supplies <c>ReportFullPath</c>, the PCL report to process.</param>
    /// <param name="ghostPCLFileName">Executable used for the PCL to PDF conversion.</param>
    /// <param name="pdfTextStripperFileName">Executable used by the fallback split.</param>
    /// <param name="fileInfoCollection">Receives a <see cref="FileInfo"/> for every file recorded as a source while processing.</param>
    /// <returns>Read-only map of page PDF file name to page text.</returns>
    internal static ReadOnlyDictionary<string, string> PDF(Logistics logistics, string ghostPCLFileName, string pdfTextStripperFileName, List<FileInfo> fileInfoCollection)
    {
        Dictionary<string, string> results = new();
        List<string> sourceFiles = new();
        string sourceFileNamePdf = ConvertSourceFileToPdf(ghostPCLFileName, logistics);
        sourceFiles.Add(sourceFileNamePdf);
        string sourcePath = Path.GetDirectoryName(logistics.ReportFullPath) ?? throw new Exception();
        string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(logistics.ReportFullPath);

        // First choice: page files left behind by an earlier run.
        string[] txtFiles = Directory.GetFiles(sourcePath, $"{sourceFileNameWithoutExtension}_*.txt", SearchOption.TopDirectoryOnly);
        foreach (string txtFile in txtFiles)
        {
            sourceFiles.Add(txtFile);
            string existingText = File.ReadAllText(txtFile);
            string existingPdf = Path.ChangeExtension(txtFile, ".pdf");
            if (!File.Exists(existingPdf))
                continue;
            results.Add(existingPdf, existingText);
        }

        if (results.Count == 0)
        {
            // The PDFBox calls are intentionally kept inline in this method so that the
            // MissingMethodException handler below keeps covering exactly the same calls.
            try
            {
                java.io.File file = new(sourceFileNamePdf);
                org.apache.pdfbox.util.Splitter splitter = new();
                org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
                try
                {
                    java.util.List list = splitter.split(pdDocument);
                    java.util.ListIterator iterator = list.listIterator();
                    org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
                    // Driven by the iterator so that every split page is visited (and closed).
                    for (int pageNumber = 1; iterator.hasNext(); pageNumber++)
                    {
                        object item = iterator.next();
                        try
                        {
                            string pageText;
                            string pagePDFFile = Path.Combine(sourcePath, $"{sourceFileNameWithoutExtension}_{pageNumber}.pdf");
                            string pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                            if (File.Exists(pageTextFile))
                            {
                                // Text was extracted earlier; reuse it.
                                pageText = File.ReadAllText(pageTextFile);
                                sourceFiles.Add(pageTextFile);
                            }
                            else if (File.Exists(pagePDFFile))
                            {
                                // Page PDF exists without text; extract the text from the file on disk.
                                org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
                                try
                                {
                                    pageText = dataStripper.getText(document);
                                }
                                finally
                                {
                                    document.close();
                                }
                                sourceFiles.Add(pagePDFFile);
                            }
                            else if (item is org.apache.pdfbox.pdmodel.PDDocument pd)
                            {
                                // Nothing on disk yet; write both the page PDF and its text.
                                pageText = dataStripper.getText(pd);
                                pd.save(pagePDFFile);
                                sourceFiles.Add(pagePDFFile);
                                File.WriteAllText(pageTextFile, pageText);
                                sourceFiles.Add(pageTextFile);
                            }
                            else
                                continue;
                            // Only entries backed by a split page document are reported.
                            if (item is not org.apache.pdfbox.pdmodel.PDDocument)
                                continue;
                            results.Add(pagePDFFile, pageText);
                        }
                        finally
                        {
                            if (item is org.apache.pdfbox.pdmodel.PDDocument splitPage)
                                splitPage.close();
                        }
                    }
                }
                finally
                {
                    pdDocument.close();
                }
            }
            catch (MissingMethodException)
            {
                if (results.Count == 0)
                    results = PortableDocumentFormatSplit(pdfTextStripperFileName, sourcePath, sourceFileNamePdf);
            }
        }

        foreach (string sourceFile in sourceFiles)
            fileInfoCollection.Add(new FileInfo(sourceFile));
        return new(results);
    }

}