run.json descriptions.json MissingMethodException Infineon.Mesa.PDF.Text.Stripper 4.8.0.2 MSTEST0037
149 lines
6.5 KiB
C#
149 lines
6.5 KiB
C#
using Adaptation.Shared;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Collections.ObjectModel;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Linq;
|
|
|
|
namespace Adaptation.FileHandlers.pcl;
|
|
|
|
/// <summary>
/// Turns a PCL report into a PDF and then into one PDF / text pair per page.
/// </summary>
internal class Convert
{

    /// <summary>
    /// Longest time, in milliseconds, an external tool is given to finish.
    /// </summary>
    private const int ProcessTimeoutMilliseconds = 30000;

    /// <summary>
    /// Stops a process that is still running after its time limit.
    /// Failures to stop it are ignored because the caller has no better recovery available.
    /// </summary>
    /// <param name="process">process to terminate</param>
    private static void StopProcess(Process process)
    {
        try
        {
            if (process.HasExited)
                return;
            process.Kill();
            // Kill only requests termination; give the process a bounded time to actually go away.
            _ = process.WaitForExit(ProcessTimeoutMilliseconds);
        }
        catch (InvalidOperationException)
        {
            // The process ended between the check and the kill request.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // The process could not be terminated; there is nothing further to try here.
        }
    }

    /// <summary>
    /// Convert the raw data file to parsable file format - in this case from PCL to PDF
    /// </summary>
    /// <param name="ghostPCLFileName">executable started to perform the conversion</param>
    /// <param name="logistics">supplies <c>ReportFullPath</c>, the file to be converted to PDF</param>
    /// <returns>the report path with its extension changed to <c>.pdf</c></returns>
    /// <exception cref="Exception">the converter could not be started, did not finish in time, or left no PDF behind</exception>
    private static string ConvertSourceFileToPdf(string ghostPCLFileName, Logistics logistics)
    {
        string result = Path.ChangeExtension(logistics.ReportFullPath, ".pdf");
        // A PDF that is already present is reused without running the converter again.
        if (File.Exists(result))
            return result;
        string arguments = string.Concat("-dSAFER -dBATCH -dNOPAUSE -sOutputFile=\"", result, "\" -sDEVICE=pdfwrite \"", logistics.ReportFullPath, "\"");
        bool exited;
        using (Process process = Process.Start(ghostPCLFileName, arguments) ?? throw new Exception("PDF file wasn't created - converter could not be started"))
        {
            exited = process.WaitForExit(ProcessTimeoutMilliseconds);
            if (!exited)
                StopProcess(process);
        }
        if (!exited)
        {
            // The file did not exist before this call, so whatever is there now is an
            // incomplete product of the interrupted run. Remove it so that the
            // File.Exists shortcut above does not pick it up on a later call.
            try
            {
                File.Delete(result);
            }
            catch (IOException)
            {
                // Best effort only; the timeout is reported below either way.
            }
            catch (UnauthorizedAccessException)
            {
                // Best effort only; the timeout is reported below either way.
            }
            throw new Exception($"PDF file wasn't created - converter still running after {ProcessTimeoutMilliseconds} ms");
        }
        if (!File.Exists(result))
            throw new Exception("PDF file wasn't created");
        return result;
    }

    /// <summary>
    /// Runs the external text stripper on the PDF and then pairs every other PDF in the
    /// folder with the text file of the same name.
    /// </summary>
    /// <param name="pdfTextStripperFileName">executable to start; it receives the arguments <c>s "&lt;sourceFileNamePdf&gt;"</c></param>
    /// <param name="sourcePath">folder that is scanned for <c>*.pdf</c> and <c>*.txt</c> files afterwards</param>
    /// <param name="sourceFileNamePdf">PDF handed to the stripper; it is excluded from the result</param>
    /// <returns>text file content keyed by the matching PDF path; PDFs without a text file are skipped</returns>
    /// <exception cref="Exception">the stripper could not be started</exception>
    private static Dictionary<string, string> PortableDocumentFormatSplit(string pdfTextStripperFileName, string sourcePath, string sourceFileNamePdf)
    {
        Dictionary<string, string> results = new();
        ProcessStartInfo processStartInfo = new(pdfTextStripperFileName, $"s \"{sourceFileNamePdf}\"")
        {
            UseShellExecute = false,
            RedirectStandardError = true,
            RedirectStandardOutput = true,
        };
        using (Process process = Process.Start(processStartInfo) ?? throw new Exception("PDF text stripper could not be started"))
        {
            // Both streams are redirected, so they have to be drained: a child that fills
            // an unread pipe blocks on its next write and would only end with the timeout.
            // The content itself is not needed and is discarded.
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
            // A timeout is not an error here; whatever files exist are still collected.
            if (!process.WaitForExit(ProcessTimeoutMilliseconds))
                StopProcess(process);
        }
        string[] pdfFiles = Directory.GetFiles(sourcePath, "*.pdf", SearchOption.TopDirectoryOnly);
        HashSet<string> textFiles = new(Directory.GetFiles(sourcePath, "*.txt", SearchOption.TopDirectoryOnly));
        foreach (string pdfFile in pdfFiles)
        {
            if (pdfFile == sourceFileNamePdf)
                continue;
            string checkFile = Path.ChangeExtension(pdfFile, ".txt");
            if (!textFiles.Contains(checkFile))
                continue;
            results.Add(pdfFile, File.ReadAllText(checkFile));
        }
        return results;
    }

    /// <summary>
    /// Splits the PDF into single pages with PDFBox and records the text of every page.
    /// Page files that already exist on disk are reused instead of being regenerated.
    /// </summary>
    /// <param name="sourceFileNamePdf">PDF to split</param>
    /// <param name="sourcePath">folder the page files are read from and written to</param>
    /// <param name="sourceFileNameWithoutExtension">prefix of the page files, which are named <c>prefix_N.pdf</c> / <c>prefix_N.txt</c> with N starting at 1</param>
    /// <param name="sourceFiles">receives every file that was read or written</param>
    /// <param name="results">receives the page text keyed by the page PDF path</param>
    private static void SplitWithPdfBox(string sourceFileNamePdf, string sourcePath, string sourceFileNameWithoutExtension, List<string> sourceFiles, Dictionary<string, string> results)
    {
        java.io.File file = new(sourceFileNamePdf);
        org.apache.pdfbox.util.Splitter splitter = new();
        org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
        try
        {
            java.util.List list = splitter.split(pdDocument);
            java.util.ListIterator iterator = list.listIterator();
            org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
            for (int i = 1; iterator.hasNext(); i++)
            {
                // The list comes from Splitter.split, so every element is expected to be a
                // page document; anything else yields no result entry.
                if (iterator.next() is not org.apache.pdfbox.pdmodel.PDDocument pd)
                    continue;
                string pageText;
                string pagePDFFile = Path.Combine(sourcePath, $"{sourceFileNameWithoutExtension}_{i}.pdf");
                string pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                try
                {
                    if (File.Exists(pageTextFile))
                    {
                        pageText = File.ReadAllText(pageTextFile);
                        sourceFiles.Add(pageTextFile);
                    }
                    else if (File.Exists(pagePDFFile))
                    {
                        org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
                        try
                        {
                            pageText = dataStripper.getText(document);
                        }
                        finally
                        {
                            document.close();
                        }
                        sourceFiles.Add(pagePDFFile);
                    }
                    else
                    {
                        pageText = dataStripper.getText(pd);
                        pd.save(pagePDFFile);
                        sourceFiles.Add(pagePDFFile);
                        File.WriteAllText(pageTextFile, pageText);
                        sourceFiles.Add(pageTextFile);
                    }
                }
                finally
                {
                    // The page document is released on every path, including failures.
                    pd.close();
                }
                results.Add(pagePDFFile, pageText);
            }
        }
        finally
        {
            // Released even when the split fails, so the fallback in the caller does not
            // run while this document is still open.
            pdDocument.close();
        }
    }

    /// <summary>
    /// Converts the report to PDF and returns the text of every page keyed by the page PDF path.
    /// Existing <c>name_*.txt</c> files that have a PDF beside them are used first; when there
    /// are none the PDF is split with PDFBox, and when that raises
    /// <see cref="MissingMethodException"/> before producing anything the external text
    /// stripper is used instead.
    /// </summary>
    /// <param name="logistics">supplies <c>ReportFullPath</c>, the report to process</param>
    /// <param name="ghostPCLFileName">executable used to convert the report to PDF</param>
    /// <param name="pdfTextStripperFileName">executable used when PDFBox cannot be used</param>
    /// <param name="fileInfoCollection">receives a <see cref="FileInfo"/> for every file that was read or written</param>
    /// <returns>read-only map of page PDF path to page text</returns>
    /// <exception cref="Exception">the PDF could not be created or the report path has no directory</exception>
    internal static ReadOnlyDictionary<string, string> PDF(Logistics logistics, string ghostPCLFileName, string pdfTextStripperFileName, List<FileInfo> fileInfoCollection)
    {
        Dictionary<string, string> results = new();
        List<string> sourceFiles = new();
        string sourceFileNamePdf = ConvertSourceFileToPdf(ghostPCLFileName, logistics);
        sourceFiles.Add(sourceFileNamePdf);
        string sourcePath = Path.GetDirectoryName(logistics.ReportFullPath) ?? throw new Exception($"Unable to determine the directory of <{logistics.ReportFullPath}>");
        string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(logistics.ReportFullPath);
        string[] txtFiles = Directory.GetFiles(sourcePath, $"{sourceFileNameWithoutExtension}_*.txt", SearchOption.TopDirectoryOnly);
        foreach (string txtFile in txtFiles)
        {
            // The text file counts as a source file even when its PDF is missing.
            sourceFiles.Add(txtFile);
            string pageText = File.ReadAllText(txtFile);
            string pagePDFFile = Path.ChangeExtension(txtFile, ".pdf");
            if (!File.Exists(pagePDFFile))
                continue;
            results.Add(pagePDFFile, pageText);
        }
        if (results.Count == 0)
        {
            try
            {
                SplitWithPdfBox(sourceFileNamePdf, sourcePath, sourceFileNameWithoutExtension, sourceFiles, results);
            }
            catch (MissingMethodException)
            {
                // Pages gathered before the failure are kept; the external stripper is
                // only used when PDFBox produced nothing at all.
                if (results.Count == 0)
                    results = PortableDocumentFormatSplit(pdfTextStripperFileName, sourcePath, sourceFileNamePdf);
            }
        }
        fileInfoCollection.AddRange(sourceFiles.Select(sourceFile => new FileInfo(sourceFile)));
        return new(results);
    }

}