ProcessDataStandardFormat
run.json descriptions.json MissingMethodException Infineon.Mesa.PDF.Text.Stripper 4.8.0.2 MSTEST0037
This commit is contained in:
@ -4,6 +4,7 @@ using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace Adaptation.FileHandlers.pcl;
|
||||
|
||||
@ -31,7 +32,35 @@ internal class Convert
|
||||
return result;
|
||||
}
|
||||
|
||||
internal static ReadOnlyDictionary<string, string> PDF(Logistics logistics, string ghostPCLFileName, List<FileInfo> fileInfoCollection)
|
||||
/// <summary>
/// Launches the external PDF text-stripper executable against <paramref name="sourceFileNamePdf"/>,
/// then collects the text of every other PDF in <paramref name="sourcePath"/> that has a
/// same-named .txt file next to it.
/// </summary>
/// <param name="pdfTextStripperFileName">Path of the executable to launch.</param>
/// <param name="sourcePath">Directory scanned (top level only) for *.pdf and *.txt files once the tool has run.</param>
/// <param name="sourceFileNamePdf">PDF handed to the tool on its command line; it is excluded from the results.</param>
/// <returns>Map of PDF path to the content of its sibling .txt file; empty when no pair is found.</returns>
/// <exception cref="InvalidOperationException">The process could not be started.</exception>
private static Dictionary<string, string> PortableDocumentFormatSplit(string pdfTextStripperFileName, string sourcePath, string sourceFileNamePdf)
{
    const int timeoutMilliseconds = 30000;
    Dictionary<string, string> results = new();
    ProcessStartInfo processStartInfo = new(pdfTextStripperFileName, $"s \"{sourceFileNamePdf}\"")
    {
        UseShellExecute = false,
        RedirectStandardError = true,
        RedirectStandardOutput = true,
    };
    using (Process process = Process.Start(processStartInfo) ?? throw new InvalidOperationException($"Unable to start <{pdfTextStripperFileName}>!"))
    {
        // Both streams are redirected, so they must be drained; otherwise the child blocks as soon
        // as a pipe buffer fills up and the wait below can only end by timing out.
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();
        if (!process.WaitForExit(timeoutMilliseconds))
        {
            // Still running after the timeout: stop it so it neither lingers nor keeps writing
            // the files that are read below. Whatever it produced so far is still collected.
            try
            {
                process.Kill();
                // Kill is asynchronous; give the process a chance to release its file handles.
                _ = process.WaitForExit(timeoutMilliseconds);
            }
            catch (Exception exception) when (exception is InvalidOperationException or System.ComponentModel.Win32Exception)
            {
                // The process exited on its own, or is already terminating - nothing left to do.
            }
        }
    }
    string[] pdfFiles = Directory.GetFiles(sourcePath, "*.pdf", SearchOption.TopDirectoryOnly);
    // Hash lookup instead of a linear array scan per PDF. Paths are compared ignoring case
    // because this code targets Windows file names.
    HashSet<string> textFiles = new(Directory.GetFiles(sourcePath, "*.txt", SearchOption.TopDirectoryOnly), StringComparer.OrdinalIgnoreCase);
    foreach (string pdfFile in pdfFiles)
    {
        // The source document itself is not one of the split pages.
        if (string.Equals(pdfFile, sourceFileNamePdf, StringComparison.OrdinalIgnoreCase))
            continue;
        string checkFile = Path.ChangeExtension(pdfFile, ".txt");
        if (!textFiles.Contains(checkFile))
            continue;
        results.Add(pdfFile, File.ReadAllText(checkFile));
    }
    return results;
}
|
||||
|
||||
internal static ReadOnlyDictionary<string, string> PDF(Logistics logistics, string ghostPCLFileName, string pdfTextStripperFileName, List<FileInfo> fileInfoCollection)
|
||||
{
|
||||
Dictionary<string, string> results = new();
|
||||
object item;
|
||||
@ -58,51 +87,59 @@ internal class Convert
|
||||
}
|
||||
if (results.Count == 0)
|
||||
{
|
||||
java.io.File file = new(sourceFileNamePdf);
|
||||
org.apache.pdfbox.util.Splitter splitter = new();
|
||||
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
|
||||
java.util.List list = splitter.split(pdDocument);
|
||||
java.util.ListIterator iterator = list.listIterator();
|
||||
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
|
||||
for (short i = 1; i < short.MaxValue; i++)
|
||||
try
|
||||
{
|
||||
if (!iterator.hasNext())
|
||||
break;
|
||||
item = iterator.next();
|
||||
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
|
||||
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
|
||||
if (File.Exists(pageTextFile))
|
||||
java.io.File file = new(sourceFileNamePdf);
|
||||
org.apache.pdfbox.util.Splitter splitter = new();
|
||||
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
|
||||
java.util.List list = splitter.split(pdDocument);
|
||||
java.util.ListIterator iterator = list.listIterator();
|
||||
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
|
||||
for (short i = 1; i < short.MaxValue; i++)
|
||||
{
|
||||
pageText = File.ReadAllText(pageTextFile);
|
||||
sourceFiles.Add(pageTextFile);
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pd.close();
|
||||
if (!iterator.hasNext())
|
||||
break;
|
||||
item = iterator.next();
|
||||
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
|
||||
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
|
||||
if (File.Exists(pageTextFile))
|
||||
{
|
||||
pageText = File.ReadAllText(pageTextFile);
|
||||
sourceFiles.Add(pageTextFile);
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pd.close();
|
||||
}
|
||||
else if (File.Exists(pagePDFFile))
|
||||
{
|
||||
org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
|
||||
pageText = dataStripper.getText(document);
|
||||
document.close();
|
||||
sourceFiles.Add(pagePDFFile);
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pd.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pageText = dataStripper.getText(pd);
|
||||
pd.save(pagePDFFile);
|
||||
sourceFiles.Add(pagePDFFile);
|
||||
pd.close();
|
||||
File.WriteAllText(pageTextFile, pageText);
|
||||
sourceFiles.Add(pageTextFile);
|
||||
}
|
||||
results.Add(pagePDFFile, pageText);
|
||||
}
|
||||
else if (File.Exists(pagePDFFile))
|
||||
{
|
||||
org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
|
||||
pageText = dataStripper.getText(document);
|
||||
document.close();
|
||||
sourceFiles.Add(pagePDFFile);
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pd.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
pageText = dataStripper.getText(pd);
|
||||
pd.save(pagePDFFile);
|
||||
sourceFiles.Add(pagePDFFile);
|
||||
pd.close();
|
||||
File.WriteAllText(pageTextFile, pageText);
|
||||
sourceFiles.Add(pageTextFile);
|
||||
}
|
||||
results.Add(pagePDFFile, pageText);
|
||||
pdDocument.close();
|
||||
}
|
||||
catch (MissingMethodException)
|
||||
{
|
||||
if (results.Count == 0)
|
||||
results = PortableDocumentFormatSplit(pdfTextStripperFileName, sourcePath, sourceFileNamePdf);
|
||||
}
|
||||
pdDocument.close();
|
||||
}
|
||||
foreach (string sourceFile in sourceFiles)
|
||||
fileInfoCollection.Add(new FileInfo(sourceFile));
|
||||
|
@ -16,6 +16,7 @@ public class FileRead : Shared.FileRead, IFileRead
|
||||
|
||||
private long? _TickOffset;
|
||||
private readonly string _GhostPCLFileName;
|
||||
private readonly string _PDFTextStripperFileName;
|
||||
|
||||
public FileRead(ISMTP smtp, Dictionary<string, string> fileParameter, string cellInstanceName, int? connectionCount, string cellInstanceConnectionName, FileConnectorConfiguration fileConnectorConfiguration, string equipmentTypeName, string parameterizedModelObjectDefinitionType, IList<ModelObjectParameterDefinition> modelObjectParameters, string equipmentDictionaryName, Dictionary<string, List<long>> dummyRuns, Dictionary<long, List<string>> staticRuns, bool useCyclicalForDescription, bool isEAFHosted) :
|
||||
base(new Description(), true, smtp, fileParameter, cellInstanceName, connectionCount, cellInstanceConnectionName, fileConnectorConfiguration, equipmentTypeName, parameterizedModelObjectDefinitionType, modelObjectParameters, equipmentDictionaryName, dummyRuns, staticRuns, useCyclicalForDescription, isEAFHosted: connectionCount is null)
|
||||
@ -32,6 +33,9 @@ public class FileRead : Shared.FileRead, IFileRead
|
||||
_GhostPCLFileName = Path.Combine(AppContext.BaseDirectory, "gpcl6win64.exe");
|
||||
if (!File.Exists(_GhostPCLFileName))
|
||||
throw new Exception("Ghost PCL FileName doesn't Exist!");
|
||||
_PDFTextStripperFileName = Path.Combine(AppContext.BaseDirectory, "PDF-Text-Stripper.exe");
|
||||
if (!File.Exists(_PDFTextStripperFileName))
|
||||
throw new Exception("PDF-Text-Stripper FileName doesn't Exist!");
|
||||
if (_IsEAFHosted)
|
||||
NestExistingFiles(_FileConnectorConfiguration);
|
||||
}
|
||||
@ -113,7 +117,7 @@ public class FileRead : Shared.FileRead, IFileRead
|
||||
results.Item4.Add(_Logistics.FileInfo);
|
||||
else
|
||||
{
|
||||
ReadOnlyDictionary<string, string> pages = Convert.PDF(_Logistics, _GhostPCLFileName, results.Item4);
|
||||
ReadOnlyDictionary<string, string> pages = Convert.PDF(_Logistics, _GhostPCLFileName, _PDFTextStripperFileName, results.Item4);
|
||||
Run? run = Run.Get(_Logistics, results.Item4, pages);
|
||||
if (run is null)
|
||||
throw new Exception(string.Concat("A) No Data - ", dateTime.Ticks));
|
||||
|
@ -106,9 +106,13 @@ public class ProcessData : IProcessData
|
||||
if (description.Test != (int)tests[i])
|
||||
throw new Exception();
|
||||
}
|
||||
FileInfo fileInfo = new($"{logistics.ReportFullPath}.descriptions.json");
|
||||
List<Description> fileReadDescriptions = (from l in descriptions select (Description)l).ToList();
|
||||
string json = JsonSerializer.Serialize(fileReadDescriptions, fileReadDescriptions.GetType());
|
||||
JsonElement[] jsonElements = JsonSerializer.Deserialize<JsonElement[]>(json);
|
||||
File.WriteAllText(fileInfo.FullName, json);
|
||||
File.SetLastWriteTime(fileInfo.FullName, logistics.DateTimeFromSequence);
|
||||
fileInfoCollection.Add(fileInfo);
|
||||
JsonElement[] jsonElements = JsonSerializer.Deserialize<JsonElement[]>(json) ?? throw new Exception();
|
||||
results = new Tuple<string, Test[], JsonElement[], List<FileInfo>>(logistics.Logistics1[0], tests.ToArray(), jsonElements, fileInfoCollection);
|
||||
return results;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ internal class Run
|
||||
|
||||
private static void WriteJson(Logistics logistics, List<FileInfo> fileInfoCollection, Run result)
|
||||
{
|
||||
FileInfo fileInfo = new($"{logistics.ReportFullPath}.json");
|
||||
FileInfo fileInfo = new($"{logistics.ReportFullPath}.run.json");
|
||||
string json = JsonSerializer.Serialize(result, RunSourceGenerationContext.Default.Run);
|
||||
File.WriteAllText(fileInfo.FullName, json);
|
||||
File.SetLastWriteTime(fileInfo.FullName, logistics.DateTimeFromSequence);
|
||||
|
Reference in New Issue
Block a user