2024-11-20 08:16:28 -07:00

112 lines
4.8 KiB
C#

using Adaptation.Shared;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
namespace Adaptation.FileHandlers.pcl;
internal class Convert
{
/// <summary>
/// Convert the raw data file to parsable file format - in this case from PCL to PDF
/// </summary>
/// <param name="sourceFile">source file to be converted to PDF</param>
/// <returns></returns>
private static string ConvertSourceFileToPdf(string ghostPCLFileName, Logistics logistics)
{
string result = Path.ChangeExtension(logistics.ReportFullPath, ".pdf");
if (!File.Exists(result))
{
//string arguments = string.Concat("-i \"", sourceFile, "\" -o \"", result, "\"");
string arguments = string.Concat("-dSAFER -dBATCH -dNOPAUSE -sOutputFile=\"", result, "\" -sDEVICE=pdfwrite \"", logistics.ReportFullPath, "\"");
//Process process = Process.Start(configData.LincPDFCFileName, arguments);
Process process = Process.Start(ghostPCLFileName, arguments);
_ = process.WaitForExit(30000);
if (!File.Exists(result))
throw new Exception("PDF file wasn't created");
}
return result;
}
internal static ReadOnlyDictionary<string, string> PDF(Logistics logistics, string ghostPCLFileName, List<FileInfo> fileInfoCollection)
{
Dictionary<string, string> results = new();
object item;
string pageText;
string pagePDFFile;
string pageTextFile;
List<string> sourceFiles = new();
string sourceFileNamePdf = ConvertSourceFileToPdf(ghostPCLFileName, logistics);
sourceFiles.Add(sourceFileNamePdf);
string sourcePath = Path.GetDirectoryName(logistics.ReportFullPath) ?? throw new Exception();
string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(logistics.ReportFullPath);
string[] txtFiles = Directory.GetFiles(sourcePath, $"{sourceFileNameWithoutExtension}_*.txt", SearchOption.TopDirectoryOnly);
if (txtFiles.Length != 0)
{
foreach (string txtFile in txtFiles)
{
sourceFiles.Add(txtFile);
pageText = File.ReadAllText(txtFile);
pagePDFFile = Path.ChangeExtension(txtFile, ".pdf");
if (!File.Exists(pagePDFFile))
continue;
results.Add(pagePDFFile, pageText);
}
}
if (results.Count == 0)
{
java.io.File file = new(sourceFileNamePdf);
org.apache.pdfbox.util.Splitter splitter = new();
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
java.util.List list = splitter.split(pdDocument);
java.util.ListIterator iterator = list.listIterator();
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
for (short i = 1; i < short.MaxValue; i++)
{
if (!iterator.hasNext())
break;
item = iterator.next();
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
if (File.Exists(pageTextFile))
{
pageText = File.ReadAllText(pageTextFile);
sourceFiles.Add(pageTextFile);
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
continue;
pd.close();
}
else if (File.Exists(pagePDFFile))
{
org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
pageText = dataStripper.getText(document);
document.close();
sourceFiles.Add(pagePDFFile);
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
continue;
pd.close();
}
else
{
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
continue;
pageText = dataStripper.getText(pd);
pd.save(pagePDFFile);
sourceFiles.Add(pagePDFFile);
pd.close();
File.WriteAllText(pageTextFile, pageText);
sourceFiles.Add(pageTextFile);
}
results.Add(pagePDFFile, pageText);
}
pdDocument.close();
}
foreach (string sourceFile in sourceFiles)
fileInfoCollection.Add(new FileInfo(sourceFile));
return new(results);
}
}