diff --git a/.vscode/format-report.json b/.vscode/format-report.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/.vscode/format-report.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..3599b51 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "[markdown]": { + "editor.wordWrap": "off" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..53d749b --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,28 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Build", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "/property:GenerateFullPaths=true", + "/consoleloggerparameters:NoSummary" + ], + "problemMatcher": "$msCompile" + }, + { + "label": "Build Self", + "command": "dotnet", + "type": "process", + "args": [ + "build", + "--runtime", + "win-x64", + "--self-contained" + ], + "problemMatcher": "$msCompile" + }, + ] +} \ No newline at end of file diff --git a/PDF-Text-Stripper.yml b/PDF-Text-Stripper.yml index 8241a7b..3ef292e 100644 --- a/PDF-Text-Stripper.yml +++ b/PDF-Text-Stripper.yml @@ -23,7 +23,7 @@ steps: displayName: Configuration - script: | - set nugetSource=https://messa08ec.ec.local/v3/index.json + set nugetSource=https://eaf-prod.mes.infineon.com/v3/index.json echo %nugetSource% echo ##vso[task.setvariable variable=NugetSource;]%nugetSource% echo $(NugetSource) diff --git a/PDF-Text-Stripper/PDF-Text-Stripper.csproj b/PDF-Text-Stripper/PDF-Text-Stripper.csproj index 214b960..970aead 100644 --- a/PDF-Text-Stripper/PDF-Text-Stripper.csproj +++ b/PDF-Text-Stripper/PDF-Text-Stripper.csproj @@ -8,7 +8,7 @@ Infineon.Mesa.PDF.Text.Stripper win-x86 net48 - 4.8.0.1 + 4.8.0.2 diff --git a/PDF-Text-Stripper/Program.cs b/PDF-Text-Stripper/Program.cs index 78d06ea..2acf76b 100644 --- a/PDF-Text-Stripper/Program.cs +++ b/PDF-Text-Stripper/Program.cs @@ -5,19 +5,42 @@ using System.Linq; namespace PDF_Text_Stripper; -internal class Program +public class Program { - private static void PDFTextStripper(string file) + private static void PortableDocumentFormatWriteText(string sourceFileName) { - string altFileName = Path.ChangeExtension(file, ".txt"); - if(File.Exists(altFileName)) - File.Delete(altFileName); - org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file); - org.apache.pdfbox.util.PDFTextStripper stripper = new(); - string text = stripper.getText(pdfDocument); - pdfDocument.close(); - File.WriteAllText(altFileName, text); + object item; + string pageText; + string pagePDFFile; + string pageTextFile; + java.io.File file = new(sourceFileName); + org.apache.pdfbox.util.Splitter splitter = new(); + string sourcePath = Path.GetDirectoryName(sourceFileName) ?? throw new Exception(); + string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName); + org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file); + java.util.List list = splitter.split(pdDocument); + java.util.ListIterator iterator = list.listIterator(); + org.apache.pdfbox.util.PDFTextStripper dataStripper = new(); + for (short i = 1; i < short.MaxValue; i++) + { + if (!iterator.hasNext()) + break; + item = iterator.next(); + pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf"); + if (File.Exists(pagePDFFile)) + File.Delete(pagePDFFile); + pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt"); + if (File.Exists(pageTextFile)) + File.Delete(pageTextFile); + if (item is not org.apache.pdfbox.pdmodel.PDDocument pd) + continue; + pageText = dataStripper.getText(pd); + pd.save(pagePDFFile); + pd.close(); + File.WriteAllText(pageTextFile, pageText); + } + pdDocument.close(); } public static void Secondary(List args) @@ -28,7 +51,7 @@ internal class Program try { if (args.Any() && File.Exists(args[0])) - PDFTextStripper(args[0]); + PortableDocumentFormatWriteText(args[0]); else throw new Exception(args[0]); } diff --git a/global.json b/global.json index 6dd2ad5..062f5fe 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,6 @@ { "sdk": { - "version": "6.0.202" + "version": "8.0.100" } } \ No newline at end of file