using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace PDF_Text_Stripper;

/// <summary>
/// Console entry point that splits a PDF file and writes every resulting part
/// as a separate PDF plus a plain-text file containing its extracted text.
/// </summary>
public class Program
{
    /// <summary>
    /// Loads <paramref name="sourceFileName"/>, splits it with PDFBox's <c>Splitter</c>
    /// and, for each part number <c>i</c> (starting at 1), writes
    /// <c>{name}_{i}.pdf</c> and <c>{name}_{i}.txt</c> next to the source file.
    /// Existing output files with those names are deleted first.
    /// </summary>
    /// <param name="sourceFileName">Path of the PDF file to split.</param>
    /// <exception cref="ArgumentException">
    /// The directory of <paramref name="sourceFileName"/> cannot be determined.
    /// </exception>
    private static void PortableDocumentFormatWriteText(string sourceFileName)
    {
        string sourcePath = Path.GetDirectoryName(sourceFileName)
            ?? throw new ArgumentException($"Unable to determine the directory of '{sourceFileName}'.", nameof(sourceFileName));
        string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName);

        java.io.File file = new(sourceFileName);
        org.apache.pdfbox.util.Splitter splitter = new();
        org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
        org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
        try
        {
            java.util.List list = splitter.split(pdDocument);
            java.util.ListIterator iterator = list.listIterator();

            // The part number advances for every list entry, even ones that are skipped below.
            for (int i = 1; iterator.hasNext(); i++)
            {
                object item = iterator.next();

                // Path.Combine copes with an empty directory (bare file name) and uses the
                // platform's separator, so output always lands beside the source file.
                string pagePDFFile = Path.Combine(sourcePath, $"{sourceFileNameWithoutExtension}_{i}.pdf");
                if (File.Exists(pagePDFFile))
                {
                    File.Delete(pagePDFFile);
                }

                string pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                if (File.Exists(pageTextFile))
                {
                    File.Delete(pageTextFile);
                }

                if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
                {
                    continue;
                }

                try
                {
                    string pageText = dataStripper.getText(pd);
                    pd.save(pagePDFFile);
                    File.WriteAllText(pageTextFile, pageText);
                }
                finally
                {
                    // Release the split document even when extraction or saving fails.
                    pd.close();
                }
            }
        }
        finally
        {
            // Always release the source document, including on failure.
            pdDocument.close();
        }
    }

    /// <summary>
    /// Processes the command-line arguments: an optional <c>"s"</c> entry enables silent mode
    /// (no wait for Enter at the end) and is removed from <paramref name="args"/>; the first
    /// remaining entry is the PDF file to split. Any failure is written to the console
    /// (message and stack trace) instead of being propagated.
    /// </summary>
    /// <param name="args">Mutable argument list; the <c>"s"</c> entry is removed in place.</param>
    public static void Secondary(List<string> args)
    {
        int silentIndex = args.IndexOf("s");
        bool silent = silentIndex > -1;
        if (silent)
        {
            args.RemoveAt(silentIndex);
        }

        try
        {
            // Check for a missing argument before touching args[0], so the user sees a
            // meaningful message rather than an index-out-of-range error.
            if (args.Count == 0)
            {
                throw new ArgumentException("No PDF file name was supplied.", nameof(args));
            }

            if (!File.Exists(args[0]))
            {
                // The message is the offending path, as reported to the console below.
                throw new FileNotFoundException(args[0]);
            }

            PortableDocumentFormatWriteText(args[0]);
        }
        catch (Exception ex)
        {
            Console.WriteLine(string.Concat(ex.Message, Environment.NewLine, ex.StackTrace));
        }

        if (silent)
        {
            Console.WriteLine("Done. Bye");
        }
        else
        {
            Console.WriteLine("Done. Press 'Enter' to end");
            _ = Console.ReadLine();
        }
    }

    /// <summary>
    /// Program entry point; forwards the command-line arguments to <see cref="Secondary"/>.
    /// </summary>
    /// <param name="args">Command-line arguments; a null array is treated as empty.</param>
    public static void Main(string[] args)
    {
        Secondary(args is not null ? args.ToList() : new List<string>());
    }
}