79 lines
2.6 KiB
C#
79 lines
2.6 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
|
|
namespace PDF_Text_Stripper;
|
|
|
|
/// <summary>
/// Console tool that splits a PDF into separate documents and, for each one,
/// writes a PDF file and a plain-text file next to the source file.
/// </summary>
public class Program
{

    /// <summary>
    /// Splits <paramref name="sourceFileName"/> with the PDFBox splitter and, for every
    /// resulting document, writes <c>{name}_{n}.pdf</c> and <c>{name}_{n}.txt</c> into the
    /// directory of the source file (<c>n</c> starts at 1). Existing files with those
    /// names are deleted first.
    /// </summary>
    /// <param name="sourceFileName">Path of the PDF file to split.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="sourceFileName"/> has no directory information (for example a root path).
    /// </exception>
    private static void PortableDocumentFormatWriteText(string sourceFileName)
    {
        // GetDirectoryName returns null for a root path and "" for a bare file name.
        // The empty string is fine: Path.Combine then yields a path relative to the
        // current directory instead of one rooted at the drive.
        string sourcePath = Path.GetDirectoryName(sourceFileName)
            ?? throw new ArgumentException($"'{sourceFileName}' does not contain directory information.", nameof(sourceFileName));
        string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName);

        java.io.File file = new(sourceFileName);
        org.apache.pdfbox.util.Splitter splitter = new();
        org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
        org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
        try
        {
            java.util.List list = splitter.split(pdDocument);
            java.util.ListIterator iterator = list.listIterator();

            // The counter advances for every list entry, including skipped ones,
            // so output numbering always matches the position in the split result.
            for (int partNumber = 1; iterator.hasNext(); partNumber++)
            {
                object item = iterator.next();

                string pagePDFFile = Path.Combine(sourcePath, $"{sourceFileNameWithoutExtension}_{partNumber}.pdf");
                if (File.Exists(pagePDFFile))
                {
                    File.Delete(pagePDFFile);
                }

                string pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                if (File.Exists(pageTextFile))
                {
                    File.Delete(pageTextFile);
                }

                // Stale output is removed above even when the entry is not a document.
                if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
                {
                    continue;
                }

                string pageText;
                try
                {
                    pageText = dataStripper.getText(pd);
                    pd.save(pagePDFFile);
                }
                finally
                {
                    // Release the split document even if text extraction or saving fails.
                    pd.close();
                }

                File.WriteAllText(pageTextFile, pageText);
            }
        }
        finally
        {
            // Always release the source document, including on failure.
            pdDocument.close();
        }
    }

    /// <summary>
    /// Processes the command-line arguments: an optional <c>"s"</c> flag (silent mode) and
    /// the path of the PDF file as the first remaining argument. Any failure is written to
    /// the console instead of being propagated.
    /// </summary>
    /// <param name="args">
    /// Argument list. The first <c>"s"</c> entry, if any, is removed from this list in place.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="args"/> is <c>null</c>.</exception>
    public static void Secondary(List<string> args)
    {
        ArgumentNullException.ThrowIfNull(args);

        int silentIndex = args.IndexOf("s");
        bool silent = silentIndex > -1;
        if (silent)
        {
            args.RemoveAt(silentIndex);
        }

        try
        {
            if (args.Count == 0)
            {
                throw new ArgumentException("No input file was specified; pass the path of a PDF file as the first argument.", nameof(args));
            }

            if (!File.Exists(args[0]))
            {
                throw new FileNotFoundException($"Input file not found: {args[0]}", args[0]);
            }

            PortableDocumentFormatWriteText(args[0]);
        }
        catch (Exception ex)
        {
            // Top-level handler: report the problem and fall through to the closing message.
            Console.WriteLine(string.Concat(ex.Message, Environment.NewLine, ex.StackTrace));
        }

        if (silent)
        {
            Console.WriteLine("Done. Bye");
        }
        else
        {
            // Interactive mode keeps the console window open until the user confirms.
            Console.WriteLine("Done. Press 'Enter' to end");
            _ = Console.ReadLine();
        }
    }

    /// <summary>
    /// Program entry point; forwards the command-line arguments to <see cref="Secondary"/>.
    /// </summary>
    /// <param name="args">Command-line arguments; a <c>null</c> array is treated as empty.</param>
    public static void Main(string[] args) =>
        Secondary(args?.ToList() ?? new List<string>());

}
|