Use of Splitter
dotnet 8.0.100
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
<PackageId>Infineon.Mesa.PDF.Text.Stripper</PackageId>
|
||||
<RuntimeIdentifier>win-x86</RuntimeIdentifier>
|
||||
<TargetFrameworks>net48</TargetFrameworks>
|
||||
<Version>4.8.0.1</Version>
|
||||
<Version>4.8.0.2</Version>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Condition=" '$(TargetFramework)' == 'net48' ">
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
|
@ -5,19 +5,42 @@ using System.Linq;
|
||||
|
||||
namespace PDF_Text_Stripper;
|
||||
|
||||
internal class Program
|
||||
public class Program
|
||||
{
|
||||
|
||||
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers were lost; the
// lone "|" and "||||" lines appear to be table residue from the page it was captured from.
// Two signatures follow back to back: PDFTextStripper(string file) is presumably the removed
// (pre-change) method and PortableDocumentFormatWriteText(string sourceFileName) the added
// one — confirm against the repository before treating any of this as compilable code.
private static void PDFTextStripper(string file)
|
||||
/// <summary>
/// Splits the PDF at <paramref name="sourceFileName"/> with the PDFBox Splitter and, for each
/// resulting document, writes "&lt;source name&gt;_&lt;i&gt;.pdf" plus a ".txt" file holding the
/// text extracted from it, both placed in the source file's directory.
/// </summary>
/// <param name="sourceFileName">Path of the PDF to split; its directory and base name are used to build the output names.</param>
/// <exception cref="Exception">Thrown when Path.GetDirectoryName returns null for <paramref name="sourceFileName"/>.</exception>
private static void PortableDocumentFormatWriteText(string sourceFileName)
|
||||
{
|
||||
// NOTE(review): the statements from here through File.WriteAllText(altFileName, text) use a
// string named "file", which clashes with the java.io.File local "file" declared further
// down; they presumably belong to the removed PDFTextStripper body — confirm.
// They load the whole document, extract all of its text and write it to a sibling .txt file.
string altFileName = Path.ChangeExtension(file, ".txt");
|
||||
// Remove a previous output so the write below starts from a clean file.
if(File.Exists(altFileName))
|
||||
File.Delete(altFileName);
|
||||
org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
|
||||
org.apache.pdfbox.util.PDFTextStripper stripper = new();
|
||||
string text = stripper.getText(pdfDocument);
|
||||
pdfDocument.close();
|
||||
File.WriteAllText(altFileName, text);
|
||||
// Locals of the splitter-based implementation start here.
object item;
|
||||
string pageText;
|
||||
string pagePDFFile;
|
||||
string pageTextFile;
|
||||
java.io.File file = new(sourceFileName);
|
||||
org.apache.pdfbox.util.Splitter splitter = new();
|
||||
// Throws a bare Exception (no message) when no directory can be derived from the path.
string sourcePath = Path.GetDirectoryName(sourceFileName) ?? throw new Exception();
|
||||
string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName);
|
||||
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
|
||||
// Presumably yields one PDDocument per page (Splitter's default split size) — TODO confirm.
java.util.List list = splitter.split(pdDocument);
|
||||
java.util.ListIterator iterator = list.listIterator();
|
||||
// A single stripper instance is reused for every split document.
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
|
||||
// i is the 1-based suffix used in the output file names. The loop normally ends through the
// hasNext() check below; the short.MaxValue bound caps processing at 32766 items.
for (short i = 1; i < short.MaxValue; i++)
|
||||
{
|
||||
if (!iterator.hasNext())
|
||||
break;
|
||||
item = iterator.next();
|
||||
// NOTE(review): the path separator is a hard-coded backslash rather than Path.Combine.
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
|
||||
// Delete outputs left over from an earlier run before writing new ones.
if (File.Exists(pagePDFFile))
|
||||
File.Delete(pagePDFFile);
|
||||
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
|
||||
if (File.Exists(pageTextFile))
|
||||
File.Delete(pageTextFile);
|
||||
// Items that are not PDDocument are skipped; note that their stale output files have
// already been deleted above and are not recreated, and i still advances.
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
|
||||
continue;
|
||||
// Extract the text before saving and closing the split document.
pageText = dataStripper.getText(pd);
|
||||
pd.save(pagePDFFile);
|
||||
pd.close();
|
||||
File.WriteAllText(pageTextFile, pageText);
|
||||
}
|
||||
// NOTE(review): there is no try/finally here, so an exception thrown inside the loop skips
// this close() and leaves the source document (and the current split document) open.
pdDocument.close();
|
||||
}
|
||||
|
||||
public static void Secondary(List<string> args)
|
||||
@ -28,7 +51,7 @@ internal class Program
|
||||
try
|
||||
{
|
||||
if (args.Any() && File.Exists(args[0]))
|
||||
PDFTextStripper(args[0]);
|
||||
PortableDocumentFormatWriteText(args[0]);
|
||||
else
|
||||
throw new Exception(args[0]);
|
||||
}
|
||||
|
Reference in New Issue
Block a user