1 Commits

Author SHA1 Message Date
c77d590c31 dotnet 8.0.100 2025-02-26 08:20:12 -07:00
2 changed files with 11 additions and 34 deletions

View File

@@ -8,7 +8,7 @@
<PackageId>Infineon.Mesa.PDF.Text.Stripper</PackageId> <PackageId>Infineon.Mesa.PDF.Text.Stripper</PackageId>
<RuntimeIdentifier>win-x86</RuntimeIdentifier> <RuntimeIdentifier>win-x86</RuntimeIdentifier>
<TargetFrameworks>net48</TargetFrameworks> <TargetFrameworks>net48</TargetFrameworks>
<Version>4.8.0.2</Version> <Version>4.8.0.1</Version>
</PropertyGroup> </PropertyGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'net48' "> <ItemGroup Condition=" '$(TargetFramework)' == 'net48' ">
<Reference Include="Microsoft.CSharp" /> <Reference Include="Microsoft.CSharp" />

View File

@@ -8,39 +8,16 @@ namespace PDF_Text_Stripper;
public class Program public class Program
{ {
private static void PortableDocumentFormatWriteText(string sourceFileName) private static void PDFTextStripper(string file)
{ {
object item; string altFileName = Path.ChangeExtension(file, ".txt");
string pageText; if(File.Exists(altFileName))
string pagePDFFile; File.Delete(altFileName);
string pageTextFile; org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
java.io.File file = new(sourceFileName); org.apache.pdfbox.util.PDFTextStripper stripper = new();
org.apache.pdfbox.util.Splitter splitter = new(); string text = stripper.getText(pdfDocument);
string sourcePath = Path.GetDirectoryName(sourceFileName) ?? throw new Exception(); pdfDocument.close();
string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName); File.WriteAllText(altFileName, text);
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
java.util.List list = splitter.split(pdDocument);
java.util.ListIterator iterator = list.listIterator();
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
for (short i = 1; i < short.MaxValue; i++)
{
if (!iterator.hasNext())
break;
item = iterator.next();
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
if (File.Exists(pagePDFFile))
File.Delete(pagePDFFile);
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
if (File.Exists(pageTextFile))
File.Delete(pageTextFile);
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
continue;
pageText = dataStripper.getText(pd);
pd.save(pagePDFFile);
pd.close();
File.WriteAllText(pageTextFile, pageText);
}
pdDocument.close();
} }
public static void Secondary(List<string> args) public static void Secondary(List<string> args)
@@ -51,7 +28,7 @@ public class Program
try try
{ {
if (args.Any() && File.Exists(args[0])) if (args.Any() && File.Exists(args[0]))
PortableDocumentFormatWriteText(args[0]); PDFTextStripper(args[0]);
else else
throw new Exception(args[0]); throw new Exception(args[0]);
} }