Compare commits

2 Commits

Author SHA1 Message Date
ae70dda4c9 hours-since-april-18-2019 2025-08-13 18:59:23 -07:00
cb333ec871 Use of Splitter
dotnet 8.0.100
2025-02-26 12:36:41 -07:00
7 changed files with 104 additions and 18 deletions

1
.vscode/format-report.json vendored Normal file
View File

@ -0,0 +1 @@
[]

5
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"[markdown]": {
"editor.wordWrap": "off"
}
}

43
.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,43 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "Build",
"command": "dotnet",
"type": "process",
"args": [
"build",
"--configuration",
"Release"
],
"problemMatcher": "$msCompile"
},
{
"label": "Push Package",
"command": "dotnet",
"type": "process",
"args": [
"nuget",
"push",
"PDF-Text-Stripper/bin/Release/Infineon.Mesa.PDF.Text.Stripper.4.8.0.asdf.nupkg",
"--api-key",
"asdf",
"--source",
"https://api.nuget.org/v3/index.json"
],
"problemMatcher": "$msCompile"
},
{
"label": "Build Self",
"command": "dotnet",
"type": "process",
"args": [
"build",
"--runtime",
"win-x64",
"--self-contained"
],
"problemMatcher": "$msCompile"
}
]
}

View File

@ -23,7 +23,7 @@ steps:
displayName: Configuration displayName: Configuration
- script: | - script: |
set nugetSource=https://messa08ec.ec.local/v3/index.json set nugetSource=https://eaf-prod.mes.infineon.com/v3/index.json
echo %nugetSource% echo %nugetSource%
echo ##vso[task.setvariable variable=NugetSource;]%nugetSource% echo ##vso[task.setvariable variable=NugetSource;]%nugetSource%
echo $(NugetSource) echo $(NugetSource)

View File

@ -1,15 +1,29 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<Authors>Mike Phares</Authors>
<Company>Infineon Technologies Americas Corp.</Company>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<LangVersion>10.0</LangVersion> <LangVersion>10.0</LangVersion>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<PackageId>Infineon.Mesa.PDF.Text.Stripper</PackageId>
<RuntimeIdentifier>win-x86</RuntimeIdentifier> <RuntimeIdentifier>win-x86</RuntimeIdentifier>
<TargetFrameworks>net48</TargetFrameworks> <TargetFrameworks>net48</TargetFrameworks>
<Version>4.8.0.1</Version> <Version>4.8.0.2</Version>
</PropertyGroup> </PropertyGroup>
<PropertyGroup>
<!-- Whole hours elapsed since 2019-04-18 00:00:00 UTC (Unix time 1555545600), computed each time the project is evaluated. -->
<HoursSinceApril182019>$([System.Math]::Floor($([MSBuild]::Divide($([MSBuild]::Subtract($([System.DateTimeOffset]::UtcNow.ToUnixTimeSeconds()), 1555545600)), 3600))))</HoursSinceApril182019>
</PropertyGroup>
<PropertyGroup>
<Authors>Mike Phares</Authors>
<LangVersion>10.0</LangVersion>
<IncludeSymbols>true</IncludeSymbols>
<PackageReadmeFile>README.md</PackageReadmeFile>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<PackageId>Infineon.Mesa.PDF.Text.Stripper</PackageId>
<Company>Infineon Technologies Americas Corp.</Company>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<!-- The fourth version part reuses the hour counter defined above, so the clock is read once and every build within the same hour gets the same version. -->
<!-- This assignment appears later in the project than the static 4.8.0.2 value, so this is the one that takes effect. -->
<!-- NOTE(review): assembly version parts are limited to 65534, and this counter presumably passes that around October 2026. Confirm whether AssemblyVersion and FileVersion need to be pinned separately before then. -->
<Version>4.8.0.$(HoursSinceApril182019)</Version>
</PropertyGroup>
<ItemGroup>
<None Include="..\README.md" Pack="true" PackagePath="\" />
</ItemGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'net48' "> <ItemGroup Condition=" '$(TargetFramework)' == 'net48' ">
<Reference Include="Microsoft.CSharp" /> <Reference Include="Microsoft.CSharp" />
<Reference Include="System.Core" /> <Reference Include="System.Core" />

View File

@ -5,19 +5,42 @@ using System.Linq;
namespace PDF_Text_Stripper; namespace PDF_Text_Stripper;
internal class Program public class Program
{ {
private static void PDFTextStripper(string file) private static void PortableDocumentFormatWriteText(string sourceFileName)
{ {
string altFileName = Path.ChangeExtension(file, ".txt"); object item;
if(File.Exists(altFileName)) string pageText;
File.Delete(altFileName); string pagePDFFile;
org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file); string pageTextFile;
org.apache.pdfbox.util.PDFTextStripper stripper = new(); java.io.File file = new(sourceFileName);
string text = stripper.getText(pdfDocument); org.apache.pdfbox.util.Splitter splitter = new();
pdfDocument.close(); string sourcePath = Path.GetDirectoryName(sourceFileName) ?? throw new Exception();
File.WriteAllText(altFileName, text); string sourceFileNameWithoutExtension = Path.GetFileNameWithoutExtension(sourceFileName);
org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
java.util.List list = splitter.split(pdDocument);
java.util.ListIterator iterator = list.listIterator();
org.apache.pdfbox.util.PDFTextStripper dataStripper = new();
for (short i = 1; i < short.MaxValue; i++)
{
if (!iterator.hasNext())
break;
item = iterator.next();
pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameWithoutExtension, "_", i, ".pdf");
if (File.Exists(pagePDFFile))
File.Delete(pagePDFFile);
pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
if (File.Exists(pageTextFile))
File.Delete(pageTextFile);
if (item is not org.apache.pdfbox.pdmodel.PDDocument pd)
continue;
pageText = dataStripper.getText(pd);
pd.save(pagePDFFile);
pd.close();
File.WriteAllText(pageTextFile, pageText);
}
pdDocument.close();
} }
public static void Secondary(List<string> args) public static void Secondary(List<string> args)
@ -28,7 +51,7 @@ internal class Program
try try
{ {
if (args.Any() && File.Exists(args[0])) if (args.Any() && File.Exists(args[0]))
PDFTextStripper(args[0]); PortableDocumentFormatWriteText(args[0]);
else else
throw new Exception(args[0]); throw new Exception(args[0]);
} }

View File

@ -1,6 +1,6 @@
{ {
"sdk": { "sdk": {
"version": "6.0.202" "version": "8.0.118"
} }
} }