using log4net;
using Shared;
using Shared.Metrology;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;

namespace MET08DDUPSFS6420.Helpers
{
    public partial class ProcessData
    {
        public HeaderFile Header { get; private set; }
        public List<DataFile> Details { get; private set; }

        private int _I;
        private ILog _Log;
        private string _Data;

        public ProcessData(ILogic logic, ConfigData configData, List<FileInfo> fileInfoCollection)
        {
            Header = null;
            fileInfoCollection.Clear();
            _I = 0;
            _Data = string.Empty;
            Details = new List<DataFile>();
            _Log = LogManager.GetLogger(typeof(ProcessData));
            Tuple<HeaderFile, List<DataFile>> tuple = Parse(logic, configData, fileInfoCollection);
            Details.AddRange(tuple.Item2);
            Header = tuple.Item1;
        }

        public static string GetLines(ILogic logic, List<FileRead.Description> descriptions, bool ganPPTST)
        {
            StringBuilder result = new StringBuilder();
            FileRead.Description x = descriptions[0];
            if (ganPPTST)
            {
                string slot;
                string reactor;
                const int eight = 8;
                DateTime dateTime = DateTime.Parse(x.Date);
                string lot = x.Lot.ToLower().Replace("69-", string.Empty).Replace("71-", string.Empty).Replace("-", string.Empty);
                if (string.IsNullOrEmpty(x.Lot) || x.Lot.Length < 2)
                    reactor = "R";
                else
                    reactor = string.Concat("R", x.Lot.Substring(0, 2));
                result.Append(nameof(x.Date)).Append(";").
                    Append("Part").Append(";").
                    Append(nameof(x.Reactor)).Append(";").
                    Append("Lot").Append(";").
                    Append(nameof(DataFile.Slot)).Append(";").
                    Append(nameof(DataFile.Bin1)).Append(";").
                    Append(nameof(DataFile.Bin2)).Append(";").
                    Append(nameof(DataFile.Bin3)).Append(";").
                    Append(nameof(DataFile.Bin4)).Append(";").
                    Append(nameof(DataFile.Bin5)).Append(";").
                    Append(nameof(DataFile.Bin6)).Append(";").
                    Append("Bin9").
                    AppendLine();
                foreach (FileRead.Description description in descriptions)
                {
                    slot = description.Slot.Replace("*", string.Empty);
                    result.Append("!").Append(dateTime.ToString("MM/dd/yyyy HH:mm:ss")).Append(";").
                        Append("Particle Adder;").
                        Append(reactor).Append(";").
                        Append(lot).Append(";").
                        Append(slot).Append(";").
                        Append(description.Bin1).Append(";").
                        Append(description.Bin2).Append(";").
                        Append(description.Bin3).Append(";").
                        Append(description.Bin4).Append(";").
                        Append(description.Bin5).Append(";").
                        Append(description.Bin6).Append(";").
                        Append(description.AreaCount).
                        AppendLine();
                }
                if (descriptions.Count != eight)
                {
                    string negativeTenThousand = "-10000";
                    for (int i = descriptions.Count; i < eight; i++)
                    {
                        result.Append("!").Append(dateTime.ToString("MM/dd/yyyy HH:mm:ss")).Append(";").
                            Append("Particle Adder;").
                            Append(reactor).Append(";").
                            Append(lot).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).
                            AppendLine();
                    }
                }
                if (result.ToString().Split('\n').Length != (eight + 2))
                    throw new Exception(string.Concat("Must have ", eight, " samples"));
            }
            else
            {
                char del = '\t';
                result.Append(x.AreaCountAvg).Append(del).      // 001 - AreaCountAvg
                    Append(x.AreaCountMax).Append(del).         // 002 - AreaCountMax
                    Append(x.AreaCountMin).Append(del).         // 003 - AreaCountMin
                    Append(x.AreaCountStdDev).Append(del).      // 004 - AreaCountStdDev
                    Append(x.AreaTotalAvg).Append(del).         // 005 - AreaTotalAvg
                    Append(x.AreaTotalMax).Append(del).         // 006 - AreaTotalMax
                    Append(x.AreaTotalMin).Append(del).         // 007 - AreaTotalMin
                    Append(x.AreaTotalStdDev).Append(del).      // 008 - AreaTotalStdDev
                    Append(x.Date).Append(del).                 // 009 -
                    Append(x.HazeAverageAvg).Append(del).       // 010 - Haze Average
                    Append(x.HazeAverageMax).Append(del).       // 011 -
                    Append(x.HazeAverageMin).Append(del).       // 012 -
                    Append(x.HazeAverageStdDev).Append(del).    // 013 -
                    Append(x.HazeRegionAvg).Append(del).        // 014 -
                    Append(x.HazeRegionMax).Append(del).        // 015 -
                    Append(x.HazeRegionMin).Append(del).        // 016 -
                    Append(x.HazeRegionStdDev).Append(del).     // 017 -
                    Append(x.Lot).Append(del).                  // 018 -
                    Append(x.LPDCM2Avg).Append(del).            // 019 -
                    Append(x.LPDCM2Max).Append(del).            // 020 -
                    Append(x.LPDCM2Min).Append(del).            // 021 -
                    Append(x.LPDCM2StdDev).Append(del).         // 022 -
                    Append(x.LPDCountAvg).Append(del).          // 023 -
                    Append(x.LPDCountMax).Append(del).          // 024 -
                    Append(x.LPDCountMin).Append(del).          // 025 -
                    Append(x.LPDCountStdDev).Append(del).       // 026 -
                    Append(x.Employee).Append(del).             // 027 -
                    Append(x.RDS).Append(del).                  // 028 - Lot
                    Append(x.Reactor).Append(del).              // 029 - Process
                    Append(x.Recipe.Replace(";", string.Empty)).Append(del). // 030 - Part
                    Append(x.ScratchCountAvg).Append(del).      // 031 - Scratch Count
                    Append(x.ScratchCountMax).Append(del).      // 032 -
                    Append(x.ScratchCountMin).Append(del).      // 033 -
                    Append(x.ScratchCountStdDev).Append(del).   // 034 -
                    Append(x.ScratchTotalAvg).Append(del).      // 035 - Scratch Length
                    Append(x.ScratchTotalMax).Append(del).      // 036 -
                    Append(x.ScratchTotalMin).Append(del).      // 037 -
                    Append(x.ScratchTotalStdDev).Append(del).   // 038 -
                    Append(x.SumOfDefectsAvg).Append(del).      // 039 - Average Sum of Defects
                    Append(x.SumOfDefectsMax).Append(del).      // 040 - Max Sum of Defects
                    Append(x.SumOfDefectsMin).Append(del).      // 041 - Min Sum of Defects
                    Append(x.SumOfDefectsStdDev).Append(del).   // 042 - SumOfDefectsStdDev
                    Append(logic.Logistics.MesEntity).Append(del). // 043 -
                    AppendLine();
            }
            return result.ToString();
        }
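        // GetLines output sketch (illustrative values, ganPPTST == true): one header row followed
        // by exactly eight ';'-delimited wafer rows; absent wafers are padded with -10000 columns.
        //   Date;Part;Reactor;Lot;Slot;Bin1;Bin2;Bin3;Bin4;Bin5;Bin6;Bin9
        //   !01/30/2019 05:56:53;Particle Adder;R33;<lot>;01;<Bin1>;...;<AreaCount>
        //   !01/30/2019 05:56:53;Particle Adder;R33;<lot>;-10000;-10000;...;-10000
        // With ganPPTST == false, a single tab-delimited row of the 43 numbered columns is emitted.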
        internal static void PostOpenInsightMetrologyViewerAttachments(ILog log, ConfigData configData, Logistics logistics, DateTime dateTime, string logisticsSequenceMemoryDirectory, List<FileRead.Description> descriptions, string matchDirectory)
        {
            string checkFileName;
            string[] pclFiles = Directory.GetFiles(matchDirectory, "*.pcl", SearchOption.TopDirectoryOnly);
            if (pclFiles.Length != 1)
                throw new Exception("Invalid source file count!");
            string sourceFileNameNoExt = Path.GetFileNameWithoutExtension(pclFiles[0]);
            string wsResultsMemoryFile = string.Concat(logisticsSequenceMemoryDirectory, @"\", nameof(WS.Results), ".json");
            if (!File.Exists(wsResultsMemoryFile))
                throw new Exception(string.Concat("Memory file <", wsResultsMemoryFile, "> doesn't exist!"));
            string json = File.ReadAllText(wsResultsMemoryFile);
            WS.Results metrologyWSRequest = JsonSerializer.Deserialize<WS.Results>(json);
            long wsResultsHeaderID = metrologyWSRequest.HeaderID;
            List<WS.Attachment> dataAttachments = new List<WS.Attachment>();
            List<WS.Attachment> headerAttachments = new List<WS.Attachment>();
            checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_data.pdf");
            if (!File.Exists(checkFileName))
                log.Debug("Header file doesn't exist!");
            else
                headerAttachments.Add(new WS.Attachment(descriptions[0].HeaderUniqueId, "Data.pdf", checkFileName));
            foreach (FileRead.Description description in descriptions)
            {
                checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_", description.Slot.Replace('*', 's'), "_image.pdf");
                if (File.Exists(checkFileName))
                    dataAttachments.Add(new WS.Attachment(description.UniqueId, "Image.pdf", checkFileName));
                checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_", description.Slot.Replace('*', 's'), "_data.pdf");
                if (File.Exists(checkFileName))
                    dataAttachments.Add(new WS.Attachment(description.UniqueId, "Data.pdf", checkFileName));
            }
            if (dataAttachments.Count == 0 || dataAttachments.Count != descriptions.Count)
                log.Debug("Invalid attachment count!");
            WS.AttachFiles(configData.OpenInsightMetrogyViewerAPI, wsResultsHeaderID, headerAttachments, dataAttachments);
        }

        /// <summary>
        /// Convert the raw data file to parsable file format - in this case from PCL to PDF
        /// </summary>
        /// <param name="sourceFile">source file to be converted to PDF</param>
        /// <returns></returns>
        private static string ConvertSourceFileToPdf(ConfigData configData, string sourceFile)
        {
            string result = Path.ChangeExtension(sourceFile, ".pdf");
            if (!File.Exists(result))
            {
                //string arguments = string.Concat("-i \"", sourceFile, "\" -o \"", result, "\"");
                string arguments = string.Concat("-dSAFER -dBATCH -dNOPAUSE -sOutputFile=\"", result, "\" -sDEVICE=pdfwrite \"", sourceFile, "\"");
                //Process process = Process.Start(configData.LincPDFCFileName, arguments);
                Process process = Process.Start(configData.GhostPCLFileName, arguments);
                process.WaitForExit(30000);
                if (!File.Exists(result))
                    throw new Exception("PDF file wasn't created");
            }
            return result;
        }
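        // For a source file "X.pcl", the GhostPCL invocation above expands to the command line
        // below; the executable path comes from configData.GhostPCLFileName and its binary name
        // varies by install, so this is only illustrative:
        //   <GhostPCLFileName> -dSAFER -dBATCH -dNOPAUSE -sOutputFile="X.pdf" -sDEVICE=pdfwrite "X.pcl"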
        /// <summary>
        /// Test and fix a data line from the Lot Summary page if there are two values that are merged.
        /// </summary>
        /// <param name="toEol">data line from Lot Summary</param>
        private void FixToEolArray(ref string[] toEol)
        {
            const int MAX_COLUMNS = 9;
            int[] mColumnWidths = new int[MAX_COLUMNS] { 8, 6, 6, 6, 6, 7, 7, 5, 7 };
            // is it short at least one data point
            if (toEol.Length < MAX_COLUMNS)
            {
                _Log.Debug($"****FixToEolArray - Starting array:");
                _Log.Debug(toEol);
                _Log.Debug($"****FixToEolArray - Column widths:");
                _Log.Debug(mColumnWidths);
                string leftVal, rightVal;
                // size up and assign a working list
                List<string> toEolList = new List<string>(toEol);
                if (string.IsNullOrEmpty(toEolList[toEolList.Count - 1]))
                    toEolList.RemoveAt(toEolList.Count - 1); // removes a null element at end
                _Log.Debug($"****FixToEolArray - New toEolList:");
                _Log.Debug(toEolList);
                for (int i = toEolList.Count; i < MAX_COLUMNS; i++)
                    toEolList.Insert(0, ""); // insert to top of list
                _Log.Debug(toEolList);
                // start at the end
                for (int i = MAX_COLUMNS - 1; i >= 0; i--)
                {
                    // test for a bad value - does it have too many characters
                    _Log.Debug($"****FixToEolArray - toEolList[i].Length: {toEolList[i].Length}, mColumnWidths[i]: {mColumnWidths[i]}");
                    if (toEolList[i].Length > mColumnWidths[i])
                    {
                        // split it up into its two parts
                        leftVal = toEolList[i].Substring(0, toEolList[i].Length - mColumnWidths[i]);
                        rightVal = toEolList[i].Substring(leftVal.Length);
                        _Log.Debug($"****FixToEolArray - Split leftVal: {leftVal}");
                        _Log.Debug($"****FixToEolArray - Split rightVal: {rightVal}");
                        // insert new value
                        toEolList[i] = rightVal;
                        toEolList.Insert(i, leftVal);
                        if (string.IsNullOrEmpty(toEolList[0]))
                            toEolList.RemoveAt(0); // removes a null element at start
                        _Log.Debug($"****FixToEolArray - Fixed toEolList:");
                        _Log.Debug(toEolList);
                    }
                }
                toEol = toEolList.ToArray();
                _Log.Debug($"****FixToEolArray - Ending array:");
                _Log.Debug(toEol);
            }
        }

        private void ScanPast(string text)
        {
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
                _I = num + text.Length;
            else
                _I = _Data.Length;
        }

        private string GetBefore(string text)
        {
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
            {
                string str = _Data.Substring(_I, num - _I);
                _I = num + text.Length;
                return str.Trim();
            }
            string str1 = _Data.Substring(_I);
            _I = _Data.Length;
            return str1.Trim();
        }

        private string GetBefore(string text, bool trim)
        {
            if (trim)
                return GetBefore(text);
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
            {
                string str = _Data.Substring(_I, num - _I);
                _I = num + text.Length;
                return str;
            }
            string str1 = _Data.Substring(_I);
            _I = _Data.Length;
            return str1;
        }

        private bool IsNullOrWhiteSpace(string text)
        {
            for (int index = 0; index < text.Length; ++index)
            {
                if (!char.IsWhiteSpace(text[index]))
                    return false;
            }
            return true;
        }

        private bool IsBlankLine()
        {
            int num = _Data.IndexOf("\n", _I);
            return IsNullOrWhiteSpace(num > -1 ? _Data.Substring(_I, num - _I) : _Data.Substring(_I));
        }
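        // The helpers above and below form a simple cursor-based scanner: _Data holds the text of
        // the current page and _I is the read position. ScanPast("Label:") advances the cursor past
        // a label (or to end-of-data if absent), GetBefore/GetToEOL consume and return the text up
        // to a delimiter, and PeekNextLine reads a line without moving the cursor. For example
        // (hypothetical input):
        //   _Data = "Date: 01/30/2019\nLPD Count: 12\n"; _I = 0;
        //   ScanPast("Date:");          // cursor now sits just past "Date:"
        //   string date = GetToEOL();   // "01/30/2019"; cursor moves past the newline
        //   ScanPast("LPD Count:");
        //   string lpd = GetToEOL();    // "12"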
        private string GetToEOL()
        {
            return GetBefore("\n");
        }

        private string GetToEOL(bool trim)
        {
            if (trim)
                return GetToEOL();
            return GetBefore("\n", false);
        }

        private string GetToText(string text)
        {
            return _Data.Substring(_I, _Data.IndexOf(text, _I) - _I).Trim();
        }

        private string GetToken()
        {
            while (_I < _Data.Length && IsNullOrWhiteSpace(_Data.Substring(_I, 1)))
                ++_I;
            int j = _I;
            while (j < _Data.Length && !IsNullOrWhiteSpace(_Data.Substring(j, 1)))
                ++j;
            string str = _Data.Substring(_I, j - _I);
            _I = j;
            return str.Trim();
        }

        private string PeekNextLine()
        {
            int j = _I;
            string toEol = GetToEOL();
            _I = j;
            return toEol;
        }

        private HeaderFile ParseLotSummary(ILogic logic, string headerFileName, Dictionary<string, string> pages, Dictionary<string, List<DataFile>> slots)
        {
            HeaderFile result = new HeaderFile { JobID = logic.Logistics.JobID, MesEntity = logic.Logistics.MesEntity, Date = DateTime.Now.ToString() };
            _I = 0;
            //string headerText;
            //string altHeaderFileName = Path.ChangeExtension(headerFileName, ".txt");
            //if (File.Exists(altHeaderFileName))
            //    headerText = File.ReadAllText(altHeaderFileName);
            //else
            //{
            //    //Pdfbox, IKVM.AWT.WinForms
            //    org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(headerFileName);
            //    org.apache.pdfbox.util.PDFTextStripper stripper = new org.apache.pdfbox.util.PDFTextStripper();
            //    headerText = stripper.getText(pdfDocument);
            //    pdfDocument.close();
            //    File.AppendAllText(altHeaderFileName, headerText);
            //}
            string h = string.Empty;
            //result.Id = h;
            //result.Title = h;
            //result.Zone = h;
            //result.PSN = h;
            //result.Layer = h;
            result.ParseErrorText = string.Empty;
            if (!pages.ContainsKey(headerFileName))
                throw new Exception();
            _I = 0;
            _Data = pages[headerFileName];
            ScanPast("Date:");
            result.Date = GetToEOL();
            ScanPast("Recipe ID:");
            result.Recipe = GetBefore("LotID:");
            result.Recipe = result.Recipe.Replace(";", "");
            if (_Data.Contains("[]"))
                result.Lot = GetBefore("[]");
            else if (_Data.Contains("[7]"))
                result.Lot = GetBefore("[7]");
            else
                result.Lot = GetBefore("[");
            // Remove illegal characters \/:*?"<>| found in the Lot.
            result.Lot = Regex.Replace(result.Lot, @"[\\,\/,\:,\*,\?,\"",\<,\>,\|]", "_").Split('\r')[0].Split('\n')[0];
            // determine number of wafers and their slot numbers
            _Log.Debug(_Data.Substring(_I));
            string slot;
            string toEOL;
            int slotCount = _Data.Substring(_I).Split('*').Length - 1;
            _Log.Debug($"****HeaderFile - Slot Count: {slotCount}.");
            for (int i = 0; i < slotCount; i++)
            {
                ScanPast("*");
                toEOL = GetToEOL(false);
                slot = string.Concat("*", toEOL.Substring(0, 2));
                if (!slots.ContainsKey(slot))
                    slots.Add(slot, new List<DataFile>());
            }
            _Log.Debug($"****HeaderFile - Slots:");
            _Log.Debug(slots);
            ScanPast("Min:");
            string[] toEol1 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol1 Count: {toEol1.Length}.");
            FixToEolArray(ref toEol1);
            result.LPDCountMin = toEol1[0].Trim();
            result.LPDCM2Min = toEol1[1].Trim();
            result.AreaCountMin = toEol1[2].Trim();
            result.AreaTotalMin = toEol1[3].Trim();
            result.ScratchCountMin = toEol1[4].Trim();
            result.ScratchTotalMin = toEol1[5].Trim();
            result.SumOfDefectsMin = toEol1[6].Trim();
            result.HazeRegionMin = toEol1[7].Trim();
            result.HazeAverageMin = toEol1[8].Trim();
            ScanPast("Max:");
            string[] toEol2 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol2 Count: {toEol2.Length}.");
            FixToEolArray(ref toEol2);
            result.LPDCountMax = toEol2[0].Trim();
            result.LPDCM2Max = toEol2[1].Trim();
            result.AreaCountMax = toEol2[2].Trim();
            result.AreaTotalMax = toEol2[3].Trim();
            result.ScratchCountMax = toEol2[4].Trim();
            result.ScratchTotalMax = toEol2[5].Trim();
            result.SumOfDefectsMax = toEol2[6].Trim();
            result.HazeRegionMax = toEol2[7].Trim();
            result.HazeAverageMax = toEol2[8].Trim();
            ScanPast("Average:");
            string[] toEol3 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol3 Count: {toEol3.Length}.");
            FixToEolArray(ref toEol3);
            result.LPDCountAvg = toEol3[0].Trim();
            result.LPDCM2Avg = toEol3[1].Trim();
            result.AreaCountAvg = toEol3[2].Trim();
            result.AreaTotalAvg = toEol3[3].Trim();
            result.ScratchCountAvg = toEol3[4].Trim();
            result.ScratchTotalAvg = toEol3[5].Trim();
            result.SumOfDefectsAvg = toEol3[6].Trim();
            result.HazeRegionAvg = toEol3[7].Trim();
            result.HazeAverageAvg = toEol3[8].Trim();
            ScanPast("Std Dev:");
            string[] toEol4 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol4 Count: {toEol4.Length}.");
            FixToEolArray(ref toEol4);
            result.LPDCountStdDev = toEol4[0].Trim();
            result.LPDCM2StdDev = toEol4[1].Trim();
            result.AreaCountStdDev = toEol4[2].Trim();
            result.AreaTotalStdDev = toEol4[3].Trim();
            result.ScratchCountStdDev = toEol4[4].Trim();
            result.ScratchTotalStdDev = toEol4[5].Trim();
            result.SumOfDefectsStdDev = toEol4[6].Trim();
            result.HazeRegionStdDev = toEol4[7].Trim();
            result.HazeAverageStdDev = toEol4[8].Trim();
            string[] segments = result.Lot.Split('-');
            if (segments.Length > 0)
                result.Reactor = segments[0];
            if (segments.Length > 1)
                result.RDS = segments[1];
            if (segments.Length > 2)
                result.PSN = segments[2];
            // Example of header.UniqueId is TENCOR1_33-289217-4693_201901300556533336
            result.UniqueId = string.Format("{0}_{1}_{2}", logic.Logistics.JobID, result.Lot, Path.GetFileNameWithoutExtension(logic.Logistics.ReportFullPath));
            return result;
        }
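        // Lot IDs are expected in Reactor-RDS-PSN form; e.g. a Lot of "33-289217-4693" yields
        // Reactor "33", RDS "289217", PSN "4693", and a header UniqueId of
        // "TENCOR1_33-289217-4693_201901300556533336" (JobID_Lot_ReportFileNameNoExt).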
        private DataFile ParseWaferSummary(HeaderFile headerFile, string waferFileName, Dictionary<string, string> pages)
        {
            DataFile result = new DataFile { Data = "*Data*", i = -1 };
            _I = 0;
            //string waferText;
            //string altWaferFileName = Path.ChangeExtension(waferFileName, ".txt");
            //if (File.Exists(altWaferFileName))
            //    waferText = File.ReadAllText(altWaferFileName);
            //else
            //{
            //    //Pdfbox, IKVM.AWT.WinForms
            //    org.apache.pdfbox.pdmodel.PDDocument pdfDocument = org.apache.pdfbox.pdmodel.PDDocument.load(waferFileName);
            //    org.apache.pdfbox.util.PDFTextStripper dataStripper = new org.apache.pdfbox.util.PDFTextStripper();
            //    waferText = dataStripper.getText(pdfDocument);
            //    pdfDocument.close();
            //    File.AppendAllText(altWaferFileName, waferText);
            //}
            List<string> stringList = new List<string>();
            result.HeaderUniqueId = headerFile.UniqueId;
            result.Id = 0;
            result.Title = null;
            if (!pages.ContainsKey(waferFileName))
                throw new Exception();
            _I = 0;
            _Data = pages[waferFileName];
            ScanPast("Date:");
            result.Date = GetToEOL();
            ScanPast("ID#");
            result.Slot = GetToEOL();
            if (result.Slot.Length > 5)
                result.Slot = string.Concat(result.Slot.Substring(0, 5), "... - ***");
            //result.Slot = result.Slot.Replace("*", "");
            ScanPast("Comments:");
            result.Comments = GetToEOL();
            ScanPast("Sort:");
            result.Sort = GetToEOL();
            ScanPast("LPD Count:");
            result.LPDCount = GetToEOL();
            ScanPast("LPD / cm2:");
            result.LPDCM2 = GetToEOL();
            while (GetBefore(":").Contains("Bin"))
                stringList.Add(GetToEOL());
            if (stringList.Count >= 1)
                result.Bin1 = stringList[0];
            if (stringList.Count >= 2)
                result.Bin2 = stringList[1];
            if (stringList.Count >= 3)
                result.Bin3 = stringList[2];
            if (stringList.Count >= 4)
                result.Bin4 = stringList[3];
            if (stringList.Count >= 5)
                result.Bin5 = stringList[4];
            if (stringList.Count >= 6)
                result.Bin6 = stringList[5];
            if (stringList.Count >= 7)
                result.Bin7 = stringList[6];
            if (stringList.Count >= 8)
                result.Bin8 = stringList[7];
            result.Mean = GetToEOL();
            ScanPast("Std Dev:");
            result.StdDev = GetToEOL();
            ScanPast("Area Count:");
            result.AreaCount = GetToEOL();
            ScanPast("Area Total:");
            result.AreaTotal = GetToEOL();
            ScanPast("Scratch Count:");
            result.ScratchCount = GetToEOL();
            ScanPast("Scratch Total:");
            result.ScratchTotal = GetToEOL();
            ScanPast("Sum of All Defects:");
            result.SumOfDefects = GetToEOL();
            ScanPast("Haze Region:");
            result.HazeRegion = GetToEOL();
            ScanPast("Haze Average:");
            result.HazeAverage = GetToEOL();
            ScanPast("Haze Peak:");
            result.HazePeak = GetToEOL();
            ScanPast("Laser:");
            result.Laser = GetBefore("Gain:");
            result.Gain = GetBefore("Diameter:");
            result.Diameter = GetToEOL();
            ScanPast("Thresh:");
            result.Thresh = GetBefore("Exclusion:");
            result.Exclusion = GetToEOL();
            ScanPast("Haze Rng:");
            result.HazeRng = GetBefore("Thruput:");
            result.Thruput = GetToEOL();
            ScanPast("Recipe ID:");
            result.Recipe = GetToEOL();
            result.UniqueId = string.Format("{0}_{1}", headerFile.UniqueId, result.Slot.Replace("*", string.Empty).TrimStart('0'));
            return result;
        }
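        // ParseWaferSummary assumes the wafer page carries its labels in the order scanned above
        // (Date:, ID#, Comments:, Sort:, LPD Count:, LPD / cm2:, Bin*:, Std Dev:, Area Count:,
        // Area Total:, Scratch Count:, Scratch Total:, Sum of All Defects:, Haze Region:,
        // Haze Average:, Haze Peak:, Laser:/Gain:/Diameter:, Thresh:/Exclusion:,
        // Haze Rng:/Thruput:, Recipe ID:). A page in any other layout misparses silently, because
        // ScanPast() jumps to the end of the page text when a label is absent.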
        private Tuple<HeaderFile, List<DataFile>> Parse(ILogic logic, ConfigData configData, List<FileInfo> fileInfoCollection)
        {
            Tuple<HeaderFile, List<DataFile>> result;
            object item;
            string pageText;
            string pagePDFFile;
            string pageTextFile;
            DataFile dataFile = null;
            string pageFileName = string.Empty;
            string missingWaferMessage = string.Empty;
            List<string> sourceFiles = new List<string>();
            List<string> missingSlots = new List<string>();
            List<DataFile> dataFiles = new List<DataFile>();
            Dictionary<string, string> pages = new Dictionary<string, string>();
            string sourcePath = Path.GetDirectoryName(logic.Logistics.ReportFullPath);
            Dictionary<string, List<DataFile>> slots = new Dictionary<string, List<DataFile>>();
            string sourceFileNamePdf = ConvertSourceFileToPdf(configData, logic.Logistics.ReportFullPath);
            sourceFiles.Add(sourceFileNamePdf);
            string sourceFileNameNoExt = Path.GetFileNameWithoutExtension(logic.Logistics.ReportFullPath);
            ////PdfSharp open pdf
            //using (PdfSharp.Pdf.PdfDocument sourceDocument = PdfSharp.Pdf.IO.PdfReader.Open(sourceFileNamePdf, PdfSharp.Pdf.IO.PdfDocumentOpenMode.Import))
            //{
            //    for (int idxPage = 0; idxPage < sourceDocument.PageCount; idxPage++)
            //    {
            //        // split the pdf into separate pages. Odd pages are wafer image, even are wafer summary. Last page is Lot Summary.
            //        _Log.Debug($"****ParseData - Splitting page: {idxPage}, sourceDocument: {sourceDocument.FullPath}, sourcePathFileNoExt: {sourcePathFileNoExt}");
            //        //SplitPage(sourceDocument, sourcePathFileNoExt, idxPage);
            //        pageNum = idxPage + 1;
            //        pageFile = string.Format("{0}_{1}.pdf", sourcePathFileNoExt, pageNum);
            //        _Log.Debug($"****SplitPage - Page {pageNum} Source file: {sourceDocument.FullPath}");
            //        _Log.Debug($"****SplitPage - Page {pageNum} Output file: {pageFile}");
            //        //PdfSharp Create new document
            //        PdfSharp.Pdf.PdfDocument outputDocument = new PdfSharp.Pdf.PdfDocument { Version = sourceDocument.Version };
            //        outputDocument.Info.Title = string.Format("Page {0} of {1}", pageNum, sourceDocument.Info.Title);
            //        outputDocument.Info.Creator = sourceDocument.Info.Creator;
            //        outputDocument.AddPage(sourceDocument.Pages[idxPage]);
            //        outputDocument.Pages[0].CropBox = new PdfSharp.Pdf.PdfRectangle(new PdfSharp.Drawing.XRect(0, 100, 700, 700));
            //        outputDocument.Save(pageFile);
            //    }
            //    sourceDocumentPageCount = sourceDocument.PageCount;
            //    sourceDocument.Close();
            //}
            java.io.File file = new java.io.File(sourceFileNamePdf);
            org.apache.pdfbox.util.Splitter splitter = new org.apache.pdfbox.util.Splitter();
            org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
            java.util.List list = splitter.split(pdDocument);
            java.util.ListIterator iterator = list.listIterator();
            org.apache.pdfbox.util.PDFTextStripper dataStripper = new org.apache.pdfbox.util.PDFTextStripper();
            for (short i = 1; i < short.MaxValue; i++)
            {
                if (!iterator.hasNext())
                    break;
                item = iterator.next();
                pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", i, ".pdf");
                pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                if (File.Exists(pageTextFile))
                {
                    pageText = File.ReadAllText(pageTextFile);
                    sourceFiles.Add(pageTextFile);
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pd.close();
                }
                else if (File.Exists(pagePDFFile))
                {
                    org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
                    pageText = dataStripper.getText(document);
                    document.close();
                    sourceFiles.Add(pagePDFFile);
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pd.close();
                }
                else
                {
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pageText = dataStripper.getText(pd);
                    pd.save(pagePDFFile);
                    sourceFiles.Add(pagePDFFile);
                    pd.close();
                    File.WriteAllText(pageTextFile, pageText);
                    sourceFiles.Add(pageTextFile);
                }
                pages.Add(pagePDFFile, pageText);
            }
            pdDocument.close();
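            // Each split page is persisted beside the source PDF as "<name>_<i>.pdf" plus a
            // "<name>_<i>.txt" text cache. On reruns the loop above reuses the cached .txt
            // (or re-strips the existing .pdf), so pdfbox splitting only happens on first contact.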
            // parse lot summary
            _Log.Debug($"****ParseData - Parsing lot summary");
            List<Tuple<string, string>> pageMapping = new List<Tuple<string, string>>();
            string headerFileName = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", pages.Count, ".pdf");
            HeaderFile headerFile = ParseLotSummary(logic, headerFileName, pages, slots);
            foreach (KeyValuePair<string, string> keyValuePair in pages)
            {
                if (keyValuePair.Key == headerFileName)
                    continue;
                if (string.IsNullOrEmpty(keyValuePair.Value.Trim()))
                {
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                if (!pages.ContainsKey(keyValuePair.Key))
                    throw new Exception();
                dataFile = ParseWaferSummary(headerFile, keyValuePair.Key, pages);
                if (string.IsNullOrEmpty(dataFile.Recipe) || dataFile.Recipe != headerFile.Recipe)
                {
                    missingSlots.Add(keyValuePair.Key);
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                if (!slots.ContainsKey(dataFile.Slot))
                {
                    missingSlots.Add(keyValuePair.Key);
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", dataFile.Slot.Replace('*', 's'), "_data.pdf")));
                slots[dataFile.Slot].Add(dataFile);
            }
            string checkFileName = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_data.pdf");
            if (!File.Exists(checkFileName))
            {
                File.Move(headerFileName, checkFileName);
                sourceFiles.Remove(headerFileName);
                sourceFiles.Add(checkFileName);
            }
            checkFileName = string.Empty;
            for (int i = pageMapping.Count - 1; i > -1; i--)
            {
                if (!string.IsNullOrEmpty(pageMapping[i].Item2))
                {
                    checkFileName = pageMapping[i].Item2;
                    if (!File.Exists(checkFileName))
                    {
                        File.Move(pageMapping[i].Item1, checkFileName);
                        sourceFiles.Remove(pageMapping[i].Item1);
                        sourceFiles.Add(checkFileName);
                    }
                }
                else if (!string.IsNullOrEmpty(checkFileName))
                {
                    //if (i == 0 || !string.IsNullOrEmpty(pageMapping[i - 1].Item2))
                    //{
                    checkFileName = checkFileName.Replace("_data.pdf", "_image.pdf");
                    if (!File.Exists(checkFileName))
                    {
                        File.Move(pageMapping[i].Item1, checkFileName);
                        sourceFiles.Remove(pageMapping[i].Item1);
                        sourceFiles.Add(checkFileName);
                    }
                    //}
                    checkFileName = string.Empty;
                }
            }
            foreach (KeyValuePair<string, List<DataFile>> keyValuePair in slots)
            {
                if (!keyValuePair.Value.Any() || keyValuePair.Value[0] is null)
                    missingSlots.Add(string.Concat("Slot ", keyValuePair.Key, " is missing."));
                else
                {
                    foreach (DataFile data in keyValuePair.Value)
                        dataFiles.Add(data);
                }
            }
            if (missingSlots.Any())
            {
                string missingSlotsFile = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_MissingSlots.txt");
                File.WriteAllLines(missingSlotsFile, missingSlots);
                sourceFiles.Add(missingSlotsFile);
            }
            headerFile.Date = DateTime.Parse(headerFile.Date).ToString();
            //for (int i = 0; i < dataFiles.Count; i++)
            //    dataFiles[i].Date = DateTime.Parse(dataFiles[i].Date).ToString();
            foreach (string sourceFile in sourceFiles)
                fileInfoCollection.Add(new FileInfo(sourceFile));
            fileInfoCollection.Add(new FileInfo(logic.Logistics.ReportFullPath));
            result = new Tuple<HeaderFile, List<DataFile>>(headerFile, dataFiles);
            return result;
        }
    }
}
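// Usage sketch (hypothetical caller; ILogic, ConfigData and the file-read pipeline are supplied
// by the hosting cell instance elsewhere in this solution):
//   List<FileInfo> fileInfoCollection = new List<FileInfo>();
//   ProcessData processData = new ProcessData(logic, configData, fileInfoCollection);
//   HeaderFile header = processData.Header;        // lot summary (last page of the PCL report)
//   List<DataFile> details = processData.Details;  // one DataFile per recognized wafer page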