using Adaptation.Shared;
using Adaptation.Shared.Metrology;
using log4net;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;

namespace Adaptation.Helpers
{

    // NOTE(review): generic type arguments in this file were lost during a text
    // extraction pass and have been reconstructed from usage. The reconstructions
    // marked below with "confirm" should be checked against source control.
    public partial class ProcessData : IProcessData
    {

        // Lot-summary header parsed from the last page of the source PCL/PDF.
        public HeaderFile Header { get; private set; }

        // One DataFile per successfully parsed wafer-summary page.
        public List<DataFile> Details { get; private set; }

        private int _I;               // cursor into _Data for the Scan/Get* text parsers below
        private readonly ILog _Log;
        private string _Data;         // text of the page currently being parsed

        public ProcessData(ILogic logic, ConfigData configData, List<FileInfo> fileInfoCollection)
        {
            Header = null;
            fileInfoCollection.Clear();
            _I = 0;
            _Data = string.Empty;
            Details = new List<DataFile>();
            _Log = LogManager.GetLogger(typeof(ProcessData));
            Tuple<HeaderFile, List<DataFile>> tuple = Parse(logic, configData, fileInfoCollection);
            Details.AddRange(tuple.Item2);
            Header = tuple.Item1;
        }

        /// <summary>
        /// Converts the parsed header/details into the (logistics, json, files) result tuple.
        /// Serializes the descriptions to JSON and re-reads them as a JsonElement.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the config type, event name, or test/description counts are inconsistent.
        /// </exception>
        public Tuple<string, JsonElement, List<FileInfo>> GetResults(ILogic logic, ConfigDataBase configDataBase, List<FileInfo> fileInfoCollection)
        {
            Tuple<string, JsonElement, List<FileInfo>> results;
            if (!(configDataBase is ConfigData configData))
                throw new Exception();
            List<Test> tests = new List<Test>();
            List<IProcessDataDescription> descriptions;
            EventName eventName = configData.GetEventNameValue();
            if (eventName == EventName.FileRead && Details.Any())
            {
                // One Tencor test entry per wafer detail.
                foreach (DataFile item in Details)
                    tests.Add(Test.Tencor);
                descriptions = configData.GetDescription(logic, tests, this);
            }
            else
                throw new Exception();
            if (!configData.EafHosted)
            {
                // Outside EAF hosting this call is exercised for its side effects only.
                new FileRead.Description().GetDescription(logic, configData, tests, this);
            }
            if (tests.Count != descriptions.Count)
                throw new Exception();
            for (int i = 0; i < tests.Count; i++)
            {
                if (descriptions[i].Test != (int)tests[i])
                    throw new Exception();
            }
            string json;
            if (descriptions[0] is Duplicator.Description)
            {
                List<Duplicator.Description> duplicatorDescriptions = (from l in descriptions select (Duplicator.Description)l).ToList();
                json = JsonSerializer.Serialize(duplicatorDescriptions, duplicatorDescriptions.GetType());
            }
            else if (descriptions[0] is FileRead.Description)
            {
                List<FileRead.Description> fileReadDescriptions = (from l in descriptions select (FileRead.Description)l).ToList();
                json = JsonSerializer.Serialize(fileReadDescriptions, fileReadDescriptions.GetType());
            }
            else
                throw new Exception();
            // Round-trip through the serializer to obtain a JsonElement view of the data.
            object @object = JsonSerializer.Deserialize<object>(json);
            if (!(@object is JsonElement jsonElement))
                throw new Exception();
            results = new Tuple<string, JsonElement, List<FileInfo>>(logic.Logistics.Logistics1[0], jsonElement, fileInfoCollection);
            return results;
        }

        /// <summary>
        /// Builds the key/value-pair map for the supplied descriptions and validates it.
        /// </summary>
        // NOTE(review): return/parameter generics reconstructed from usage — confirm.
        public static Dictionary<string, List<string>> GetKeyValuePairs(ConfigData configData, JsonElement jsonElement, List<FileRead.Description> processDataDescriptions, bool extra = false)
        {
            Dictionary<string, List<string>> results = configData.GetKeyValuePairs(processDataDescriptions);
            configData.CheckProcessDataDescription(results, extra);
            return results;
        }

        /// <summary>
        /// Filters the config-provided descriptions down to FileRead.Description instances.
        /// </summary>
        public static List<FileRead.Description> GetProcessDataFileReadDescriptions(ConfigData configData, JsonElement jsonElement)
        {
            List<FileRead.Description> results = new List<FileRead.Description>();
            List<IProcessDataDescription> processDataDescriptions = configData.GetIProcessDataDescriptions(jsonElement);
            foreach (IProcessDataDescription processDataDescription in processDataDescriptions)
            {
                if (!(processDataDescription is FileRead.Description description))
                    continue;
                results.Add(description);
            }
            return results;
        }

        /// <summary>
        /// Renders the descriptions either as the 8-sample semicolon-delimited GaN PPTST
        /// export (padded with -10000 rows) or as a single tab-delimited summary line.
        /// </summary>
        public static string GetLines(ILogic logic, List<FileRead.Description> descriptions, bool ganPPTST)
        {
            StringBuilder result = new StringBuilder();
            FileRead.Description x = descriptions[0];
            if (ganPPTST)
            {
                string slot;
                string reactor;
                const int eight = 8; // the GaN PPTST format requires exactly eight sample rows
                DateTime dateTime = DateTime.Parse(x.Date);
                string lot = x.Lot.ToLower().Replace("69-", string.Empty).Replace("71-", string.Empty).Replace("-", string.Empty);
                if (string.IsNullOrEmpty(x.Lot) || x.Lot.Length < 2)
                    reactor = "R";
                else
                    reactor = string.Concat("R", x.Lot.Substring(0, 2));
                result.Append(nameof(x.Date)).Append(";").
                    Append("Part").Append(";").
                    Append(nameof(x.Reactor)).Append(";").
                    Append("Lot").Append(";").
                    Append(nameof(DataFile.Slot)).Append(";").
                    Append(nameof(DataFile.Bin1)).Append(";").
                    Append(nameof(DataFile.Bin2)).Append(";").
                    Append(nameof(DataFile.Bin3)).Append(";").
                    Append(nameof(DataFile.Bin4)).Append(";").
                    Append(nameof(DataFile.Bin5)).Append(";").
                    Append(nameof(DataFile.Bin6)).Append(";").
                    Append("Bin9").
                    AppendLine();
                foreach (FileRead.Description description in descriptions)
                {
                    slot = description.Slot.Replace("*", string.Empty);
                    result.Append("!").Append(dateTime.ToString("MM/dd/yyyy HH:mm:ss")).Append(";").
                        Append("Particle Adder;").
                        Append(reactor).Append(";").
                        Append(lot).Append(";").
                        Append(slot).Append(";").
                        Append(description.Bin1).Append(";").
                        Append(description.Bin2).Append(";").
                        Append(description.Bin3).Append(";").
                        Append(description.Bin4).Append(";").
                        Append(description.Bin5).Append(";").
                        Append(description.Bin6).Append(";").
                        Append(description.AreaCount).
                        AppendLine();
                }
                if (descriptions.Count != eight)
                {
                    // Pad missing slots with sentinel rows so the export always has eight samples.
                    string negativeTenThousand = "-10000";
                    for (int i = descriptions.Count; i < eight; i++)
                    {
                        result.Append("!").Append(dateTime.ToString("MM/dd/yyyy HH:mm:ss")).Append(";").
                            Append("Particle Adder;").
                            Append(reactor).Append(";").
                            Append(lot).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).Append(";").
                            Append(negativeTenThousand).
                            AppendLine();
                    }
                }
                // header + eight data rows + trailing newline = eight + 2 '\n'-separated parts
                if (result.ToString().Split('\n').Length != (eight + 2))
                    throw new Exception(string.Concat("Must have ", eight, " samples"));
            }
            else
            {
                char del = '\t';
                result.Append(x.AreaCountAvg).Append(del).       // 001 - AreaCountAvg
                    Append(x.AreaCountMax).Append(del).          // 002 - AreaCountMax
                    Append(x.AreaCountMin).Append(del).          // 003 - AreaCountMin
                    Append(x.AreaCountStdDev).Append(del).       // 004 - AreaCountStdDev
                    Append(x.AreaTotalAvg).Append(del).          // 005 - AreaTotalAvg
                    Append(x.AreaTotalMax).Append(del).          // 006 - AreaTotalMax
                    Append(x.AreaTotalMin).Append(del).          // 007 - AreaTotalMin
                    Append(x.AreaTotalStdDev).Append(del).       // 008 - AreaTotalStdDev
                    Append(x.Date).Append(del).                  // 009 -
                    Append(x.HazeAverageAvg).Append(del).        // 010 - Haze Average
                    Append(x.HazeAverageMax).Append(del).        // 011 -
                    Append(x.HazeAverageMin).Append(del).        // 012 -
                    Append(x.HazeAverageStdDev).Append(del).     // 013 -
                    Append(x.HazeRegionAvg).Append(del).         // 014 -
                    Append(x.HazeRegionMax).Append(del).         // 015 -
                    Append(x.HazeRegionMin).Append(del).         // 016 -
                    Append(x.HazeRegionStdDev).Append(del).      // 017 -
                    Append(x.Lot).Append(del).                   // 018 -
                    Append(x.LPDCM2Avg).Append(del).             // 019 -
                    Append(x.LPDCM2Max).Append(del).             // 020 -
                    Append(x.LPDCM2Min).Append(del).             // 021 -
                    Append(x.LPDCM2StdDev).Append(del).          // 022 -
                    Append(x.LPDCountAvg).Append(del).           // 023 -
                    Append(x.LPDCountMax).Append(del).           // 024 -
                    Append(x.LPDCountMin).Append(del).           // 025 - was LPDCM2Min (copy-paste bug; duplicated column 021)
                    Append(x.LPDCountStdDev).Append(del).        // 026 -
                    Append(x.Employee).Append(del).              // 027 -
                    Append(x.RDS).Append(del).                   // 028 - Lot
                    Append(x.Reactor).Append(del).               // 029 - Process
                    Append(x.Recipe.Replace(";", string.Empty)).Append(del). // 030 - Part
                    Append(x.ScratchCountAvg).Append(del).       // 031 - Scratch Count
                    Append(x.ScratchCountMax).Append(del).       // 032 -
                    Append(x.ScratchCountMin).Append(del).       // 033 -
                    Append(x.ScratchCountStdDev).Append(del).    // 034 - was ScratchTotalStdDev (copy-paste bug; duplicated column 038)
                    Append(x.ScratchTotalAvg).Append(del).       // 035 - Scratch Length
                    Append(x.ScratchTotalMax).Append(del).       // 036 -
                    Append(x.ScratchTotalMin).Append(del).       // 037 -
                    Append(x.ScratchTotalStdDev).Append(del).    // 038 -
                    Append(x.SumOfDefectsAvg).Append(del).       // 039 - Average Sum of Defects
                    Append(x.SumOfDefectsMax).Append(del).       // 040 - Max Sum of Defects
                    Append(x.SumOfDefectsMin).Append(del).       // 041 - Min Sum of Defects
                    Append(x.SumOfDefectsStdDev).Append(del).    // 042 - SumOfDefectsStdDev
                    Append(logic.Logistics.MesEntity).Append(del). // 043 -
                    AppendLine();
            }
            return result.ToString();
        }

        /// <summary>
        /// Rewrites COSString tokens inside the data PDF so each page carries the
        /// reactor/load-lock comment, then saves the document in place if anything changed.
        /// </summary>
        private static void UpdateDataPDF(List<FileRead.Description> descriptions, string checkFileName)
        {
            string value;
            object possiblePage;
            object possibleString;
            object possibleCOSArray;
            java.util.List tokenList;
            java.util.List arrayList;
            java.io.OutputStream outputStream;
            java.util.ListIterator tokenIterator;
            java.util.ListIterator arrayIterator;
            List<string> updateValues = new List<string>();
            string reactorLoadLock = descriptions[0].Comments;
            StringBuilder stringBuilder = new StringBuilder();
            java.io.File file = new java.io.File(checkFileName);
            org.apache.pdfbox.pdmodel.common.PDStream pdStream;
            org.apache.pdfbox.pdmodel.common.PDStream updatedStream;
            org.apache.pdfbox.pdfparser.PDFStreamParser pdfStreamParser;
            org.apache.pdfbox.pdfwriter.ContentStreamWriter contentStreamWriter;
            org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
            org.apache.pdfbox.pdmodel.PDDocumentCatalog pdDocumentCatalog = pdDocument.getDocumentCatalog();
            java.util.List pagesList = pdDocumentCatalog.getAllPages();
            java.util.ListIterator pageIterator = pagesList.listIterator();
            // short.MaxValue is only a safety bound; the loops exit via hasNext().
            for (short i = 1; i < short.MaxValue; i++)
            {
                if (!pageIterator.hasNext())
                    break;
                possiblePage = pageIterator.next();
                if (!(possiblePage is org.apache.pdfbox.pdmodel.PDPage page))
                    continue;
                pdStream = page.getContents();
                pdfStreamParser = new org.apache.pdfbox.pdfparser.PDFStreamParser(pdStream);
                pdfStreamParser.parse();
                tokenList = pdfStreamParser.getTokens();
                tokenIterator = tokenList.listIterator();
                for (short t = 1; t < short.MaxValue; t++) // was "i < short.MaxValue" — tested the outer counter
                {
                    if (!tokenIterator.hasNext())
                        break;
                    possibleCOSArray = tokenIterator.next();
                    if (!(possibleCOSArray is org.apache.pdfbox.cos.COSArray cossArray))
                        continue;
                    stringBuilder.Clear();
                    arrayList = cossArray.toList();
                    arrayIterator = arrayList.listIterator();
                    for (short a = 1; a < short.MaxValue; a++) // was "i < short.MaxValue" — tested the outer counter
                    {
                        if (!arrayIterator.hasNext())
                            break;
                        possibleString = arrayIterator.next();
                        if (!(possibleString is org.apache.pdfbox.cos.COSString cossString))
                            continue;
                        value = cossString.getString();
                        stringBuilder.Append(value);
                        if (value != "]")
                            continue;
                        // A "]" string closes the token run; splice the reactor/load-lock
                        // comment into the element just before it.
                        updateValues.Add(value);
                        value = stringBuilder.ToString();
                        if (value.Contains("[]"))
                            cossArray.setString(a - 1, string.Concat("*", reactorLoadLock, "]"));
                        else
                            cossArray.setString(a - 1, string.Concat(" {*", reactorLoadLock, "}]"));
                    }
                }
                if (updateValues.Any())
                {
                    updatedStream = new org.apache.pdfbox.pdmodel.common.PDStream(pdDocument);
                    outputStream = updatedStream.createOutputStream();
                    contentStreamWriter = new org.apache.pdfbox.pdfwriter.ContentStreamWriter(outputStream);
                    contentStreamWriter.writeTokens(tokenList);
                    outputStream.close();
                    page.setContents(updatedStream);
                }
            }
            if (updateValues.Any())
                pdDocument.save(checkFileName);
            pdDocument.close();
        }

        /// <summary>
        /// Attaches the header data PDF and per-slot image/data PDFs from the match
        /// directory to the OpenInsight Metrology Viewer results record.
        /// </summary>
        internal static void PostOpenInsightMetrologyViewerAttachments(ILog log, ConfigData configData, Logistics logistics, DateTime dateTime, string logisticsSequenceMemoryDirectory, List<FileRead.Description> descriptions, string matchDirectory)
        {
            string checkFileName;
            string[] pclFiles = Directory.GetFiles(matchDirectory, "*.pcl", SearchOption.TopDirectoryOnly);
            if (pclFiles.Length != 1)
                throw new Exception("Invalid source file count!");
            string sourceFileNameNoExt = Path.GetFileNameWithoutExtension(pclFiles[0]);
            string wsResultsMemoryFile = string.Concat(logisticsSequenceMemoryDirectory, @"\", nameof(WS.Results), ".json");
            if (!File.Exists(wsResultsMemoryFile))
                throw new Exception(string.Concat("Memory file <", wsResultsMemoryFile, "> doesn't exist!"));
            string json = File.ReadAllText(wsResultsMemoryFile);
            WS.Results metrologyWSRequest = JsonSerializer.Deserialize<WS.Results>(json);
            long wsResultsHeaderID = metrologyWSRequest.HeaderID;
            List<WS.Attachment> dataAttachments = new List<WS.Attachment>();
            List<WS.Attachment> headerAttachments = new List<WS.Attachment>();
            checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_data.pdf");
            if (!File.Exists(checkFileName))
                log.Debug("Header file doesn't exist!");
            else
            {
                UpdateDataPDF(descriptions, checkFileName);
                headerAttachments.Add(new WS.Attachment(descriptions[0].HeaderUniqueId, "Data.pdf", checkFileName));
            }
            foreach (FileRead.Description description in descriptions)
            {
                checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_", description.Slot.Replace('*', 's'), "_image.pdf");
                if (File.Exists(checkFileName))
                    dataAttachments.Add(new WS.Attachment(description.UniqueId, "Image.pdf", checkFileName));
                checkFileName = string.Concat(matchDirectory, @"\", sourceFileNameNoExt, "_", description.Slot.Replace('*', 's'), "_data.pdf");
                if (File.Exists(checkFileName))
                    dataAttachments.Add(new WS.Attachment(description.UniqueId, "Data.pdf", checkFileName));
            }
            // A mismatch is logged (best-effort) rather than thrown so available files still post.
            if (dataAttachments.Count == 0 || dataAttachments.Count != descriptions.Count)
                log.Debug("Invalid attachment count!");
            WS.AttachFiles(configData.OpenInsightMetrogyViewerAPI, wsResultsHeaderID, headerAttachments, dataAttachments);
        }

        /// <summary>
        /// Convert the raw data file to parsable file format - in this case from PCL to PDF
        /// </summary>
        /// <param name="configData">supplies the GhostPCL executable path</param>
        /// <param name="sourceFile">source file to be converted to PDF</param>
        /// <returns>full path of the resulting PDF</returns>
        /// <exception cref="Exception">Thrown when GhostPCL fails to produce the PDF.</exception>
        private static string ConvertSourceFileToPdf(ConfigData configData, string sourceFile)
        {
            string result = Path.ChangeExtension(sourceFile, ".pdf");
            if (!File.Exists(result))
            {
                string arguments = string.Concat("-dSAFER -dBATCH -dNOPAUSE -sOutputFile=\"", result, "\" -sDEVICE=pdfwrite \"", sourceFile, "\"");
                // GhostPCL renders the PCL print stream to PDF; allow up to 30 seconds.
                using (Process process = Process.Start(configData.GhostPCLFileName, arguments))
                    process.WaitForExit(30000);
                if (!File.Exists(result))
                    throw new Exception("PDF file wasn't created");
            }
            return result;
        }

        /// <summary>
        /// Test and fix a data line from the Lot Summary page if there are two values that are merged.
        /// </summary>
        /// <param name="toEol">data line from Lot Summary, split on spaces; fixed in place</param>
        private void FixToEolArray(ref string[] toEol)
        {
            const int MAX_COLUMNS = 9;
            // Expected character width of each of the nine summary columns.
            int[] mColumnWidths = new int[MAX_COLUMNS] { 8, 6, 6, 6, 6, 7, 7, 5, 7 };
            // is it short at least one data point
            if (toEol.Length < MAX_COLUMNS)
            {
                _Log.Debug($"****FixToEolArray - Starting array:");
                _Log.Debug(toEol);
                _Log.Debug($"****FixToEolArray - Column widths:");
                _Log.Debug(mColumnWidths);
                string leftVal, rightVal;
                // size up and assign a working list
                List<string> toEolList = new List<string>(toEol);
                if (string.IsNullOrEmpty(toEolList[toEolList.Count - 1]))
                    toEolList.RemoveAt(toEolList.Count - 1); // removes a null element at end
                _Log.Debug($"****FixToEolArray - New toEolList:");
                _Log.Debug(toEolList);
                for (int i = toEolList.Count; i < MAX_COLUMNS; i++)
                    toEolList.Insert(0, ""); // insert to top of list
                _Log.Debug(toEolList);
                // start at the end
                for (int i = MAX_COLUMNS - 1; i >= 0; i--)
                {
                    // test for a bad value - does it have too many characters
                    _Log.Debug($"****FixToEolArray - toEolList[i].Length: {toEolList[i].Length}, mColumnWidths[i]: {mColumnWidths[i]}");
                    if (toEolList[i].Length > mColumnWidths[i])
                    {
                        // split it up into its two parts
                        leftVal = toEolList[i].Substring(0, toEolList[i].Length - mColumnWidths[i]);
                        rightVal = toEolList[i].Substring(leftVal.Length);
                        _Log.Debug($"****FixToEolArray - Split leftVal: {leftVal}");
                        _Log.Debug($"****FixToEolArray - Split rightVal: {rightVal}");
                        // insert new value
                        toEolList[i] = rightVal;
                        toEolList.Insert(i, leftVal);
                        if (string.IsNullOrEmpty(toEolList[0]))
                            toEolList.RemoveAt(0); // removes the empty placeholder at the front (comment previously said "end")
                        _Log.Debug($"****FixToEolArray - Fixed toEolList:");
                        _Log.Debug(toEolList);
                    }
                }
                toEol = toEolList.ToArray();
                _Log.Debug($"****FixToEolArray - Ending array:");
                _Log.Debug(toEol);
            }
        }

        // Advances the cursor past the next occurrence of text (or to end-of-data).
        private void ScanPast(string text)
        {
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
                _I = num + text.Length;
            else
                _I = _Data.Length;
        }

        // Returns the trimmed text between the cursor and the next occurrence of text,
        // advancing the cursor past it (or to end-of-data when not found).
        private string GetBefore(string text)
        {
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
            {
                string str = _Data.Substring(_I, num - _I);
                _I = num + text.Length;
                return str.Trim();
            }
            string str1 = _Data.Substring(_I);
            _I = _Data.Length;
            return str1.Trim();
        }

        // As GetBefore(text) but optionally without trimming the result.
        private string GetBefore(string text, bool trim)
        {
            if (trim)
                return GetBefore(text);
            int num = _Data.IndexOf(text, _I);
            if (num > -1)
            {
                string str = _Data.Substring(_I, num - _I);
                _I = num + text.Length;
                return str;
            }
            string str1 = _Data.Substring(_I);
            _I = _Data.Length;
            return str1;
        }

        // True when text is all whitespace (or empty); assumes a non-null argument.
        private bool IsNullOrWhiteSpace(string text)
        {
            for (int index = 0; index < text.Length; ++index)
            {
                if (!char.IsWhiteSpace(text[index]))
                    return false;
            }
            return true;
        }

        // True when the remainder of the current line contains only whitespace.
        private bool IsBlankLine()
        {
            int num = _Data.IndexOf("\n", _I);
            return IsNullOrWhiteSpace(num > -1 ? _Data.Substring(_I, num - _I) : _Data.Substring(_I));
        }

        // Trimmed text from the cursor to end-of-line.
        private string GetToEOL()
        {
            return GetBefore("\n");
        }

        // Text from the cursor to end-of-line, optionally untrimmed.
        private string GetToEOL(bool trim)
        {
            if (trim)
                return GetToEOL();
            return GetBefore("\n", false);
        }

        // Trimmed text up to (not past) the next occurrence of text; cursor unchanged.
        private string GetToText(string text)
        {
            return _Data.Substring(_I, _Data.IndexOf(text, _I) - _I).Trim();
        }

        // Next whitespace-delimited token, advancing the cursor past it.
        private string GetToken()
        {
            while (_I < _Data.Length && IsNullOrWhiteSpace(_Data.Substring(_I, 1)))
                ++_I;
            int j = _I;
            while (j < _Data.Length && !IsNullOrWhiteSpace(_Data.Substring(j, 1)))
                ++j;
            string str = _Data.Substring(_I, j - _I);
            _I = j;
            return str.Trim();
        }

        // Reads the next line without moving the cursor.
        private string PeekNextLine()
        {
            int j = _I;
            string toEol = GetToEOL();
            _I = j;
            return toEol;
        }

        /// <summary>
        /// Parses the Lot Summary page text into a HeaderFile, registering each "*NN"
        /// slot marker in <paramref name="slots"/> and filling the Min/Max/Average/StdDev rows.
        /// </summary>
        /// <exception cref="Exception">Thrown when the header page text is missing.</exception>
        private HeaderFile ParseLotSummary(ILogic logic, string headerFileName, Dictionary<string, string> pages, Dictionary<string, List<DataFile>> slots)
        {
            HeaderFile result = new HeaderFile
            {
                JobID = logic.Logistics.JobID,
                MesEntity = logic.Logistics.MesEntity,
                Date = DateTime.Now.ToString()
            };
            _I = 0;
            result.ParseErrorText = string.Empty;
            if (!pages.ContainsKey(headerFileName))
                throw new Exception();
            _I = 0;
            _Data = pages[headerFileName];
            ScanPast("Date:");
            result.Date = GetToEOL();
            ScanPast("Recipe ID:");
            result.Recipe = GetBefore("LotID:");
            result.Recipe = result.Recipe.Replace(";", "");
            if (_Data.Contains("[]"))
                result.Lot = GetBefore("[]");
            else if (_Data.Contains("[7]"))
                result.Lot = GetBefore("[7]");
            else
                result.Lot = GetBefore("[");
            // Remove illegal characters \/:*?"<>| found in the Lot.
            // (The previous pattern also listed "," between the escapes, so commas in a
            // lot ID were replaced too — the class now matches only the characters above.)
            result.Lot = Regex.Replace(result.Lot, @"[\\/:*?""<>|]", "_").Split('\r')[0].Split('\n')[0];
            // determine number of wafers and their slot numbers
            _Log.Debug(_Data.Substring(_I));
            string slot;
            string toEOL;
            int slotCount = _Data.Substring(_I).Split('*').Length - 1;
            _Log.Debug($"****HeaderFile - Slot Count: {slotCount}.");
            for (int i = 0; i < slotCount; i++)
            {
                ScanPast("*");
                toEOL = GetToEOL(false);
                slot = string.Concat("*", toEOL.Substring(0, 2));
                if (!slots.ContainsKey(slot))
                    slots.Add(slot, new List<DataFile>());
            }
            _Log.Debug($"****HeaderFile - Slots:");
            _Log.Debug(slots);
            ScanPast("Min:");
            string[] toEol1 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol1 Count: {toEol1.Length}.");
            FixToEolArray(ref toEol1);
            result.LPDCountMin = toEol1[0].Trim();
            result.LPDCM2Min = toEol1[1].Trim();
            result.AreaCountMin = toEol1[2].Trim();
            result.AreaTotalMin = toEol1[3].Trim();
            result.ScratchCountMin = toEol1[4].Trim();
            result.ScratchTotalMin = toEol1[5].Trim();
            result.SumOfDefectsMin = toEol1[6].Trim();
            result.HazeRegionMin = toEol1[7].Trim();
            result.HazeAverageMin = toEol1[8].Trim();
            ScanPast("Max:");
            string[] toEol2 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol2 Count: {toEol2.Length}.");
            FixToEolArray(ref toEol2);
            result.LPDCountMax = toEol2[0].Trim();
            result.LPDCM2Max = toEol2[1].Trim();
            result.AreaCountMax = toEol2[2].Trim();
            result.AreaTotalMax = toEol2[3].Trim();
            result.ScratchCountMax = toEol2[4].Trim();
            result.ScratchTotalMax = toEol2[5].Trim();
            result.SumOfDefectsMax = toEol2[6].Trim();
            result.HazeRegionMax = toEol2[7].Trim();
            result.HazeAverageMax = toEol2[8].Trim();
            ScanPast("Average:");
            string[] toEol3 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol3 Count: {toEol3.Length}.");
            FixToEolArray(ref toEol3);
            result.LPDCountAvg = toEol3[0].Trim();
            result.LPDCM2Avg = toEol3[1].Trim();
            result.AreaCountAvg = toEol3[2].Trim();
            result.AreaTotalAvg = toEol3[3].Trim();
            result.ScratchCountAvg = toEol3[4].Trim();
            result.ScratchTotalAvg = toEol3[5].Trim();
            result.SumOfDefectsAvg = toEol3[6].Trim();
            result.HazeRegionAvg = toEol3[7].Trim();
            result.HazeAverageAvg = toEol3[8].Trim();
            ScanPast("Std Dev:");
            string[] toEol4 = GetToEOL(false).Trim().Split(' ');
            _Log.Debug($"****HeaderFile - toEol4 Count: {toEol4.Length}.");
            FixToEolArray(ref toEol4);
            result.LPDCountStdDev = toEol4[0].Trim();
            result.LPDCM2StdDev = toEol4[1].Trim();
            result.AreaCountStdDev = toEol4[2].Trim();
            result.AreaTotalStdDev = toEol4[3].Trim();
            result.ScratchCountStdDev = toEol4[4].Trim();
            result.ScratchTotalStdDev = toEol4[5].Trim();
            result.SumOfDefectsStdDev = toEol4[6].Trim();
            result.HazeRegionStdDev = toEol4[7].Trim();
            result.HazeAverageStdDev = toEol4[8].Trim();
            // Lot format is Reactor-RDS-PSN; missing segments are simply left unset.
            string[] segments = result.Lot.Split('-');
            if (segments.Length > 0)
                result.Reactor = segments[0];
            if (segments.Length > 1)
                result.RDS = segments[1];
            if (segments.Length > 2)
                result.PSN = segments[2];
            // Example of header.UniqueId is TENCOR1_33-289217-4693_201901300556533336
            result.UniqueId = string.Format("{0}_{1}_{2}", logic.Logistics.JobID, result.Lot, Path.GetFileNameWithoutExtension(logic.Logistics.ReportFullPath));
            return result;
        }

        /// <summary>
        /// Parses one wafer-summary page into a DataFile tied to the given header.
        /// </summary>
        /// <exception cref="Exception">Thrown when the page text is missing.</exception>
        private DataFile ParseWaferSummary(HeaderFile headerFile, string waferFileName, Dictionary<string, string> pages)
        {
            DataFile result = new DataFile { Data = "*Data*", i = -1, };
            _I = 0;
            List<string> stringList = new List<string>();
            result.HeaderUniqueId = headerFile.UniqueId;
            result.Id = 0;
            result.Title = null;
            if (!pages.ContainsKey(waferFileName))
                throw new Exception();
            _I = 0;
            _Data = pages[waferFileName];
            ScanPast("Date:");
            result.Date = GetToEOL();
            ScanPast("ID#");
            result.Slot = GetToEOL();
            if (result.Slot.Length > 5)
                result.Slot = string.Concat(result.Slot.Substring(0, 5), "... - ***");
            ScanPast("Comments:");
            result.Comments = GetToEOL();
            ScanPast("Sort:");
            result.Sort = GetToEOL();
            ScanPast("LPD Count:");
            result.LPDCount = GetToEOL();
            ScanPast("LPD / cm2:");
            result.LPDCM2 = GetToEOL();
            // Collect consecutive "Bin N:" lines; the loop's final GetBefore(":") consumes
            // the "Mean:" label, so the next GetToEOL() reads the mean value.
            while (GetBefore(":").Contains("Bin"))
                stringList.Add(GetToEOL());
            if (stringList.Count >= 1)
                result.Bin1 = stringList[0];
            if (stringList.Count >= 2)
                result.Bin2 = stringList[1];
            if (stringList.Count >= 3)
                result.Bin3 = stringList[2];
            if (stringList.Count >= 4)
                result.Bin4 = stringList[3];
            if (stringList.Count >= 5)
                result.Bin5 = stringList[4];
            if (stringList.Count >= 6)
                result.Bin6 = stringList[5];
            if (stringList.Count >= 7)
                result.Bin7 = stringList[6];
            if (stringList.Count >= 8)
                result.Bin8 = stringList[7];
            result.Mean = GetToEOL();
            ScanPast("Std Dev:");
            result.StdDev = GetToEOL();
            ScanPast("Area Count:");
            result.AreaCount = GetToEOL();
            ScanPast("Area Total:");
            result.AreaTotal = GetToEOL();
            ScanPast("Scratch Count:");
            result.ScratchCount = GetToEOL();
            ScanPast("Scratch Total:");
            result.ScratchTotal = GetToEOL();
            ScanPast("Sum of All Defects:");
            result.SumOfDefects = GetToEOL();
            ScanPast("Haze Region:");
            result.HazeRegion = GetToEOL();
            ScanPast("Haze Average:");
            result.HazeAverage = GetToEOL();
            ScanPast("Haze Peak:");
            result.HazePeak = GetToEOL();
            ScanPast("Laser:");
            result.Laser = GetBefore("Gain:");
            result.Gain = GetBefore("Diameter:");
            result.Diameter = GetToEOL();
            ScanPast("Thresh:");
            result.Thresh = GetBefore("Exclusion:");
            result.Exclusion = GetToEOL();
            ScanPast("Haze Rng:");
            result.HazeRng = GetBefore("Thruput:");
            result.Thruput = GetToEOL();
            ScanPast("Recipe ID:");
            result.Recipe = GetToEOL();
            result.UniqueId = string.Format("{0}_{1}", headerFile.UniqueId, result.Slot.Replace("*", string.Empty).TrimStart('0'));
            return result;
        }

        /// <summary>
        /// Converts the PCL report to PDF, splits it into per-page PDFs/text files,
        /// parses the lot summary (last page) and each wafer summary, renames the page
        /// files by slot, records missing slots, and returns (header, wafer details).
        /// </summary>
        private Tuple<HeaderFile, List<DataFile>> Parse(ILogic logic, ConfigData configData, List<FileInfo> fileInfoCollection)
        {
            Tuple<HeaderFile, List<DataFile>> result;
            object item;
            string pageText;
            string pagePDFFile;
            string pageTextFile;
            List<string> sourceFiles = new List<string>();
            List<string> missingSlots = new List<string>();
            List<DataFile> dataFiles = new List<DataFile>();
            Dictionary<string, string> pages = new Dictionary<string, string>();
            string sourcePath = Path.GetDirectoryName(logic.Logistics.ReportFullPath);
            Dictionary<string, List<DataFile>> slots = new Dictionary<string, List<DataFile>>();
            string sourceFileNamePdf = ConvertSourceFileToPdf(configData, logic.Logistics.ReportFullPath);
            sourceFiles.Add(sourceFileNamePdf);
            string sourceFileNameNoExt = Path.GetFileNameWithoutExtension(logic.Logistics.ReportFullPath);
            // Split the PDF with PDFBox (via IKVM): odd pages are wafer images, even pages
            // are wafer summaries, and the last page is the lot summary.
            java.io.File file = new java.io.File(sourceFileNamePdf);
            org.apache.pdfbox.util.Splitter splitter = new org.apache.pdfbox.util.Splitter();
            org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(file);
            java.util.List list = splitter.split(pdDocument);
            java.util.ListIterator iterator = list.listIterator();
            org.apache.pdfbox.util.PDFTextStripper dataStripper = new org.apache.pdfbox.util.PDFTextStripper();
            for (short i = 1; i < short.MaxValue; i++)
            {
                if (!iterator.hasNext())
                    break;
                item = iterator.next();
                pagePDFFile = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", i, ".pdf");
                pageTextFile = Path.ChangeExtension(pagePDFFile, ".txt");
                if (File.Exists(pageTextFile))
                {
                    // Cached text from a prior run — reuse it and discard the split page.
                    pageText = File.ReadAllText(pageTextFile);
                    sourceFiles.Add(pageTextFile);
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pd.close();
                }
                else if (File.Exists(pagePDFFile))
                {
                    // Page PDF exists from a prior run — strip its text and discard the split page.
                    org.apache.pdfbox.pdmodel.PDDocument document = org.apache.pdfbox.pdmodel.PDDocument.load(pagePDFFile);
                    pageText = dataStripper.getText(document);
                    document.close();
                    sourceFiles.Add(pagePDFFile);
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pd.close();
                }
                else
                {
                    // First run — persist both the page PDF and its extracted text.
                    if (!(item is org.apache.pdfbox.pdmodel.PDDocument pd))
                        continue;
                    pageText = dataStripper.getText(pd);
                    pd.save(pagePDFFile);
                    sourceFiles.Add(pagePDFFile);
                    pd.close();
                    File.WriteAllText(pageTextFile, pageText);
                    sourceFiles.Add(pageTextFile);
                }
                pages.Add(pagePDFFile, pageText);
            }
            pdDocument.close();
            // parse lot summary
            _Log.Debug($"****ParseData - Parsing lot summary");
            List<Tuple<string, string>> pageMapping = new List<Tuple<string, string>>();
            string headerFileName = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", pages.Count, ".pdf");
            HeaderFile headerFile = ParseLotSummary(logic, headerFileName, pages, slots);
            foreach (KeyValuePair<string, string> keyValuePair in pages)
            {
                if (keyValuePair.Key == headerFileName)
                    continue;
                if (string.IsNullOrEmpty(keyValuePair.Value.Trim()))
                {
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                if (!pages.ContainsKey(keyValuePair.Key))
                    throw new Exception();
                DataFile dataFile = ParseWaferSummary(headerFile, keyValuePair.Key, pages);
                if (string.IsNullOrEmpty(dataFile.Recipe) || dataFile.Recipe != headerFile.Recipe)
                {
                    missingSlots.Add(keyValuePair.Key);
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                if (!slots.ContainsKey(dataFile.Slot))
                {
                    missingSlots.Add(keyValuePair.Key);
                    pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Empty));
                    continue;
                }
                pageMapping.Add(new Tuple<string, string>(keyValuePair.Key, string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_", dataFile.Slot.Replace('*', 's'), "_data.pdf")));
                slots[dataFile.Slot].Add(dataFile);
            }
            // Rename the lot-summary page to the canonical "_data.pdf" name.
            string checkFileName = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_data.pdf");
            if (!File.Exists(checkFileName))
            {
                File.Move(headerFileName, checkFileName);
                sourceFiles.Remove(headerFileName);
                sourceFiles.Add(checkFileName);
            }
            // Walk backwards: each mapped data page is renamed by slot, and the unmapped
            // page preceding it (the wafer image) inherits the matching "_image.pdf" name.
            checkFileName = string.Empty;
            for (int i = pageMapping.Count - 1; i > -1; i--)
            {
                if (!string.IsNullOrEmpty(pageMapping[i].Item2))
                {
                    checkFileName = pageMapping[i].Item2;
                    if (!File.Exists(checkFileName))
                    {
                        File.Move(pageMapping[i].Item1, checkFileName);
                        sourceFiles.Remove(pageMapping[i].Item1);
                        sourceFiles.Add(checkFileName);
                    }
                }
                else if (!string.IsNullOrEmpty(checkFileName))
                {
                    checkFileName = checkFileName.Replace("_data.pdf", "_image.pdf");
                    if (!File.Exists(checkFileName))
                    {
                        File.Move(pageMapping[i].Item1, checkFileName);
                        sourceFiles.Remove(pageMapping[i].Item1);
                        sourceFiles.Add(checkFileName);
                    }
                    checkFileName = string.Empty;
                }
            }
            foreach (KeyValuePair<string, List<DataFile>> keyValuePair in slots)
            {
                if (!keyValuePair.Value.Any() || keyValuePair.Value[0] is null)
                    missingSlots.Add(string.Concat("Slot ", keyValuePair.Key, ") is missing."));
                else
                {
                    foreach (DataFile data in keyValuePair.Value)
                        dataFiles.Add(data);
                }
            }
            if (missingSlots.Any())
            {
                string missingSlotsFile = string.Concat(sourcePath, @"\", sourceFileNameNoExt, "_MissingSlots.txt");
                File.WriteAllLines(missingSlotsFile, missingSlots);
                sourceFiles.Add(missingSlotsFile);
            }
            // Validate the equipment date is parseable, then substitute the current time
            // because the equipment clock is known to be wrong.
            headerFile.Date = DateTime.Parse(headerFile.Date).ToString(); //Equipment data is wrong!!!
            headerFile.Date = DateTime.Now.ToString(); //Equipment data is wrong!!!
            foreach (string sourceFile in sourceFiles)
                fileInfoCollection.Add(new FileInfo(sourceFile));
            fileInfoCollection.Add(new FileInfo(logic.Logistics.ReportFullPath));
            result = new Tuple<HeaderFile, List<DataFile>>(headerFile, dataFiles);
            return result;
        }

    }

}