using System.Collections.ObjectModel;
using System.Globalization;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;

namespace File_Folder_Helper.ADO2025.PI5;

/// <summary>
/// Compares tab-delimited data files that live in the same directory: each file matching a
/// "new" search pattern is compared, column by column, against the files matching an "old"
/// search pattern, and optionally re-written in the old column layout.
/// </summary>
internal static partial class Helper20250219
{

    /// <summary>Parsed content of one tab-delimited data file.</summary>
    /// <param name="Body">Raw data lines found between the column header line and the first line starting with NUM_DATA_ROWS.</param>
    /// <param name="Columns">Column names from the header line with surrounding double quotes trimmed (empty when the header was rejected).</param>
    /// <param name="Logistics">Lines from the first line starting with LOGISTICS_1 (after the data) through the end of the file.</param>
    /// <param name="Sequence">Value parsed after "SEQUENCE=" in the first logistics line, or the supplied last-write-time ticks.</param>
    private record ProcessDataStandardFormat(ReadOnlyCollection<string> Body,
                                             ReadOnlyCollection<string> Columns,
                                             ReadOnlyCollection<string> Logistics,
                                             long? Sequence);

    [JsonSourceGenerationOptions(WriteIndented = true)]
    [JsonSerializable(typeof(JsonElement[]))]
    private partial class JsonElementCollectionSourceGenerationContext : JsonSerializerContext
    {
    }

    /// <summary>Column mapping configuration parsed from the command-line style arguments.</summary>
    /// <param name="BackfillColumns">Old column names whose mismatches are only reported on the last row.</param>
    /// <param name="ColumnIndices">For each old column, the index of the new column to copy from, or -1.</param>
    /// <param name="IgnoreColumns">Old column names that are logged differently when no new column matches.</param>
    /// <param name="IndexOnlyColumns">Old column names ending in a two-character number; mismatches are only reported on row (number - 1).</param>
    /// <param name="KeyValuePairs">Old column name to new column name.</param>
    /// <param name="NewColumnNames">Column names expected in the new files.</param>
    /// <param name="OldColumnNames">Column names expected in the old files.</param>
    private record ProcessDataStandardFormatMapping(ReadOnlyCollection<string> BackfillColumns,
                                                    ReadOnlyCollection<int> ColumnIndices,
                                                    ReadOnlyCollection<string> IgnoreColumns,
                                                    ReadOnlyCollection<string> IndexOnlyColumns,
                                                    ReadOnlyDictionary<string, string> KeyValuePairs,
                                                    ReadOnlyCollection<string> NewColumnNames,
                                                    ReadOnlyCollection<string> OldColumnNames);

    /// <summary>
    /// Entry point: builds the column mapping from <paramref name="args"/>, finds every file under the
    /// source directory that matches the "new" search pattern and compares it with its siblings.
    /// </summary>
    /// <param name="logger">Receives progress, warnings and debug output.</param>
    /// <param name="args">
    /// [0] source directory, [2] search pattern for new files, [3] search pattern for old files,
    /// [4] ignore columns, [5] backfill columns, [6] index-only columns, [7] "old|new" name pairs,
    /// [8] old column names, [9] new column names, [10] column indices. Items 4-10 are comma separated.
    /// Index 1 is not read here.
    /// </param>
    internal static void Compare(ILogger logger, List<string> args)
    {
        string[] segmentsB;
        string searchPattern = args[2];
        string searchPatternB = args[3];
        string[] segments = args[7].Split(',');
        Dictionary<string, string> keyValuePairs = [];
        ReadOnlyCollection<string> ignore = args[4].Split(',').AsReadOnly();
        ReadOnlyCollection<string> backfill = args[5].Split(',').AsReadOnly();
        ReadOnlyCollection<string> indexOnly = args[6].Split(',').AsReadOnly();
        ReadOnlyCollection<string> oldColumnNames = args[8].Split(',').AsReadOnly();
        ReadOnlyCollection<string> newColumnNames = args[9].Split(',').AsReadOnly();
        // The indices are machine-written configuration, so parse them culture-independently.
        ReadOnlyCollection<int> columnIndices = args[10].Split(',')
                                                        .Select(l => int.Parse(l, CultureInfo.InvariantCulture))
                                                        .ToArray()
                                                        .AsReadOnly();
        foreach (string segment in segments)
        {
            segmentsB = segment.Split('|');
            if (segmentsB.Length != 2)
                continue;
            // The first mapping for a given old name wins; later duplicates are dropped.
            _ = keyValuePairs.TryAdd(segmentsB[0], segmentsB[1]);
        }
        ProcessDataStandardFormatMapping processDataStandardFormatMapping = new(BackfillColumns: backfill,
                                                                                ColumnIndices: columnIndices,
                                                                                NewColumnNames: newColumnNames,
                                                                                IgnoreColumns: ignore,
                                                                                IndexOnlyColumns: indexOnly,
                                                                                KeyValuePairs: keyValuePairs.AsReadOnly(),
                                                                                OldColumnNames: oldColumnNames);
        string sourceDirectory = Path.GetFullPath(args[0]);
        string[] files = Directory.GetFiles(sourceDirectory, searchPattern, SearchOption.AllDirectories);
        logger.LogInformation("<{files}>(s)", files.Length);
        Compare(logger, sourceDirectory.Length, searchPatternB, processDataStandardFormatMapping, files);
    }

    /// <summary>
    /// For each new file, loads it, optionally writes it back out in the old column layout, and
    /// compares it against every other file in the same directory that matches <paramref name="searchPattern"/>.
    /// </summary>
    /// <param name="logger">Receives one line per compared file.</param>
    /// <param name="sourceDirectoryLength">Length of the source directory path; stripped from each directory for log output.</param>
    /// <param name="searchPattern">Pattern used to find the old files next to each new file.</param>
    /// <param name="pdsfMapping">Column mapping configuration.</param>
    /// <param name="files">Full paths of the new files.</param>
    private static void Compare(ILogger logger, int sourceDirectoryLength, string searchPattern, ProcessDataStandardFormatMapping pdsfMapping, string[] files)
    {
        bool compare;
        string directory;
        string[] matches;
        string directorySegment;
        string[] directoryFiles;
        const int columnsLine = 6; // zero-based index of the column header line
        JsonElement[]? jsonElementsNew;
        JsonElement[]? jsonElementsOld;
        ProcessDataStandardFormat processDataStandardFormat;
        FileInfo[] collection = files.Select(l => new FileInfo(l)).ToArray();
        foreach (FileInfo fileInfo in collection)
        {
            directory = fileInfo.DirectoryName ?? throw new Exception();
            directoryFiles = Directory.GetFiles(directory, searchPattern, SearchOption.TopDirectoryOnly);
            matches = directoryFiles.Where(l => l != fileInfo.FullName).ToArray();
            if (matches.Length < 1)
                continue;
            directorySegment = directory[sourceDirectoryLength..];
            processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, pdsfMapping.NewColumnNames.Count, columnsLine, fileInfo.FullName, lines: null);
            jsonElementsNew = GetArray(logger, pdsfMapping.NewColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
            if (jsonElementsNew is null)
                continue;
            // Only re-write the file when there is exactly one source index per old column.
            if (pdsfMapping.OldColumnNames.Count == pdsfMapping.ColumnIndices.Count)
            {
                processDataStandardFormat = GetProcessDataStandardFormat(logger, pdsfMapping, jsonElementsNew, processDataStandardFormat);
                Write(logger, fileInfo, processDataStandardFormat);
            }
            foreach (string match in matches)
            {
                processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, pdsfMapping.OldColumnNames.Count, columnsLine, match, lines: null);
                jsonElementsOld = GetArray(logger, pdsfMapping.OldColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
                if (jsonElementsOld is null || jsonElementsOld.Length != jsonElementsNew.Length)
                {
                    logger.LogWarning("! <{match}> (jsonElementsOld.Length:{jsonElementsOld} != jsonElementsNew.Length:{jsonElementsNew})", match, jsonElementsOld?.Length, jsonElementsNew.Length);
                    continue;
                }
                compare = Compare(logger, pdsfMapping, directorySegment, jsonElementsNew, jsonElementsOld);
                if (!compare)
                {
                    logger.LogWarning("! <{match}>", match);
                    continue;
                }
                logger.LogInformation("<{match}>", match);
            }
        }
    }

    /// <summary>
    /// Compares the old rows against the new rows property by property, walking from the last row
    /// to the first. Both arrays must have the same length (the caller checks this).
    /// </summary>
    /// <param name="logger">Receives the column mapping (on the last row) and every reported mismatch.</param>
    /// <param name="processDataStandardFormatMapping">Column mapping configuration.</param>
    /// <param name="directory">Directory text used in mismatch messages only.</param>
    /// <param name="jsonElementsNew">Rows of the new file.</param>
    /// <param name="jsonElementsOld">Rows of the old file.</param>
    /// <returns>
    /// <c>true</c> when every old column was found in the new rows, no mismatch was reported and no
    /// value matched only after cutting the new value at its first space.
    /// </returns>
    private static bool Compare(ILogger logger, ProcessDataStandardFormatMapping processDataStandardFormatMapping, string directory, JsonElement[] jsonElementsNew, JsonElement[] jsonElementsOld)
    {
        int? q;
        string oldName;
        string valueNew;
        string valueOld;
        List<string> columns = [];
        JsonProperty jsonPropertyOld;
        JsonProperty jsonPropertyNew;
        List<string> columnPairs = [];
        JsonProperty[] jsonPropertiesOld;
        JsonProperty[] jsonPropertiesNew;
        List<string> unknownColumns = [];
        List<string> differentColumns = [];
        int last = jsonElementsOld.Length - 1;
        List<string> sameAfterSpaceSplitColumns = [];
        for (int i = last; i > -1; i--)
        {
            if (jsonElementsOld[i].ValueKind != JsonValueKind.Object)
            {
                unknownColumns.Add(string.Empty);
                break;
            }
            jsonPropertiesOld = jsonElementsOld[i].EnumerateObject().ToArray();
            jsonPropertiesNew = jsonElementsNew[i].EnumerateObject().ToArray();
            for (int p = 0; p < jsonPropertiesOld.Length; p++)
            {
                jsonPropertyOld = jsonPropertiesOld[p];
                oldName = jsonPropertyOld.Name;
                valueOld = jsonPropertyOld.Value.ToString();
                // Look the old column up under its mapped name when one is configured, otherwise under
                // its own name. Unknown columns are recorded once, on the final iteration (row 0), and
                // this happens before the ignore list is consulted below.
                if (processDataStandardFormatMapping.KeyValuePairs.TryGetValue(oldName, out string? name) && !string.IsNullOrEmpty(name))
                {
                    q = TryGetPropertyIndex(jsonPropertiesNew, name);
                    if (q is null && i == 0)
                        unknownColumns.Add($"{oldName}|{name}");
                }
                else
                {
                    q = TryGetPropertyIndex(jsonPropertiesNew, oldName);
                    if (q is null && i == 0)
                        unknownColumns.Add(oldName);
                }
                if (q is null)
                {
                    if (processDataStandardFormatMapping.IgnoreColumns.Contains(oldName))
                    {
                        if (i == last)
                        {
                            columns.Add("-1");
                            columnPairs.Add($"{oldName}:");
                            logger.LogDebug("{p} )) {jsonPropertyOld.Name} **", p, oldName);
                        }
                        continue;
                    }
                    if (i == last)
                    {
                        columns.Add("-1");
                        columnPairs.Add($"{oldName}:");
                        if (!string.IsNullOrEmpty(valueOld))
                            logger.LogDebug("{p} )) {jsonPropertyOld.Name} ??", p, oldName);
                    }
                    continue;
                }
                jsonPropertyNew = jsonPropertiesNew[q.Value];
                valueNew = jsonPropertyNew.Value.ToString();
                if (i == last)
                {
                    columns.Add(q.Value.ToString());
                    columnPairs.Add($"{oldName}:{jsonPropertyNew.Name}");
                    logger.LogDebug("{p} )) {jsonPropertyOld.Name} ~~ {q.Value} => {jsonPropertyNew.Name}", p, oldName, q.Value, jsonPropertyNew.Name);
                }
                // A column is reported at most once, no matter how many rows differ.
                if (valueNew == valueOld || differentColumns.Contains(oldName))
                    continue;
                if (valueNew.Length >= 2 && valueNew.Split(' ')[0] == valueOld)
                {
                    sameAfterSpaceSplitColumns.Add(oldName);
                    continue;
                }
                if (processDataStandardFormatMapping.BackfillColumns.Contains(oldName) && i != last)
                    continue;
                // The length guard keeps the two-character suffix slice from throwing on short names.
                if (processDataStandardFormatMapping.IndexOnlyColumns.Contains(oldName)
                    && oldName.Length >= 2
                    && int.TryParse(oldName[^2..], NumberStyles.Integer, CultureInfo.InvariantCulture, out int index)
                    && i != index - 1)
                    continue;
                logger.LogWarning("For [{jsonProperty.Name}] <{directory}> doesn't match (valueNew:{valueNew} != valueOld:{valueOld})!", oldName, directory, valueNew, valueOld);
                differentColumns.Add(oldName);
            }
            if (i == last)
            {
                // Pass the joined text as an argument: column names are data and may contain braces,
                // which must not be interpreted as message-template placeholders.
                logger.LogInformation("{columns}", string.Join(',', columns));
                logger.LogInformation("{columnPairs};", string.Join(';', columnPairs));
            }
        }
        return unknownColumns.Count == 0 && differentColumns.Count == 0 && sameAfterSpaceSplitColumns.Count == 0;
    }

    /// <summary>Returns the index of the first property whose name equals <paramref name="propertyName"/>.</summary>
    /// <param name="jsonProperties">Properties to search, in order.</param>
    /// <param name="propertyName">Exact (ordinal, case-sensitive) name to find; may be empty.</param>
    /// <returns>The zero-based index, or <c>null</c> when no property has that name.</returns>
    private static int? TryGetPropertyIndex(JsonProperty[] jsonProperties, string propertyName)
    {
        for (int i = 0; i < jsonProperties.Length; i++)
        {
            if (jsonProperties[i].Name == propertyName)
                return i;
        }
        return null;
    }

    /// <summary>Reads a tab-delimited data file and splits it into header columns, body lines and logistics lines.</summary>
    /// <param name="logger">Receives a warning when the header does not have <paramref name="expectedColumns"/> columns.</param>
    /// <param name="lastWriteTime">Fallback for the sequence when none can be parsed from the logistics.</param>
    /// <param name="expectedColumns">Number of tab-separated columns the header line must have.</param>
    /// <param name="columnsLine">Zero-based index of the header line.</param>
    /// <param name="path">File to read; only used when <paramref name="lines"/> is <c>null</c>.</param>
    /// <param name="lines">Already-loaded file content, or <c>null</c> to read <paramref name="path"/>.</param>
    /// <returns>The parsed file. Columns are empty when the file is too short or the header count is wrong.</returns>
    private static ProcessDataStandardFormat GetProcessDataStandardFormat(ILogger logger, DateTime lastWriteTime, int expectedColumns, int columnsLine, string path, string[]? lines)
    {
        long sequence;
        string[] segments;
        List<string> body = [];
        List<string> logistics = [];
        bool lookForLogistics = false;
        lines ??= File.ReadAllLines(path);
        if (lines.Length <= columnsLine)
            segments = [];
        else
        {
            segments = lines[columnsLine].Split('\t');
            if (segments.Length != expectedColumns)
            {
                logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
                segments = [];
            }
        }
        string[] columns = segments.Select(l => l.Trim('"')).ToArray();
        for (int r = columnsLine + 1; r < lines.Length; r++)
        {
            if (lines[r].StartsWith("NUM_DATA_ROWS", StringComparison.Ordinal))
                lookForLogistics = true;
            if (!lookForLogistics)
            {
                body.Add(lines[r]);
                continue;
            }
            if (!lines[r].StartsWith("LOGISTICS_1", StringComparison.Ordinal))
                continue;
            // NOTE(review): the previous loop tested lines[r] (this LOGISTICS_1 line) for "END_HEADER",
            // so that check could never fire and every remaining line was captured. That behaviour is
            // kept on purpose: Write re-emits Logistics verbatim, so stopping at END_HEADER would drop
            // that line from the written file. Confirm the intended format before changing it.
            for (int i = r; i < lines.Length; i++)
                logistics.Add(lines[i]);
            break;
        }
        if (logistics.Count == 0)
            sequence = lastWriteTime.Ticks;
        else
        {
            segments = logistics[0].Split("SEQUENCE=");
            sequence = segments.Length >= 2 && long.TryParse(segments[1].Split(';')[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out long s)
                ? s
                : lastWriteTime.Ticks;
        }
        return new(Body: body.AsReadOnly(), Columns: columns.AsReadOnly(), Logistics: logistics.AsReadOnly(), Sequence: sequence);
    }

    /// <summary>Converts the body lines into one JSON object per line, keyed by column name.</summary>
    /// <param name="logger">Receives a warning for every skipped line.</param>
    /// <param name="expectedColumns">Number of tab-separated values each body line must have; other lines are skipped.</param>
    /// <param name="processDataStandardFormat">Parsed file providing the body lines and column names.</param>
    /// <param name="lookForNumbers">
    /// When <c>true</c>, empty values become JSON <c>null</c> and plain integers become JSON numbers;
    /// otherwise every value is a JSON string.
    /// </param>
    /// <returns>
    /// One element per accepted body line. Empty when there is no body, the first body line has no
    /// tab, or the parsed header does not provide <paramref name="expectedColumns"/> column names.
    /// </returns>
    private static JsonElement[]? GetArray(ILogger logger, int expectedColumns, ProcessDataStandardFormat processDataStandardFormat, bool lookForNumbers)
    {
        if (processDataStandardFormat.Body.Count == 0 || !processDataStandardFormat.Body[0].Contains('\t'))
            return [];
        if (processDataStandardFormat.Columns.Count != expectedColumns)
        {
            // The header was rejected (or is missing), so there are no names to key the values by.
            logger.LogWarning("{columns} != {expectedColumns}", processDataStandardFormat.Columns.Count, expectedColumns);
            return [];
        }
        string name;
        string value;
        string[] segments;
        List<string> lines = [];
        StringBuilder stringBuilder = new();
        foreach (string bodyLine in processDataStandardFormat.Body)
        {
            segments = bodyLine.Split('\t');
            if (segments.Length != expectedColumns)
            {
                logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
                continue;
            }
            _ = stringBuilder.Clear();
            _ = stringBuilder.Append('{');
            for (int c = 0; c < segments.Length; c++)
            {
                // JsonEncodedText escapes quotes, backslashes and control characters correctly; the
                // text deserializes back to exactly the original name / value.
                name = JsonEncodedText.Encode(processDataStandardFormat.Columns[c]).ToString();
                _ = stringBuilder.Append('"').Append(name).Append("\":");
                if (lookForNumbers && string.IsNullOrEmpty(segments[c]))
                    _ = stringBuilder.Append("null,");
                else if (lookForNumbers && IsJsonInteger(segments[c]))
                    _ = stringBuilder.Append(segments[c]).Append(',');
                else
                {
                    value = JsonEncodedText.Encode(segments[c]).ToString();
                    _ = stringBuilder.Append('"').Append(value).Append("\",");
                }
            }
            // Replace the trailing comma with the closing brace.
            _ = stringBuilder.Remove(stringBuilder.Length - 1, 1);
            _ = stringBuilder.Append('}');
            lines.Add(stringBuilder.ToString());
        }
        string json = $"[{string.Join(',', lines)}]";
        return JsonSerializer.Deserialize(json, JsonElementCollectionSourceGenerationContext.Default.JsonElementArray);
    }

    /// <summary>
    /// Tells whether <paramref name="value"/> can be written as a bare JSON number: ASCII digits only
    /// and no leading zero (JSON forbids "007"), so such values are kept as strings instead.
    /// </summary>
    private static bool IsJsonInteger(string value) =>
        value.Length > 0 && value.All(char.IsAsciiDigit) && (value.Length == 1 || value[0] != '0');

    /// <summary>Re-shapes the new rows into the old column layout.</summary>
    /// <param name="logger">Receives a warning for every row with an unexpected property count.</param>
    /// <param name="processDataStandardFormatMapping">Supplies the old column names and, per old column, the new column index to copy.</param>
    /// <param name="jsonElements">Rows of the new file.</param>
    /// <param name="processDataStandardFormat">Parsed new file; its logistics and sequence are carried over.</param>
    /// <returns>
    /// A copy whose body has one tab-joined line per accepted row and whose columns are the old column names.
    /// Processing stops at the first element that is not a JSON object.
    /// </returns>
    private static ProcessDataStandardFormat GetProcessDataStandardFormat(ILogger logger, ProcessDataStandardFormatMapping processDataStandardFormatMapping, JsonElement[] jsonElements, ProcessDataStandardFormat processDataStandardFormat)
    {
        int column;
        List<string> values = [];
        List<string> results = [];
        JsonProperty[] jsonProperties;
        foreach (JsonElement jsonElement in jsonElements)
        {
            if (jsonElement.ValueKind != JsonValueKind.Object)
                break;
            jsonProperties = jsonElement.EnumerateObject().ToArray();
            if (jsonProperties.Length != processDataStandardFormatMapping.NewColumnNames.Count)
            {
                logger.LogWarning("{jsonProperties} != {NewColumnNames}", jsonProperties.Length, processDataStandardFormatMapping.NewColumnNames.Count);
                continue;
            }
            values.Clear();
            for (int c = 0; c < processDataStandardFormatMapping.ColumnIndices.Count; c++)
            {
                column = processDataStandardFormatMapping.ColumnIndices[c];
                // An index of -1 has no source column: the old column's own name is written as the value.
                values.Add(column == -1
                    ? processDataStandardFormatMapping.OldColumnNames[c]
                    : jsonProperties[column].Value.ToString());
            }
            results.Add(string.Join('\t', values));
        }
        return new(Body: results.AsReadOnly(),
                   Columns: processDataStandardFormatMapping.OldColumnNames,
                   Logistics: processDataStandardFormat.Logistics,
                   Sequence: processDataStandardFormat.Sequence);
    }

    /// <summary>Writes the data next to <paramref name="fileInfo"/> as "&lt;full name&gt;.tsv".</summary>
    /// <param name="logger">Receives a debug line once the file is written.</param>
    /// <param name="fileInfo">Source file; only its full name is used to build the output path.</param>
    /// <param name="processDataStandardFormat">Columns, body, logistics and sequence to write.</param>
    /// <exception cref="NullReferenceException">The sequence is <c>null</c>.</exception>
    private static void Write(ILogger logger, FileInfo fileInfo, ProcessDataStandardFormat processDataStandardFormat)
    {
        if (processDataStandardFormat.Sequence is null)
            throw new NullReferenceException(nameof(processDataStandardFormat.Sequence));
        string endOffset = "E#######T";
        string dataOffset = "D#######T";
        string headerOffset = "H#######T";
        string format = "MM/dd/yyyy HH:mm:ss";
        // Invariant culture: otherwise '/' and ':' are replaced by the current culture's separators
        // and the value no longer matches the START_TIME_FORMAT written just above it.
        string startTime = new DateTime(processDataStandardFormat.Sequence.Value).ToString(format, CultureInfo.InvariantCulture);
        List<string> results =
        [
            "HEADER_TAG\tHEADER_VALUE",
            "FORMAT\t2.00",
            "NUMBER_PASSES\t0001",
            $"HEADER_OFFSET\t{headerOffset}",
            $"DATA_OFFSET\t{dataOffset}",
            $"END_OFFSET\t{endOffset}",
            $"\"{string.Join("\"\t\"", processDataStandardFormat.Columns)}\"",
        ];
        results.AddRange(processDataStandardFormat.Body);
        results.Add($"NUM_DATA_ROWS\t{processDataStandardFormat.Body.Count.ToString().PadLeft(9, '0')}");
        results.Add($"NUM_DATA_COLUMNS\t{processDataStandardFormat.Columns.Count.ToString().PadLeft(9, '0')}");
        results.Add("DELIMITER\t;");
        results.Add($"START_TIME_FORMAT\t{format}");
        results.Add($"START_TIME\t{startTime}");
        results.Add("LOGISTICS_COLUMN\tA_LOGISTICS");
        results.Add("LOGISTICS_COLUMN\tB_LOGISTICS");
        results.AddRange(processDataStandardFormat.Logistics);
        File.WriteAllText($"{fileInfo.FullName}.tsv", string.Join(Environment.NewLine, results));
        logger.LogDebug("<{fileInfo}>", fileInfo);
    }

}