365 lines
18 KiB
C#
365 lines
18 KiB
C#
using System.Collections.ObjectModel;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace File_Folder_Helper.ADO2025.PI5;
|
|
|
|
internal static partial class Helper20250219 {
|
|
|
|
/// <summary>
/// Parsed representation of a Process Data Standard Format (PDSF) file:
/// the tab-delimited data rows, the column names read from the header line,
/// the LOGISTICS_* header lines, and the sequence number (parsed from
/// "SEQUENCE=" or falling back to the file's last-write ticks).
/// </summary>
private record ProcessDataStandardFormat(ReadOnlyCollection<string> Body,
                                         ReadOnlyCollection<string> Columns,
                                         ReadOnlyCollection<string> Logistics,
                                         long? Sequence);
|
|
|
|
/// <summary>
/// Source-generated System.Text.Json context used to serialize and deserialize
/// <see cref="JsonElement"/> arrays without runtime reflection.
/// </summary>
[JsonSourceGenerationOptions(WriteIndented = true)]
[JsonSerializable(typeof(JsonElement[]))]
private partial class JsonElementCollectionSourceGenerationContext : JsonSerializerContext {
}
|
|
|
|
/// <summary>
/// Parsed command-line configuration for a comparison run.
/// </summary>
private record Input(ReadOnlyCollection<string> Backfill,            // columns compared only on the last row (see Compare)
                     ReadOnlyCollection<int> ColumnIndices,          // new-row property index per old column; -1 = no mapping
                     ReadOnlyCollection<string> Ignore,              // old columns excluded from comparison
                     ReadOnlyCollection<string> IndexOnly,           // columns compared only at the row named by their 2-digit numeric suffix
                     ReadOnlyDictionary<string, string> KeyValuePairs, // old-column-name -> new-column-name renames
                     ReadOnlyCollection<string> NewColumnNames,
                     ReadOnlyCollection<string> OldColumnNames);
|
|
|
|
/// <summary>
/// Entry point: parses the positional arguments into an <see cref="Input"/>,
/// enumerates files under the source directory matching the primary pattern,
/// and hands them to the per-file comparison worker.
/// </summary>
/// <param name="logger">Destination for progress and warning messages.</param>
/// <param name="args">
/// Positional arguments: [0] source directory, [2] search pattern for new files,
/// [3] search pattern for sibling old files, [4] ignore columns (csv),
/// [5] backfill columns (csv), [6] index-only columns (csv),
/// [7] "old|new" rename pairs (csv of pipe pairs), [8] old column names (csv),
/// [9] new column names (csv), [10] column indices (csv of int).
/// </param>
internal static void Compare(ILogger<Worker> logger, List<string> args) {
    string searchPattern = args[2];
    string searchPatternB = args[3];
    Dictionary<string, string> keyValuePairs = [];
    ReadOnlyCollection<string> ignore = args[4].Split(',').AsReadOnly();
    ReadOnlyCollection<string> backfill = args[5].Split(',').AsReadOnly();
    ReadOnlyCollection<string> indexOnly = args[6].Split(',').AsReadOnly();
    ReadOnlyCollection<string> oldColumnNames = args[8].Split(',').AsReadOnly();
    ReadOnlyCollection<string> newColumnNames = args[9].Split(',').AsReadOnly();
    ReadOnlyCollection<int> columnIndices = args[10].Split(',').Select(int.Parse).ToArray().AsReadOnly();
    foreach (string segment in args[7].Split(',')) {
        string[] segmentsB = segment.Split('|');
        // Only well-formed "old|new" pairs are kept.
        if (segmentsB.Length != 2)
            continue;
        // TryAdd replaces the original distinct-list + Contains dance:
        // the first occurrence of a key wins, duplicates are ignored.
        _ = keyValuePairs.TryAdd(segmentsB[0], segmentsB[1]);
    }
    Input input = new(Backfill: backfill,
                      ColumnIndices: columnIndices,
                      NewColumnNames: newColumnNames,
                      Ignore: ignore,
                      IndexOnly: indexOnly,
                      KeyValuePairs: keyValuePairs.AsReadOnly(),
                      OldColumnNames: oldColumnNames);
    string sourceDirectory = Path.GetFullPath(args[0]);
    string[] files = Directory.GetFiles(sourceDirectory, searchPattern, SearchOption.AllDirectories);
    logger.LogInformation("<{files}>(s)", files.Length);
    Compare(logger, sourceDirectory.Length, searchPatternB, input, files);
}
|
|
|
|
// For each "new" file, finds sibling "old" files in the same directory (matching
// searchPattern but not the file itself), converts both to JSON rows, optionally
// rewrites the new file into the old column layout (when the old-column count matches
// the index map), and logs whether each old/new pair matches.
private static void Compare(ILogger<Worker> logger, int sourceDirectoryLength, string searchPattern, Input input, string[] files) {
    bool compare;
    string directory;
    string[] matches;
    string directorySegment;
    string[] directoryFiles;
    const int columnsLine = 6; // column headers live on the seventh line of a PDSF file
    JsonElement[]? jsonElementsNew;
    JsonElement[]? jsonElementsOld;
    ProcessDataStandardFormat processDataStandardFormat;
    FileInfo[] collection = files.Select(l => new FileInfo(l)).ToArray();
    foreach (FileInfo fileInfo in collection) {
        directory = fileInfo.DirectoryName ?? throw new Exception();
        directoryFiles = Directory.GetFiles(directory, searchPattern, SearchOption.TopDirectoryOnly);
        // Candidate "old" files: everything matching the pattern except the file itself.
        matches = (from l in directoryFiles where l != fileInfo.FullName select l).ToArray();
        if (matches.Length < 1)
            continue;
        directorySegment = directory[sourceDirectoryLength..];
        processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, input.NewColumnNames.Count, columnsLine, fileInfo.FullName, lines: null);
        jsonElementsNew = GetArray(logger, input.NewColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
        if (jsonElementsNew is null)
            continue;
        if (input.OldColumnNames.Count == input.ColumnIndices.Count) {
            // Project the new rows into the old column layout and write a ".tsv" sibling.
            processDataStandardFormat = Get(logger, input, jsonElementsNew, processDataStandardFormat);
            Write(logger, fileInfo, processDataStandardFormat);
        }
        foreach (string match in matches) {
            processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, input.OldColumnNames.Count, columnsLine, match, lines: null);
            jsonElementsOld = GetArray(logger, input.OldColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
            // Row counts must agree before a column-by-column comparison is meaningful.
            if (jsonElementsOld is null || jsonElementsOld.Length != jsonElementsNew.Length) {
                logger.LogWarning("! <{match}> (jsonElementsOld.Length:{jsonElementsOld} != jsonElementsNew.Length:{jsonElementsNew})", match, jsonElementsOld?.Length, jsonElementsNew.Length);
                continue;
            }
            compare = Compare(logger, input, directorySegment, jsonElementsNew, jsonElementsOld);
            if (!compare) {
                logger.LogWarning("! <{match}>", match);
                continue;
            }
            logger.LogInformation("<{match}>", match);
        }
    }
}
|
|
|
|
// Compares each old-file JSON row against the corresponding new-file row. Old column
// names are mapped to new names via input.KeyValuePairs, falling back to exact-name
// lookup. Returns true only when no unknown, differing, or space-prefix-equal columns
// were recorded.
private static bool Compare(ILogger<Worker> logger, Input input, string directory, JsonElement[] jsonElementsNew, JsonElement[] jsonElementsOld) {
    bool result;
    int? q; // index of the matching property in the new row; null when unmapped
    string valueNew;
    string valueOld;
    List<string> columns = []; // old->new index map collected on the last row (logged as csv)
    JsonProperty jsonPropertyOld;
    JsonProperty jsonPropertyNew;
    JsonProperty[] jsonPropertiesOld;
    JsonProperty[] jsonPropertiesNew;
    List<string> unknownColumns = [];
    List<string> differentColumns = [];
    int last = jsonElementsOld.Length - 1;
    List<string> sameAfterSpaceSplitColumns = [];
    // NOTE(review): the loop condition "i > 0" stops at row 1, so row 0 is never
    // compared and the "i == 0" branches below are unreachable — confirm whether
    // "i >= 0" was intended before changing behavior.
    for (int i = last; i > 0; i--) {
        if (jsonElementsOld[i].ValueKind != JsonValueKind.Object) {
            unknownColumns.Add(string.Empty); // non-object row: force a failed result
            break;
        }
        jsonPropertiesOld = jsonElementsOld[i].EnumerateObject().ToArray();
        jsonPropertiesNew = jsonElementsNew[i].EnumerateObject().ToArray();
        for (int p = 0; p < jsonPropertiesOld.Length; p++) {
            jsonPropertyOld = jsonPropertiesOld[p];
            valueOld = jsonPropertyOld.Value.ToString();
            // Renamed columns are looked up in the new row under their mapped name.
            if (input.KeyValuePairs.TryGetValue(jsonPropertyOld.Name, out string? name) && !string.IsNullOrEmpty(name)) {
                q = TryGetPropertyIndex(jsonPropertiesNew, name);
                if (q is null && i == 0)
                    unknownColumns.Add($"{jsonPropertyOld.Name}|{name}");
            } else {
                q = TryGetPropertyIndex(jsonPropertiesNew, jsonPropertyOld.Name);
                if (q is null) {
                    if (i == 0)
                        unknownColumns.Add(jsonPropertyOld.Name);
                }
            }
            if (q is null) {
                // Unmapped column: record -1 in the index map (last row only).
                if (input.Ignore.Contains(jsonPropertyOld.Name)) {
                    if (i == last) {
                        columns.Add("-1");
                        logger.LogDebug("{p} )) {jsonPropertyOld.Name} **", p, jsonPropertyOld.Name);
                    }
                    continue;
                }
                if (i == last) {
                    columns.Add("-1");
                    if (!string.IsNullOrEmpty(valueOld))
                        logger.LogDebug("{p} )) {jsonPropertyOld.Name} ??", p, jsonPropertyOld.Name);
                }
            } else {
                if (i == last)
                    columns.Add(q.Value.ToString());
                jsonPropertyNew = jsonPropertiesNew[q.Value];
                valueNew = jsonPropertyNew.Value.ToString();
                if (i == last)
                    logger.LogDebug("{p} )) {jsonPropertyOld.Name} ~~ {q.Value} => {jsonPropertyNew.Name}", p, jsonPropertyOld.Name, q.Value, jsonPropertyNew.Name);
                if (valueNew != valueOld && !differentColumns.Contains(jsonPropertyOld.Name)) {
                    // New value that matches the old one up to its first space is
                    // tracked separately (still a failure, but reported differently).
                    if (valueNew.Length >= 2 && valueNew.Split(' ')[0] == valueOld)
                        sameAfterSpaceSplitColumns.Add(jsonPropertyOld.Name);
                    else {
                        // Backfill columns only need to match on the last row.
                        if (input.Backfill.Contains(jsonPropertyOld.Name) && i != last)
                            continue;
                        // Index-only columns are checked only at the row named by the
                        // column's two-digit numeric suffix (1-based).
                        if (input.IndexOnly.Contains(jsonPropertyOld.Name) && int.TryParse(jsonPropertyOld.Name[^2..], out int index) && i != index - 1)
                            continue;
                        logger.LogWarning("For [{jsonProperty.Name}] <{directory}> doesn't match (valueNew:{valueNew} != valueOld:{valueOld})!", jsonPropertyOld.Name, directory, valueNew, valueOld);
                        differentColumns.Add(jsonPropertyOld.Name);
                    }
                }
            }
        }
        if (i == last)
            // NOTE(review): dynamic string passed as a log template (CA2254).
            logger.LogInformation(string.Join(',', columns));
    }
    result = unknownColumns.Count == 0 && differentColumns.Count == 0 && sameAfterSpaceSplitColumns.Count == 0;
    return result;
}
|
|
|
|
/// <summary>
/// Returns the index of the property named <paramref name="propertyName"/> within
/// <paramref name="jsonProperties"/> (ordinal string equality), or null when absent.
/// </summary>
private static int? TryGetPropertyIndex(JsonProperty[] jsonProperties, string propertyName) {
    // The original had a second fallback scan (first-char check + length check +
    // full equality) that could never match anything the exact-equality scan above
    // it had missed; that dead code has been removed.
    for (int i = 0; i < jsonProperties.Length; i++) {
        if (jsonProperties[i].Name == propertyName)
            return i;
    }
    return null;
}
|
|
|
|
/// <summary>
/// Reads a PDSF file (or the supplied <paramref name="lines"/> when non-null) and
/// splits it into column names (from the line at <paramref name="columnsLine"/>),
/// body rows (up to NUM_DATA_ROWS), and LOGISTICS header lines. The sequence is
/// parsed from "SEQUENCE=..." in the first logistics line, falling back to
/// <paramref name="lastWriteTime"/> ticks.
/// </summary>
private static ProcessDataStandardFormat GetProcessDataStandardFormat(ILogger<Worker> logger, DateTime lastWriteTime, int expectedColumns, int columnsLine, string path, string[]? lines) {
    ProcessDataStandardFormat result;
    long sequence;
    string[] segments;
    List<string> body = [];
    List<string> logistics = [];
    bool lookForLogistics = false;
    lines ??= File.ReadAllLines(path);
    if (lines.Length <= columnsLine)
        segments = [];
    else {
        segments = lines[columnsLine].Split('\t');
        // A column count mismatch invalidates the header; columns are cleared so
        // downstream consumers (GetArray) skip the file's rows.
        if (segments.Length != expectedColumns) {
            logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
            segments = [];
        }
    }
    string[] columns = segments.Select(l => l.Trim('"')).ToArray();
    for (int r = columnsLine + 1; r < lines.Length; r++) {
        if (lines[r].StartsWith("NUM_DATA_ROWS"))
            lookForLogistics = true;
        if (!lookForLogistics) {
            body.Add(lines[r]);
            continue;
        }
        if (lines[r].StartsWith("LOGISTICS_1")) {
            for (int i = r; i < lines.Length; i++) {
                // Fixed: the terminator check previously tested lines[r] (the
                // loop-invariant LOGISTICS_1 line) instead of lines[i], so
                // END_HEADER never stopped the scan and every trailing line was
                // swallowed into logistics.
                if (lines[i].StartsWith("END_HEADER"))
                    break;
                logistics.Add(lines[i]);
            }
            break;
        }
    }
    if (logistics.Count == 0)
        sequence = lastWriteTime.Ticks;
    else {
        segments = logistics[0].Split("SEQUENCE=");
        sequence = segments.Length < 2 || !long.TryParse(segments[1].Split(';')[0], out long s) ? lastWriteTime.Ticks : s;
    }
    result = new(Body: body.AsReadOnly(),
                 Columns: columns.AsReadOnly(),
                 Logistics: logistics.AsReadOnly(),
                 Sequence: sequence);
    return result;
}
|
|
|
|
/// <summary>
/// Converts the tab-delimited body rows into an array of JSON objects keyed by
/// column name. Rows whose field count differs from
/// <paramref name="expectedColumns"/> are skipped with a warning. When
/// <paramref name="lookForNumbers"/> is true, all-digit values are emitted as JSON
/// numbers and empty values as JSON null; otherwise every value is a string.
/// Returns an empty array when there are no body rows (or the first row has no tab).
/// </summary>
private static JsonElement[]? GetArray(ILogger<Worker> logger, int expectedColumns, ProcessDataStandardFormat processDataStandardFormat, bool lookForNumbers) {
    JsonElement[]? results;
    if (processDataStandardFormat.Body.Count == 0 || !processDataStandardFormat.Body[0].Contains('\t'))
        results = JsonSerializer.Deserialize("[]", JsonElementCollectionSourceGenerationContext.Default.JsonElementArray) ?? throw new Exception();
    else {
        string value;
        string[] segments;
        List<string> lines = [];
        StringBuilder stringBuilder = new();
        foreach (string bodyLine in processDataStandardFormat.Body) {
            _ = stringBuilder.Clear();
            _ = stringBuilder.Append('{');
            segments = bodyLine.Split('\t');
            if (segments.Length != expectedColumns) {
                logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
                continue;
            }
            for (int c = 0; c < segments.Length; c++) {
                // Fixed escape order: backslashes must be escaped BEFORE quotes.
                // The original escaped quotes first, then doubled the backslash it
                // had just inserted ('"' became '\\"'), producing invalid JSON.
                value = segments[c].Replace("\\", "\\\\").Replace("\"", "\\\"");
                if (!lookForNumbers)
                    _ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
                else if (string.IsNullOrEmpty(value))
                    _ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":null,");
                else if (value.All(char.IsDigit))
                    _ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":").Append(value).Append(',');
                else
                    _ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
            }
            _ = stringBuilder.Remove(stringBuilder.Length - 1, 1); // drop trailing comma
            _ = stringBuilder.AppendLine("}");
            lines.Add(stringBuilder.ToString());
        }
        string json = $"[{string.Join(',', lines)}]";
        results = JsonSerializer.Deserialize(json, JsonElementCollectionSourceGenerationContext.Default.JsonElementArray);
    }
    return results;
}
|
|
|
|
// Projects the new-file JSON rows into the old column layout using
// input.ColumnIndices: each entry names the new-row property index to copy, and -1
// emits the old column's NAME as a literal placeholder value. Columns, Logistics,
// and Sequence pass through from the input record unchanged.
private static ProcessDataStandardFormat Get(ILogger<Worker> logger, Input input, JsonElement[] jsonElements, ProcessDataStandardFormat processDataStandardFormat) {
    ProcessDataStandardFormat result;
    int column;
    string value;
    List<string> values = [];
    List<string> results = [];
    JsonProperty jsonProperty;
    JsonProperty[] jsonProperties;
    // NOTE(review): populated on non-object rows but never read afterwards.
    List<string> unknownColumns = [];
    for (int i = 0; i < jsonElements.Length; i++) {
        values.Clear();
        if (jsonElements[i].ValueKind != JsonValueKind.Object) {
            unknownColumns.Add(string.Empty);
            break;
        }
        jsonProperties = jsonElements[i].EnumerateObject().ToArray();
        // Rows with an unexpected property count are skipped, not projected.
        if (jsonProperties.Length != input.NewColumnNames.Count) {
            logger.LogWarning("{jsonProperties} != {NewColumnNames}", jsonProperties.Length, input.NewColumnNames.Count);
            continue;
        }
        for (int c = 0; c < input.ColumnIndices.Count; c++) {
            column = input.ColumnIndices[c];
            if (column == -1)
                value = input.OldColumnNames[c];
            else {
                jsonProperty = jsonProperties[column];
                value = jsonProperty.Value.ToString();
            }
            values.Add(value);
        }
        results.Add(string.Join('\t', values));
    }
    result = new(Body: new(results),
                 Columns: processDataStandardFormat.Columns,
                 Logistics: processDataStandardFormat.Logistics,
                 Sequence: processDataStandardFormat.Sequence);
    return result;
}
|
|
|
|
/// <summary>
/// Serializes <paramref name="processDataStandardFormat"/> back into PDSF layout
/// and writes it next to <paramref name="fileInfo"/> with a ".tsv" suffix appended
/// to the full file name.
/// </summary>
/// <exception cref="InvalidOperationException">Sequence has not been populated.</exception>
private static void Write(ILogger<Worker> logger, FileInfo fileInfo, ProcessDataStandardFormat processDataStandardFormat) {
    // Was "throw new NullReferenceException(...)": NRE is reserved for the runtime
    // (CA2201); a missing precondition is an invalid-operation condition.
    if (processDataStandardFormat.Sequence is null)
        throw new InvalidOperationException($"{nameof(processDataStandardFormat.Sequence)} must be set before writing");
    const string format = "MM/dd/yyyy HH:mm:ss";
    // Placeholder offsets: this writer does not compute real byte offsets.
    const string endOffset = "E#######T";
    const string dataOffset = "D#######T";
    const string headerOffset = "H#######T";
    string startTime = new DateTime(processDataStandardFormat.Sequence.Value).ToString(format);
    List<string> results = [
        "HEADER_TAG\tHEADER_VALUE",
        "FORMAT\t2.00",
        "NUMBER_PASSES\t0001",
        $"HEADER_OFFSET\t{headerOffset}",
        $"DATA_OFFSET\t{dataOffset}",
        $"END_OFFSET\t{endOffset}",
        $"\"{string.Join("\",\t\"", processDataStandardFormat.Columns)}\"",
    ];
    results.AddRange(processDataStandardFormat.Body);
    results.Add($"NUM_DATA_ROWS\t{processDataStandardFormat.Body.Count.ToString().PadLeft(9, '0')}");
    results.Add($"NUM_DATA_COLUMNS\t{processDataStandardFormat.Columns.Count.ToString().PadLeft(9, '0')}");
    results.Add("DELIMITER\t;");
    results.Add($"START_TIME_FORMAT\t{format}");
    results.Add($"START_TIME\t{startTime}");
    results.Add("LOGISTICS_COLUMN\tA_LOGISTICS");
    results.Add("LOGISTICS_COLUMN\tB_LOGISTICS");
    results.AddRange(processDataStandardFormat.Logistics);
    File.WriteAllText($"{fileInfo.FullName}.tsv", string.Join(Environment.NewLine, results));
    logger.LogDebug("<{fileInfo}>", fileInfo);
}
|
|
|
|
} |