file-folder-helper/ADO2025/PI5/Helper-2025-02-19.cs

using System.Collections.ObjectModel;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace File_Folder_Helper.ADO2025.PI5;
internal static partial class Helper20250219 {
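// Parsed representation of a process-data-standard-format (PDSF) file: tab-delimited body rows,
// column names, the LOGISTICS_* header lines, and a sequence value.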
private record ProcessDataStandardFormat(ReadOnlyCollection<string> Body,
ReadOnlyCollection<string> Columns,
ReadOnlyCollection<string> Logistics,
long? Sequence);
[JsonSourceGenerationOptions(WriteIndented = true)]
[JsonSerializable(typeof(JsonElement[]))]
private partial class JsonElementCollectionSourceGenerationContext : JsonSerializerContext {
}
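// Column-mapping options parsed from the command line: columns to backfill, ignore, or compare by
// index only, old-to-new column-name renames, and the index of each old column within the new layout.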
private record Input(ReadOnlyCollection<string> Backfill,
ReadOnlyCollection<int> ColumnIndices,
ReadOnlyCollection<string> Ignore,
ReadOnlyCollection<string> IndexOnly,
ReadOnlyDictionary<string, string> KeyValuePairs,
ReadOnlyCollection<string> NewColumnNames,
ReadOnlyCollection<string> OldColumnNames);
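// Entry point. Argument layout: args[0] source directory, args[2] search pattern for new-format files,
// args[3] search pattern for old-format siblings, args[4] ignore columns, args[5] backfill columns,
// args[6] index-only columns, args[7] old|new column-name pairs, args[8] old column names,
// args[9] new column names, args[10] column indices (all comma-separated).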
internal static void Compare(ILogger<Worker> logger, List<string> args) {
string[] segmentsB;
List<string> distinct = [];
string searchPattern = args[2];
string searchPatternB = args[3];
string[] segments = args[7].Split(',');
Dictionary<string, string> keyValuePairs = [];
ReadOnlyCollection<string> ignore = args[4].Split(',').AsReadOnly();
ReadOnlyCollection<string> backfill = args[5].Split(',').AsReadOnly();
ReadOnlyCollection<string> indexOnly = args[6].Split(',').AsReadOnly();
ReadOnlyCollection<string> oldColumnNames = args[8].Split(',').AsReadOnly();
ReadOnlyCollection<string> newColumnNames = args[9].Split(',').AsReadOnly();
ReadOnlyCollection<int> columnIndices = args[10].Split(',').Select(int.Parse).ToArray().AsReadOnly();
foreach (string segment in segments) {
segmentsB = segment.Split('|');
if (segmentsB.Length != 2)
continue;
if (distinct.Contains(segmentsB[0]))
continue;
distinct.Add(segmentsB[0]);
keyValuePairs.Add(segmentsB[0], segmentsB[1]);
}
Input input = new(Backfill: backfill,
ColumnIndices: columnIndices,
NewColumnNames: newColumnNames,
Ignore: ignore,
IndexOnly: indexOnly,
KeyValuePairs: keyValuePairs.AsReadOnly(),
OldColumnNames: oldColumnNames);
string sourceDirectory = Path.GetFullPath(args[0]);
string[] files = Directory.GetFiles(sourceDirectory, searchPattern, SearchOption.AllDirectories);
logger.LogInformation("<{files}>(s)", files.Length);
Compare(logger, sourceDirectory.Length, searchPatternB, input, files);
}
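// For each new-format file: parse it, optionally rewrite it in the old column layout, then locate
// old-format siblings in the same directory and compare them row by row.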
private static void Compare(ILogger<Worker> logger, int sourceDirectoryLength, string searchPattern, Input input, string[] files) {
bool compare;
string directory;
string[] matches;
string directorySegment;
string[] directoryFiles;
const int columnsLine = 6;
JsonElement[]? jsonElementsNew;
JsonElement[]? jsonElementsOld;
ProcessDataStandardFormat processDataStandardFormat;
FileInfo[] collection = files.Select(l => new FileInfo(l)).ToArray();
foreach (FileInfo fileInfo in collection) {
directory = fileInfo.DirectoryName ?? throw new Exception();
directoryFiles = Directory.GetFiles(directory, searchPattern, SearchOption.TopDirectoryOnly);
matches = (from l in directoryFiles where l != fileInfo.FullName select l).ToArray();
if (matches.Length < 1)
continue;
directorySegment = directory[sourceDirectoryLength..];
processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, input.NewColumnNames.Count, columnsLine, fileInfo.FullName, lines: null);
jsonElementsNew = GetArray(logger, input.NewColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
if (jsonElementsNew is null)
continue;
if (input.OldColumnNames.Count == input.ColumnIndices.Count) {
processDataStandardFormat = Get(logger, input, jsonElementsNew, processDataStandardFormat);
Write(logger, fileInfo, processDataStandardFormat);
}
foreach (string match in matches) {
processDataStandardFormat = GetProcessDataStandardFormat(logger, fileInfo.LastWriteTime, input.OldColumnNames.Count, columnsLine, match, lines: null);
jsonElementsOld = GetArray(logger, input.OldColumnNames.Count, processDataStandardFormat, lookForNumbers: false);
if (jsonElementsOld is null || jsonElementsOld.Length != jsonElementsNew.Length) {
logger.LogWarning("! <{match}> (jsonElementsOld.Length:{jsonElementsOld} != jsonElementsNew.Length:{jsonElementsNew})", match, jsonElementsOld?.Length, jsonElementsNew.Length);
continue;
}
compare = Compare(logger, input, directorySegment, jsonElementsNew, jsonElementsOld);
if (!compare) {
logger.LogWarning("! <{match}>", match);
continue;
}
logger.LogInformation("<{match}>", match);
}
}
}
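// Compares each old row against the matching new row property by property, honoring the ignore,
// backfill, and index-only rules; returns true only when no unknown, differing, or
// space-split-only-matching columns were found.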
private static bool Compare(ILogger<Worker> logger, Input input, string directory, JsonElement[] jsonElementsNew, JsonElement[] jsonElementsOld) {
bool result;
int? q;
string valueNew;
string valueOld;
List<string> columns = [];
JsonProperty jsonPropertyOld;
JsonProperty jsonPropertyNew;
JsonProperty[] jsonPropertiesOld;
JsonProperty[] jsonPropertiesNew;
List<string> unknownColumns = [];
List<string> differentColumns = [];
int last = jsonElementsOld.Length - 1;
List<string> sameAfterSpaceSplitColumns = [];
for (int i = last; i >= 0; i--) {
if (jsonElementsOld[i].ValueKind != JsonValueKind.Object) {
unknownColumns.Add(string.Empty);
break;
}
jsonPropertiesOld = jsonElementsOld[i].EnumerateObject().ToArray();
jsonPropertiesNew = jsonElementsNew[i].EnumerateObject().ToArray();
for (int p = 0; p < jsonPropertiesOld.Length; p++) {
jsonPropertyOld = jsonPropertiesOld[p];
valueOld = jsonPropertyOld.Value.ToString();
if (input.KeyValuePairs.TryGetValue(jsonPropertyOld.Name, out string? name) && !string.IsNullOrEmpty(name)) {
q = TryGetPropertyIndex(jsonPropertiesNew, name);
if (q is null && i == 0)
unknownColumns.Add($"{jsonPropertyOld.Name}|{name}");
} else {
q = TryGetPropertyIndex(jsonPropertiesNew, jsonPropertyOld.Name);
if (q is null) {
if (i == 0)
unknownColumns.Add(jsonPropertyOld.Name);
}
}
if (q is null) {
if (input.Ignore.Contains(jsonPropertyOld.Name)) {
if (i == last) {
columns.Add("-1");
logger.LogDebug("{p} )) {jsonPropertyOld.Name} **", p, jsonPropertyOld.Name);
}
continue;
}
if (i == last) {
columns.Add("-1");
if (!string.IsNullOrEmpty(valueOld))
logger.LogDebug("{p} )) {jsonPropertyOld.Name} ??", p, jsonPropertyOld.Name);
}
} else {
if (i == last)
columns.Add(q.Value.ToString());
jsonPropertyNew = jsonPropertiesNew[q.Value];
valueNew = jsonPropertyNew.Value.ToString();
if (i == last)
logger.LogDebug("{p} )) {jsonPropertyOld.Name} ~~ {q.Value} => {jsonPropertyNew.Name}", p, jsonPropertyOld.Name, q.Value, jsonPropertyNew.Name);
if (valueNew != valueOld && !differentColumns.Contains(jsonPropertyOld.Name)) {
if (valueNew.Length >= 2 && valueNew.Split(' ')[0] == valueOld)
sameAfterSpaceSplitColumns.Add(jsonPropertyOld.Name);
else {
if (input.Backfill.Contains(jsonPropertyOld.Name) && i != last)
continue;
if (input.IndexOnly.Contains(jsonPropertyOld.Name) && int.TryParse(jsonPropertyOld.Name[^2..], out int index) && i != index - 1)
continue;
logger.LogWarning("For [{jsonProperty.Name}] <{directory}> doesn't match (valueNew:{valueNew} != valueOld:{valueOld})!", jsonPropertyOld.Name, directory, valueNew, valueOld);
differentColumns.Add(jsonPropertyOld.Name);
}
}
}
}
if (i == last)
logger.LogInformation(string.Join(',', columns));
}
result = unknownColumns.Count == 0 && differentColumns.Count == 0 && sameAfterSpaceSplitColumns.Count == 0;
return result;
}
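// Returns the index of the property with the given name, or null when it is not present;
// the second pass re-scans after filtering on first character and length.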
private static int? TryGetPropertyIndex(JsonProperty[] jsonProperties, string propertyName) {
int? result = null;
for (int i = 0; i < jsonProperties.Length; i++) {
if (jsonProperties[i].Name != propertyName)
continue;
result = i;
break;
}
if (result is null) {
for (int i = 0; i < jsonProperties.Length; i++) {
if (jsonProperties[i].Name[0] != propertyName[0])
continue;
if (jsonProperties[i].Name.Length != propertyName.Length)
continue;
if (jsonProperties[i].Name != propertyName)
continue;
result = i;
break;
}
}
return result;
}
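// Reads a PDSF file: takes the column names from the line at columnsLine, collects body rows until
// NUM_DATA_ROWS, captures the LOGISTICS_1..END_HEADER block, and extracts SEQUENCE=
// (falling back to the file's last-write ticks).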
private static ProcessDataStandardFormat GetProcessDataStandardFormat(ILogger<Worker> logger, DateTime lastWriteTime, int expectedColumns, int columnsLine, string path, string[]? lines) {
ProcessDataStandardFormat result;
long sequence;
string[] segments;
List<string> body = [];
List<string> logistics = [];
bool lookForLogistics = false;
lines ??= File.ReadAllLines(path);
if (lines.Length <= columnsLine)
segments = [];
else {
segments = lines[columnsLine].Split('\t');
if (segments.Length != expectedColumns) {
logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
segments = [];
}
}
string[] columns = segments.Select(l => l.Trim('"')).ToArray();
for (int r = columnsLine + 1; r < lines.Length; r++) {
if (lines[r].StartsWith("NUM_DATA_ROWS"))
lookForLogistics = true;
if (!lookForLogistics) {
body.Add(lines[r]);
continue;
}
if (lines[r].StartsWith("LOGISTICS_1")) {
for (int i = r; i < lines.Length; i++) {
if (lines[r].StartsWith("END_HEADER"))
break;
logistics.Add(lines[i]);
}
break;
}
}
if (logistics.Count == 0)
sequence = lastWriteTime.Ticks;
else {
segments = logistics[0].Split("SEQUENCE=");
sequence = segments.Length < 2 || !long.TryParse(segments[1].Split(';')[0], out long s) ? lastWriteTime.Ticks : s;
}
result = new(Body: body.AsReadOnly(),
Columns: columns.AsReadOnly(),
Logistics: logistics.AsReadOnly(),
Sequence: sequence);
return result;
}
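// Converts the tab-delimited body rows into JSON objects keyed by column name; when lookForNumbers
// is set, empty values become null and all-digit values are emitted unquoted.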
private static JsonElement[]? GetArray(ILogger<Worker> logger, int expectedColumns, ProcessDataStandardFormat processDataStandardFormat, bool lookForNumbers) {
JsonElement[]? results;
if (processDataStandardFormat.Body.Count == 0 || !processDataStandardFormat.Body[0].Contains('\t'))
results = JsonSerializer.Deserialize("[]", JsonElementCollectionSourceGenerationContext.Default.JsonElementArray) ?? throw new Exception();
else {
string value;
string[] segments;
List<string> lines = [];
StringBuilder stringBuilder = new();
foreach (string bodyLine in processDataStandardFormat.Body) {
_ = stringBuilder.Clear();
_ = stringBuilder.Append('{');
segments = bodyLine.Split('\t');
if (segments.Length != expectedColumns) {
logger.LogWarning("{segments} != {expectedColumns}", segments.Length, expectedColumns);
continue;
}
if (!lookForNumbers) {
for (int c = 0; c < segments.Length; c++) {
value = segments[c].Replace("\\", "\\\\").Replace("\"", "\\\"");
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
}
} else {
for (int c = 0; c < segments.Length; c++) {
value = segments[c].Replace("\\", "\\\\").Replace("\"", "\\\"");
if (string.IsNullOrEmpty(value))
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":").Append(value).Append("null,");
else if (value.All(char.IsDigit))
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":").Append(value).Append(',');
else
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
}
}
_ = stringBuilder.Remove(stringBuilder.Length - 1, 1);
_ = stringBuilder.AppendLine("}");
lines.Add(stringBuilder.ToString());
}
string json = $"[{string.Join(',', lines)}]";
results = JsonSerializer.Deserialize(json, JsonElementCollectionSourceGenerationContext.Default.JsonElementArray);
}
return results;
}
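// Rebuilds the body in the old column order: each ColumnIndices entry selects a property from the
// new row, and -1 falls back to the old column name itself.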
private static ProcessDataStandardFormat Get(ILogger<Worker> logger, Input input, JsonElement[] jsonElements, ProcessDataStandardFormat processDataStandardFormat) {
ProcessDataStandardFormat result;
int column;
string value;
List<string> values = [];
List<string> results = [];
JsonProperty jsonProperty;
JsonProperty[] jsonProperties;
List<string> unknownColumns = [];
for (int i = 0; i < jsonElements.Length; i++) {
values.Clear();
if (jsonElements[i].ValueKind != JsonValueKind.Object) {
unknownColumns.Add(string.Empty);
break;
}
jsonProperties = jsonElements[i].EnumerateObject().ToArray();
if (jsonProperties.Length != input.NewColumnNames.Count) {
logger.LogWarning("{jsonProperties} != {NewColumnNames}", jsonProperties.Length, input.NewColumnNames.Count);
continue;
}
for (int c = 0; c < input.ColumnIndices.Count; c++) {
column = input.ColumnIndices[c];
if (column == -1)
value = input.OldColumnNames[c];
else {
jsonProperty = jsonProperties[column];
value = jsonProperty.Value.ToString();
}
values.Add(value);
}
results.Add(string.Join('\t', values));
}
result = new(Body: new(results),
Columns: processDataStandardFormat.Columns,
Logistics: processDataStandardFormat.Logistics,
Sequence: processDataStandardFormat.Sequence);
return result;
}
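// Writes the converted rows out as "<original name>.tsv" with a PDSF-style header, placeholder
// offsets, and the original logistics block; START_TIME comes from the sequence treated as ticks.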
private static void Write(ILogger<Worker> logger, FileInfo fileInfo, ProcessDataStandardFormat processDataStandardFormat) {
List<string> results = [];
if (processDataStandardFormat.Sequence is null)
throw new NullReferenceException(nameof(processDataStandardFormat.Sequence));
string endOffset = "E#######T";
string dataOffset = "D#######T";
string headerOffset = "H#######T";
string format = "MM/dd/yyyy HH:mm:ss";
string startTime = new DateTime(processDataStandardFormat.Sequence.Value).ToString(format);
results.Add("HEADER_TAG\tHEADER_VALUE");
results.Add("FORMAT\t2.00");
results.Add("NUMBER_PASSES\t0001");
results.Add($"HEADER_OFFSET\t{headerOffset}");
results.Add($"DATA_OFFSET\t{dataOffset}");
results.Add($"END_OFFSET\t{endOffset}");
results.Add($"\"{string.Join("\",\t\"", processDataStandardFormat.Columns)}\"");
results.AddRange(processDataStandardFormat.Body);
results.Add($"NUM_DATA_ROWS\t{processDataStandardFormat.Body.Count.ToString().PadLeft(9, '0')}");
results.Add($"NUM_DATA_COLUMNS\t{processDataStandardFormat.Columns.Count.ToString().PadLeft(9, '0')}");
results.Add("DELIMITER\t;");
results.Add($"START_TIME_FORMAT\t{format}");
results.Add($"START_TIME\t{startTime}");
results.Add("LOGISTICS_COLUMN\tA_LOGISTICS");
results.Add("LOGISTICS_COLUMN\tB_LOGISTICS");
results.AddRange(processDataStandardFormat.Logistics);
File.WriteAllText($"{fileInfo.FullName}.tsv", string.Join(Environment.NewLine, results));
logger.LogDebug("<{fileInfo}>", fileInfo);
}
}