file-folder-helper/ADO2025/PI5/Helper-2025-02-19.cs
2025-02-24 17:46:47 -07:00

281 lines
12 KiB
C#

using Microsoft.Extensions.Logging;
using System.Collections.ObjectModel;
using System.Text;
using System.Text.Json;
namespace File_Folder_Helper.ADO2025.PI5;
internal static partial class Helper20250219
{
private record ProcessDataStandardFormat(ReadOnlyCollection<string> Body,
ReadOnlyCollection<string> Columns,
string Logistics);
private static ProcessDataStandardFormat GetLogisticsColumnsAndBody(string path, string[]? lines)
{
ProcessDataStandardFormat result;
string segment;
List<string> body = [];
List<string> columns = [];
StringBuilder logistics = new();
lines ??= File.ReadAllLines(path);
string[] segments;
if (lines.Length < 7)
segments = [];
else
segments = lines[6].Trim().Split('\t');
for (int c = 0; c < segments.Length; c++)
{
segment = segments[c][1..^1];
if (!columns.Contains(segment))
columns.Add(segment);
else
{
for (short i = 1; i < short.MaxValue; i++)
{
segment = string.Concat(segment, "_", i);
if (!columns.Contains(segment))
{
columns.Add(segment);
break;
}
}
}
}
bool lookForLogistics = false;
for (int r = 7; r < lines.Length; r++)
{
if (lines[r].StartsWith("NUM_DATA_ROWS"))
lookForLogistics = true;
if (!lookForLogistics)
{
body.Add(lines[r]);
continue;
}
if (lines[r].StartsWith("LOGISTICS_1"))
{
for (int i = r; i < lines.Length; i++)
{
if (lines[r].StartsWith("END_HEADER"))
break;
_ = logistics.AppendLine(lines[i]);
}
break;
}
}
result = new(Body: body.AsReadOnly(),
Columns: columns.AsReadOnly(),
logistics.ToString());
return result;
}
private static JsonElement[]? GetArray(ProcessDataStandardFormat processDataStandardFormat, bool lookForNumbers = false)
{
JsonElement[]? results;
if (processDataStandardFormat.Body.Count == 0 || !processDataStandardFormat.Body[0].Contains('\t'))
results = JsonSerializer.Deserialize<JsonElement[]>("[]") ?? throw new Exception();
else
{
string value;
string[] segments;
List<string> lines = [];
StringBuilder stringBuilder = new();
foreach (string bodyLine in processDataStandardFormat.Body)
{
_ = stringBuilder.Clear();
_ = stringBuilder.Append('{');
segments = bodyLine.Trim().Split('\t');
if (!lookForNumbers)
{
for (int c = 1; c < segments.Length; c++)
{
value = segments[c].Replace("\"", "\\\"").Replace("\\", "\\\\");
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
}
}
else
{
for (int c = 1; c < segments.Length; c++)
{
value = segments[c].Replace("\"", "\\\"").Replace("\\", "\\\\");
if (string.IsNullOrEmpty(value))
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":").Append(value).Append("null,");
else if (value.All(char.IsDigit))
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":").Append(value).Append(',');
else
_ = stringBuilder.Append('"').Append(processDataStandardFormat.Columns[c]).Append("\":\"").Append(value).Append("\",");
}
}
_ = stringBuilder.Remove(stringBuilder.Length - 1, 1);
_ = stringBuilder.AppendLine("}");
lines.Add(stringBuilder.ToString());
}
string json = $"[{string.Join(',', lines)}]";
results = JsonSerializer.Deserialize<JsonElement[]>(json);
}
return results;
}
private static int? TryGetPropertyIndex(JsonProperty[] jsonProperties, string propertyName)
{
int? result = null;
for (int i = 0; i < jsonProperties.Length; i++)
{
if (jsonProperties[i].Name != propertyName)
continue;
result = i;
break;
}
return result;
}
private static bool Compare(ILogger<Worker> logger, ReadOnlyCollection<string> ignore, ReadOnlyCollection<string> backfill, ReadOnlyCollection<string> indexOnly, ReadOnlyDictionary<string, string> keyValuePairs, string directory, JsonElement[] jsonElementsNew, JsonElement[] jsonElementsOld)
{
bool result;
int? q;
string valueNew;
string valueOld;
JsonProperty jsonPropertyOld;
JsonProperty jsonPropertyNew;
JsonProperty[] jsonPropertiesOld;
JsonProperty[] jsonPropertiesNew;
List<string> unknownColumns = [];
List<string> differentColumns = [];
int last = jsonElementsOld.Length - 1;
List<string> sameAfterSpaceSplitColumns = [];
for (int i = last; i > 0; i--)
{
if (jsonElementsOld[i].ValueKind != JsonValueKind.Object)
{
unknownColumns.Add(string.Empty);
break;
}
jsonPropertiesOld = jsonElementsOld[i].EnumerateObject().ToArray();
jsonPropertiesNew = jsonElementsNew[i].EnumerateObject().ToArray();
for (int p = 0; p < jsonPropertiesOld.Length; p++)
{
jsonPropertyOld = jsonPropertiesOld[p];
valueOld = jsonPropertyOld.Value.ToString();
if (ignore.Contains(jsonPropertyOld.Name))
{
if (i == last)
logger.LogDebug("{p} )) {jsonPropertyOld.Name} **", p, jsonPropertyOld.Name);
continue;
}
if (keyValuePairs.TryGetValue(jsonPropertyOld.Name, out string? name) && !string.IsNullOrEmpty(name))
{
q = TryGetPropertyIndex(jsonPropertiesNew, name);
if (q is null && i == 0)
unknownColumns.Add($"{jsonPropertyOld.Name}|{name}");
}
else
{
q = TryGetPropertyIndex(jsonPropertiesNew, jsonPropertyOld.Name);
if (q is null)
{
if (i == 0)
unknownColumns.Add(jsonPropertyOld.Name);
}
}
if (q is null)
{
if (i == last && !string.IsNullOrEmpty(valueOld))
logger.LogDebug("{p} )) {jsonPropertyOld.Name} ??", p, jsonPropertyOld.Name);
}
else
{
jsonPropertyNew = jsonPropertiesNew[q.Value];
valueNew = jsonPropertyNew.Value.ToString();
if (i == last)
logger.LogDebug("{p} )) {jsonPropertyOld.Name} ~~ {q.Value} => {jsonPropertyNew.Name}", p, jsonPropertyOld.Name, q.Value, jsonPropertyNew.Name);
if (valueNew != valueOld && !differentColumns.Contains(jsonPropertyOld.Name))
{
if (valueNew.Length >= 2 && valueNew.Split(' ')[0] == valueOld)
sameAfterSpaceSplitColumns.Add(jsonPropertyOld.Name);
else
{
if (backfill.Contains(jsonPropertyOld.Name) && i != last)
continue;
if (indexOnly.Contains(jsonPropertyOld.Name) && int.TryParse(jsonPropertyOld.Name[^2..], out int index) && i != index - 1)
continue;
logger.LogWarning("For [{jsonProperty.Name}] <{directory}> doesn't match ({valueNew} != {valueOld})!", jsonPropertyOld.Name, directory, valueNew, valueOld);
differentColumns.Add(jsonPropertyOld.Name);
}
}
}
}
}
result = unknownColumns.Count == 0 && differentColumns.Count == 0 && sameAfterSpaceSplitColumns.Count == 0;
return result;
}
private static void Compare(ILogger<Worker> logger, int sourceDirectoryLength, ReadOnlyCollection<string> ignore, ReadOnlyCollection<string> backfill, ReadOnlyCollection<string> indexOnly, ReadOnlyDictionary<string, string> keyValuePairs, string searchPattern, string[] files)
{
bool isMatch;
string directory;
string[] matches;
string directorySegment;
string[] directoryFiles;
JsonElement[]? jsonElementsNew;
JsonElement[]? jsonElementsOld;
ProcessDataStandardFormat processDataStandardFormat;
FileInfo[] collection = files.Select(l => new FileInfo(l)).ToArray();
string[] sorted = (from l in collection orderby l.CreationTime descending select l.FullName).ToArray();
foreach (string file in sorted)
{
directory = Path.GetDirectoryName(file) ?? throw new Exception();
directoryFiles = Directory.GetFiles(directory, searchPattern, SearchOption.TopDirectoryOnly);
matches = (from l in directoryFiles where l != file select l).ToArray();
if (matches.Length < 1)
continue;
directorySegment = directory[sourceDirectoryLength..];
processDataStandardFormat = GetLogisticsColumnsAndBody(file, lines: null);
jsonElementsNew = GetArray(processDataStandardFormat);
if (jsonElementsNew is null)
continue;
foreach (string match in matches)
{
processDataStandardFormat = GetLogisticsColumnsAndBody(match, lines: null);
jsonElementsOld = GetArray(processDataStandardFormat);
if (jsonElementsOld is null || jsonElementsOld.Length != jsonElementsNew.Length)
continue;
isMatch = Compare(logger, ignore, backfill, indexOnly, keyValuePairs, directorySegment, jsonElementsNew, jsonElementsOld);
if (!isMatch)
{
logger.LogWarning("! <{match}>", match);
continue;
}
logger.LogInformation("<{match}>", match);
}
}
}
internal static void Compare(ILogger<Worker> logger, List<string> args)
{
string[] segmentsB;
List<string> distinct = [];
string searchPattern = args[2];
string searchPatternB = args[3];
string[] segments = args[7].Split(',');
Dictionary<string, string> keyValuePairs = [];
ReadOnlyCollection<string> ignore = args[4].Split(',').AsReadOnly();
ReadOnlyCollection<string> backfill = args[5].Split(',').AsReadOnly();
ReadOnlyCollection<string> indexOnly = args[6].Split(',').AsReadOnly();
foreach (string segment in segments)
{
segmentsB = segment.Split('|');
if (segmentsB.Length != 2)
continue;
if (distinct.Contains(segmentsB[1]))
continue;
distinct.Add(segmentsB[1]);
keyValuePairs.Add(segmentsB[0], segmentsB[1]);
}
string sourceDirectory = Path.GetFullPath(args[0]);
string[] files = Directory.GetFiles(sourceDirectory, searchPattern, SearchOption.AllDirectories);
logger.LogInformation("<{files}>(s)", files.Length);
Compare(logger, sourceDirectory.Length, ignore, backfill, indexOnly, keyValuePairs.AsReadOnly(), searchPatternB, files);
}
}