using DlibDotNet;
using DlibDotNet.Dnn;
using System.Collections.ObjectModel;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using View_by_Distance.FaceRecognitionDotNet.Dlib.Python;
using View_by_Distance.FaceRecognitionDotNet.Extensions;
using View_by_Distance.Shared.Models;
using View_by_Distance.Shared.Models.Stateless;

namespace View_by_Distance.FaceRecognitionDotNet;

// NOTE(review): this file reached review with generic type arguments stripped
// (e.g. "List results", "IEnumerable>?", "Matrix? diff"). The type arguments
// below were reconstructed from the surviving usage sites; confirm against the
// upstream FaceRecognitionDotNet sources before merging.

/// <summary>
/// High-level facade over dlib for face detection (HOG frontal detector or CNN
/// MMOD network), face landmarking (5-point / 68-point shape predictors or a
/// custom detector) and 128-dimensional face encoding.
/// </summary>
public class FaceRecognition : DisposableObject
{

    /// <summary>Detector used when the instance was constructed with <see cref="Model.Custom"/>; must be set before detecting.</summary>
    public FaceDetector? CustomFaceDetector { get; set; }

    /// <summary>Landmark detector used when the instance was constructed with <see cref="PredictorModel.Custom"/>; must be set before landmarking.</summary>
    public FaceLandmarkDetector? CustomFaceLandmarkDetector { get; set; }

    private readonly Model _Model;
    private readonly int _NumberOfJitters;
    private readonly LossMetric _FaceEncoder;
    private readonly LossMmod _CnnFaceDetector;
    private readonly int _NumberOfTimesToUpsample;
    private readonly PredictorModel _PredictorModel;
    private readonly FrontalFaceDetector _FaceDetector;
    private readonly ShapePredictor _PosePredictor5Point;
    private readonly ShapePredictor _PosePredictor68Point;

    /// <summary>
    /// Loads every dlib model referenced by <paramref name="modelParameter"/>.
    /// </summary>
    /// <param name="numberOfJitters">How many randomly jittered copies to average when computing a face descriptor.</param>
    /// <param name="numberOfTimesToUpsample">How many times the detectors upsample the image before searching for faces.</param>
    /// <param name="model">Which face detector to use (<see cref="Model.Hog"/>, <see cref="Model.Cnn"/> or <see cref="Model.Custom"/>).</param>
    /// <param name="modelParameter">Serialized model payloads; all four model buffers must be non-null.</param>
    /// <param name="predictorModel">Which landmark predictor to use (5-point, 68-point or custom).</param>
    /// <exception cref="NullReferenceException">Any required model payload is null (kept for caller compatibility).</exception>
    public FaceRecognition(int numberOfJitters, int numberOfTimesToUpsample, Model model, ModelParameter modelParameter, PredictorModel predictorModel)
    {
        if (modelParameter is null)
            throw new NullReferenceException(nameof(modelParameter));
        if (modelParameter.PosePredictor5FaceLandmarksModel is null)
            throw new NullReferenceException(nameof(modelParameter.PosePredictor5FaceLandmarksModel));
        if (modelParameter.PosePredictor68FaceLandmarksModel is null)
            throw new NullReferenceException(nameof(modelParameter.PosePredictor68FaceLandmarksModel));
        if (modelParameter.CnnFaceDetectorModel is null)
            throw new NullReferenceException(nameof(modelParameter.CnnFaceDetectorModel));
        if (modelParameter.FaceRecognitionModel is null)
            throw new NullReferenceException(nameof(modelParameter.FaceRecognitionModel));
        _Model = model;
        _PredictorModel = predictorModel;
        _NumberOfJitters = numberOfJitters;
        _NumberOfTimesToUpsample = numberOfTimesToUpsample;
        // Each readonly field is assigned exactly once, here. The original code
        // called "_Field?.Dispose()" immediately before each assignment; those
        // calls were guaranteed no-ops (the fields were still null) and have
        // been removed.
        _FaceDetector = DlibDotNet.Dlib.GetFrontalFaceDetector();
        _PosePredictor68Point = ShapePredictor.Deserialize(modelParameter.PosePredictor68FaceLandmarksModel);
        _PosePredictor5Point = ShapePredictor.Deserialize(modelParameter.PosePredictor5FaceLandmarksModel);
        _CnnFaceDetector = LossMmod.Deserialize(modelParameter.CnnFaceDetectorModel);
        _FaceEncoder = LossMetric.Deserialize(modelParameter.FaceRecognitionModel);
    }

    /// <summary>
    /// Euclidean distance between two encodings; 0 when <paramref name="faceEncoding"/> is empty.
    /// </summary>
    /// <exception cref="NullReferenceException">Either argument is null (kept for caller compatibility).</exception>
    public static double FaceDistance(FaceEncoding faceEncoding, FaceEncoding faceToCompare)
    {
        if (faceEncoding is null)
            throw new NullReferenceException(nameof(faceEncoding));
        if (faceToCompare is null)
            throw new NullReferenceException(nameof(faceToCompare));
        faceEncoding.ThrowIfDisposed();
        faceToCompare.ThrowIfDisposed();
        if (faceEncoding.Encoding.Size == 0)
            return 0;
        using Matrix<double> diff = faceEncoding.Encoding - faceToCompare.Encoding;
        return DlibDotNet.Dlib.Length(diff);
    }

    // Concatenates two point runs (used to stitch the outer and inner lip contours).
    private static FacePoint[] Join(IEnumerable<FacePoint> facePoints1, IEnumerable<FacePoint> facePoints2) =>
        [.. facePoints1, .. facePoints2];

    /// <summary>
    /// Maps raw predictor parts to named facial regions using the canonical
    /// 68-point (large) or 5-point (small) index layout. Returns an empty list
    /// when the part count does not match the selected predictor.
    /// </summary>
    private List<(FacePart, FacePoint[])> GetFaceParts(FullObjectDetection fullObjectDetection)
    {
        List<(FacePart, FacePoint[])> results = [];
        FacePoint[] facePoints = Enumerable.Range(0, (int)fullObjectDetection.Parts)
            .Select(index => new FacePoint(index, fullObjectDetection.GetPart((uint)index).X, fullObjectDetection.GetPart((uint)index).Y))
            .ToArray();
        switch (_PredictorModel)
        {
            case PredictorModel.Custom:
                throw new NotImplementedException();
            case PredictorModel.Large:
                if (facePoints.Length == 68)
                {
                    // Standard dlib 68-landmark index ranges.
                    results.Add(new(FacePart.Chin, facePoints.Skip(0).Take(17).ToArray()));
                    results.Add(new(FacePart.LeftEyebrow, facePoints.Skip(17).Take(5).ToArray()));
                    results.Add(new(FacePart.RightEyebrow, facePoints.Skip(22).Take(5).ToArray()));
                    results.Add(new(FacePart.NoseBridge, facePoints.Skip(27).Take(5).ToArray()));
                    results.Add(new(FacePart.NoseTip, facePoints.Skip(31).Take(5).ToArray()));
                    results.Add(new(FacePart.LeftEye, facePoints.Skip(36).Take(6).ToArray()));
                    results.Add(new(FacePart.RightEye, facePoints.Skip(42).Take(6).ToArray()));
                    // Lips combine the outer contour with part of the inner contour.
                    results.Add(new(FacePart.TopLip, Join(facePoints.Skip(48).Take(7), facePoints.Skip(60).Take(5))));
                    results.Add(new(FacePart.BottomLip, Join(facePoints.Skip(55).Take(5), facePoints.Skip(65).Take(3))));
                }
                break;
            case PredictorModel.Small:
                if (facePoints.Length == 5)
                {
                    results.Add(new(FacePart.RightEye, facePoints.Skip(0).Take(2).ToArray()));
                    results.Add(new(FacePart.LeftEye, facePoints.Skip(2).Take(2).ToArray()));
                    results.Add(new(FacePart.NoseTip, facePoints.Skip(4).Take(1).ToArray()));
                }
                break;
        }
        return results;
    }

    /// <summary>
    /// Runs the configured face detector and normalizes the hits to
    /// <see cref="MModRect"/> instances (caller owns disposal of each rect).
    /// </summary>
    /// <exception cref="NotSupportedException"><see cref="Model.Custom"/> is selected but <see cref="CustomFaceDetector"/> is unset.</exception>
    private MModRect[] GetMModRects(Image image)
    {
        switch (_Model)
        {
            case Model.Cnn:
                return CnnFaceDetectionModelV1.Detect(_CnnFaceDetector, image, _NumberOfTimesToUpsample).ToArray();
            case Model.Hog:
                // NOTE(review): tuple element types reconstructed from the .Item1/.Item2 usage — confirm upstream.
                IEnumerable<Tuple<DlibDotNet.Rectangle, double>>? locations = SimpleObjectDetector.RunDetectorWithUpscale2(_FaceDetector, image, (uint)_NumberOfTimesToUpsample);
                return locations.Select(l => new MModRect { Rect = l.Item1, DetectionConfidence = l.Item2 }).ToArray();
            case Model.Custom:
                if (CustomFaceDetector is null)
                    throw new NotSupportedException("The custom face detector is not ready.");
                return CustomFaceDetector.Detect(image, _NumberOfTimesToUpsample)
                    .Select(rect => new MModRect { Rect = new DlibDotNet.Rectangle(rect.Left, rect.Top, rect.Right, rect.Bottom), DetectionConfidence = rect.Confidence })
                    .ToArray();
            default:
                throw new Exception($"{nameof(_Model)} value is not supported.");
        }
    }

    /// <summary>
    /// Detects faces and returns their image-bound-trimmed locations.
    /// </summary>
    /// <exception cref="NullReferenceException"><paramref name="image"/> is null (kept for caller compatibility).</exception>
    public List<Location> FaceLocations(Image image)
    {
        if (image is null)
            throw new NullReferenceException(nameof(image));
        image.ThrowIfDisposed();
        ThrowIfDisposed();
        List<Location> results = [];
        MModRect[] mModRects = GetMModRects(image);
        foreach (MModRect? mModRect in mModRects)
        {
            System.Drawing.Rectangle rectangle = new(mModRect.Rect.Left, mModRect.Rect.Top, (int)mModRect.Rect.Width, (int)mModRect.Rect.Height);
            // Length replaces the original per-iteration Count() call; the value is the same.
            Location location = Shared.Models.Stateless.Methods.ILocation.TrimBound(mModRect.DetectionConfidence, rectangle, image.Width, image.Height, mModRects.Length);
            mModRect.Dispose();
            results.Add(location);
        }
        return results;
    }

    /// <summary>
    /// Runs the configured landmark predictor over each location. Caller owns
    /// disposal of the returned detections.
    /// </summary>
    private List<FullObjectDetection> GetFullObjectDetections(Image image, List<Location> locations)
    {
        List<FullObjectDetection> results = [];
        if (_PredictorModel == PredictorModel.Custom)
        {
            if (CustomFaceLandmarkDetector is null)
                throw new NullReferenceException(nameof(CustomFaceLandmarkDetector));
            foreach (Location location in locations)
            {
                FullObjectDetection fullObjectDetection = CustomFaceLandmarkDetector.Detect(image, location);
                results.Add(fullObjectDetection);
            }
        }
        else
        {
            ShapePredictor posePredictor = _PredictorModel switch
            {
                PredictorModel.Large => _PosePredictor68Point,
                PredictorModel.Small => _PosePredictor5Point,
                PredictorModel.Custom => throw new NotImplementedException(),
                _ => throw new Exception($"{nameof(_PredictorModel)} value is not supported.")
            };
            foreach (Location location in locations)
            {
                DlibDotNet.Rectangle rectangle = new(location.Left, location.Top, location.Right, location.Bottom);
                FullObjectDetection fullObjectDetection = posePredictor.Detect(image.Matrix, rectangle);
                results.Add(fullObjectDetection);
            }
        }
        return results;
    }

    // Same trim-to-bounds conversion as FaceLocations but without the public
    // argument validation; used internally by GetCollection.
    private List<Location> GetLocations(Image image)
    {
        List<Location> results = [];
        MModRect[] mModRects = GetMModRects(image);
        if (mModRects.Length != 0)
        {
            foreach (MModRect? mModRect in mModRects)
            {
                System.Drawing.Rectangle rectangle = new(mModRect.Rect.Left, mModRect.Rect.Top, (int)mModRect.Rect.Width, (int)mModRect.Rect.Height);
                Location location = Shared.Models.Stateless.Methods.ILocation.TrimBound(mModRect.DetectionConfidence, rectangle, image.Width, image.Height, mModRects.Length);
                mModRect.Dispose();
                results.Add(location);
            }
        }
        return results;
    }

    /// <summary>
    /// For each face location (detected here when <paramref name="locations"/>
    /// is empty — note the list is mutated in that case), optionally computes
    /// the face encoding and the named face parts. Locations whose
    /// encoding/parts lists do not end up with exactly one entry are skipped.
    /// </summary>
    /// <exception cref="NotSupportedException"><see cref="PredictorModel.Custom"/> is configured.</exception>
    public List<(Location, FaceEncoding?, Dictionary<FacePart, FacePoint[]>?)> GetCollection(Image image, List<Location> locations, bool includeFaceEncoding, bool includeFaceParts)
    {
        List<(Location, FaceEncoding?, Dictionary<FacePart, FacePoint[]>?)> results = [];
        if (image is null)
            throw new NullReferenceException(nameof(image));
        image.ThrowIfDisposed();
        ThrowIfDisposed();
        if (_PredictorModel == PredictorModel.Custom)
            throw new NotSupportedException("FaceRecognition.PredictorModel.Custom is not supported.");
        if (locations.Count == 0)
            locations.AddRange(GetLocations(image));
        List<FullObjectDetection> fullObjectDetections = GetFullObjectDetections(image, locations);
        if (fullObjectDetections.Count != locations.Count)
            throw new Exception($"{nameof(fullObjectDetections)} count must match {nameof(locations)} count.");
        List<(Location Location, List<FaceEncoding?> FaceEncodings, List<List<(FacePart, FacePoint[])>> FaceParts)> collection = [];
        foreach (Location location in locations)
            collection.Add(new(location, [], []));
        if (locations.Count != collection.Count)
            throw new Exception($"{nameof(collection)} count must match {nameof(locations)} count.");
        if (!includeFaceEncoding)
        {
            // Keep list shapes uniform so the assembly loop below can rely on Count == 1.
            for (int i = 0; i < collection.Count; i++)
                collection[i].FaceEncodings.Add(null);
        }
        else
        {
            for (int i = 0; i < collection.Count; i++)
            {
                Matrix<double> doubles = FaceRecognitionModelV1.ComputeFaceDescriptor(_FaceEncoder, image, fullObjectDetections[i], _NumberOfJitters);
                FaceEncoding faceEncoding = new(doubles);
                collection[i].FaceEncodings.Add(faceEncoding);
            }
        }
        if (!includeFaceParts)
        {
            for (int i = 0; i < collection.Count; i++)
                collection[i].FaceParts.Add([]);
        }
        else
        {
            for (int i = 0; i < collection.Count; i++)
            {
                List<(FacePart, FacePoint[])> faceParts = GetFaceParts(fullObjectDetections[i]);
                collection[i].FaceParts.Add(faceParts);
            }
        }
        foreach (FullObjectDetection fullObjectDetection in fullObjectDetections)
            fullObjectDetection.Dispose();
        const int indexZero = 0;
        foreach ((Location location, List<FaceEncoding?> faceEncodings, List<List<(FacePart, FacePoint[])>> faceParts) in collection)
        {
            if (faceEncodings.Count != 1 || faceParts.Count != 1)
                continue;
            if (faceParts[indexZero].Count == 0)
                results.Add(new(location, faceEncodings[indexZero], null));
            else
            {
                Dictionary<FacePart, FacePoint[]> keyValuePairs = [];
                foreach ((FacePart facePart, FacePoint[] facePoints) in faceParts[indexZero])
                    keyValuePairs.Add(facePart, facePoints);
                results.Add(new(location, faceEncodings[indexZero], keyValuePairs));
            }
        }
        return results;
    }

    // Shared body of LoadFaceEncoding / LoadBFaceEncoding: validates the
    // expected dimensionality and wraps the doubles in a dlib matrix.
    private static FaceEncoding GetFaceEncoding(double[] encoding, int expectedLength)
    {
        if (encoding is null)
            throw new NullReferenceException(nameof(encoding));
        if (encoding.Length != expectedLength)
        {
            string message = $"{nameof(encoding)}.{nameof(encoding.Length)} must be {expectedLength}.";
            // BUG FIX: the single-string ArgumentOutOfRangeException constructor
            // treats its argument as the *parameter name*; use the two-argument
            // overload so the text is reported as the message.
            throw new ArgumentOutOfRangeException(nameof(encoding), message);
        }
#pragma warning disable
        Matrix<double>? matrix = Matrix<double>.CreateTemplateParameterizeMatrix(0, 1);
#pragma warning restore
        matrix.SetSize(expectedLength);
        matrix.Assign(encoding);
        return new FaceEncoding(matrix);
    }

    /// <summary>
    /// Wraps a 128-dimensional descriptor (the dlib face-recognition model size) in a <see cref="FaceEncoding"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="encoding"/> is not length 128.</exception>
    public static FaceEncoding LoadFaceEncoding(double[] encoding) =>
        GetFaceEncoding(encoding, 128);

    /// <summary>
    /// Wraps a 512-dimensional descriptor in a <see cref="FaceEncoding"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="encoding"/> is not length 512.</exception>
    public static FaceEncoding LoadBFaceEncoding(double[] encoding) =>
        GetFaceEncoding(encoding, 512);

    /// <summary>
    /// Loads an image file from disk as an RGB or greyscale matrix.
    /// </summary>
    /// <exception cref="FileNotFoundException"><paramref name="file"/> does not exist.</exception>
    public static Image LoadImageFile(string file, Mode mode = Mode.Rgb)
    {
        if (!File.Exists(file))
            throw new FileNotFoundException(file);
        // NOTE(review): matrix element types (RgbPixel / byte) reconstructed to
        // match the requested mode — confirm against the upstream sources.
        return mode switch
        {
            Mode.Rgb => new Image(DlibDotNet.Dlib.LoadImageAsMatrix<RgbPixel>(file), mode),
            Mode.Greyscale => new Image(DlibDotNet.Dlib.LoadImageAsMatrix<byte>(file), mode),
            _ => throw new NotImplementedException()
        };
    }

#pragma warning disable CA1416
    /// <summary>
    /// Converts a GDI+ <see cref="Bitmap"/> (8bpp indexed, 24bpp RGB, or 32bpp
    /// (A)RGB) into a dlib image, swapping BGR byte order to RGB and dropping
    /// any alpha channel. Returns null for an unhandled mode.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The bitmap's pixel format is not supported.</exception>
    public static Image? LoadImage(Bitmap bitmap)
    {
        Mode mode;
        int dstChannel;
        int srcChannel;
        int width = bitmap.Width;
        int height = bitmap.Height;
        PixelFormat format = bitmap.PixelFormat;
        System.Drawing.Rectangle rect = new(0, 0, width, height);
        switch (format)
        {
            case PixelFormat.Format8bppIndexed:
                mode = Mode.Greyscale;
                srcChannel = 1;
                dstChannel = 1;
                break;
            case PixelFormat.Format24bppRgb:
                mode = Mode.Rgb;
                srcChannel = 3;
                dstChannel = 3;
                break;
            case PixelFormat.Format32bppRgb:
            case PixelFormat.Format32bppArgb:
                // 4 source channels, but the alpha byte is discarded on copy.
                mode = Mode.Rgb;
                srcChannel = 4;
                dstChannel = 3;
                break;
            default:
                throw new ArgumentOutOfRangeException($"{nameof(bitmap)}", $"The specified {nameof(PixelFormat)} is not supported.");
        }
        BitmapData? data = null;
        try
        {
            data = bitmap.LockBits(rect, ImageLockMode.ReadOnly, format);
            unsafe
            {
                byte[]? array = new byte[width * height * dstChannel];
                fixed (byte* pArray = &array[0])
                {
                    byte* dst = pArray;
                    switch (srcChannel)
                    {
                        case 1:
                            {
                                // Greyscale: copy row by row (stride may exceed width).
                                IntPtr src = data.Scan0;
                                int stride = data.Stride;
                                for (int h = 0; h < height; h++)
                                    Marshal.Copy(IntPtr.Add(src, h * stride), array, h * width, width * dstChannel);
                            }
                            break;
                        case 3:
                        case 4:
                            {
                                byte* src = (byte*)data.Scan0;
                                int stride = data.Stride;
                                for (int h = 0; h < height; h++)
                                {
                                    int srcOffset = h * stride;
                                    int dstOffset = h * width * dstChannel;
                                    for (int w = 0; w < width; w++)
                                    {
                                        // BGR order to RGB order
                                        dst[dstOffset + w * dstChannel + 0] = src[srcOffset + w * srcChannel + 2];
                                        dst[dstOffset + w * dstChannel + 1] = src[srcOffset + w * srcChannel + 1];
                                        dst[dstOffset + w * dstChannel + 2] = src[srcOffset + w * srcChannel + 0];
                                    }
                                }
                            }
                            break;
                    }
                    IntPtr ptr = (IntPtr)pArray;
                    switch (mode)
                    {
                        case Mode.Rgb:
                            return new Image(new Matrix<RgbPixel>(ptr, height, width, width * 3), Mode.Rgb);
                        case Mode.Greyscale:
                            return new Image(new Matrix<byte>(ptr, height, width, width), Mode.Greyscale);
                    }
                }
            }
        }
        finally
        {
            if (data != null)
                bitmap.UnlockBits(data);
        }
        return null;
    }

    /// <summary>
    /// Compares <paramref name="locationContainer"/>'s encoding against every
    /// container in the collection and returns new containers ordered by
    /// distance scaled to an integer permyriad.
    /// </summary>
    /// <exception cref="NullReferenceException"><paramref name="locationContainer"/> has no encoding (kept for caller compatibility).</exception>
    /// <exception cref="ObjectDisposedException">A compared container's encoding is missing or disposed.</exception>
    public static ReadOnlyCollection<LocationContainer> GetLocationContainers(int permyriad, ReadOnlyCollection<LocationContainer> readOnlyLocationContainers, LocationContainer locationContainer)
    {
        List<LocationContainer> results = [];
        int lengthPermyriad;
        if (readOnlyLocationContainers.Count != 0)
        {
            double length;
            LocationContainer result;
            if (locationContainer.Encoding is not FaceEncoding faceEncodingToCompare)
                throw new NullReferenceException(nameof(locationContainer));
            faceEncodingToCompare.ThrowIfDisposed();
            foreach (LocationContainer item in readOnlyLocationContainers)
            {
#pragma warning disable CA1513
                if (item.Encoding is not FaceEncoding faceEncoding || faceEncoding.IsDisposed)
                    throw new ObjectDisposedException($"{nameof(item)} contains disposed object.");
#pragma warning restore CA1513
                using (Matrix<double> diff = faceEncoding.Encoding - faceEncodingToCompare.Encoding)
                    length = DlibDotNet.Dlib.Length(diff);
                // Truncate toward zero, matching the original cast semantics.
                lengthPermyriad = (int)(length * permyriad);
                result = LocationContainer.Get(locationContainer, item, lengthPermyriad, keepExifDirectory: false, keepEncoding: false);
                results.Add(result);
            }
        }
        LocationContainer[] array = results.OrderBy(l => l.LengthPermyriad).ToArray();
        return new(array);
    }

    /// <summary>
    /// Computes the distance from each entry's encoding to
    /// <paramref name="faceDistanceToCompare"/>'s encoding, preserving order.
    /// </summary>
    /// <exception cref="NullReferenceException"><paramref name="faceDistanceToCompare"/> has no encoding (kept for caller compatibility).</exception>
    /// <exception cref="ObjectDisposedException">An entry's encoding is missing or disposed.</exception>
    public static List<FaceDistance> FaceDistances(ReadOnlyCollection<FaceDistance> faceDistances, FaceDistance faceDistanceToCompare)
    {
        List<FaceDistance> results = [];
        if (faceDistances.Count != 0)
        {
            double length;
            FaceDistance result;
            if (faceDistanceToCompare.Encoding is not FaceEncoding faceEncodingToCompare)
                throw new NullReferenceException(nameof(faceDistanceToCompare));
            faceEncodingToCompare.ThrowIfDisposed();
            foreach (FaceDistance faceDistance in faceDistances)
            {
#pragma warning disable CA1513
                if (faceDistance.Encoding is not FaceEncoding faceEncoding || faceEncoding.IsDisposed)
                    throw new ObjectDisposedException($"{nameof(faceDistances)} contains disposed object.");
#pragma warning restore CA1513
                using (Matrix<double> diff = faceEncoding.Encoding - faceEncodingToCompare.Encoding)
                    length = DlibDotNet.Dlib.Length(diff);
                result = new(faceDistance, length);
                results.Add(result);
            }
        }
        return results;
    }
#pragma warning restore CA1416

    /// <summary>Disposes every dlib model owned by this instance.</summary>
    protected override void DisposeUnmanaged()
    {
        base.DisposeUnmanaged();
        _PosePredictor68Point?.Dispose();
        _PosePredictor5Point?.Dispose();
        _CnnFaceDetector?.Dispose();
        _FaceEncoder?.Dispose();
        _FaceDetector?.Dispose();
    }

}