C++ 使用dlib20.0实现人脸比对功能

原创已于 2025-09-10 14:20:20 修改 · 281 阅读
CC 4.0 BY-SA版权
文章标签：
于 2025-09-10 14:00:00 首次发布
功能：实现基于 dlib 20.0 的人脸特征提取与比对，通过 JNI 提供给 Java 调用，也可供其他语言或程序调用
用途：用于身份验证、人脸匹配等场景（1:1 验证）
输出：两张人脸图像之间的欧氏距离（越小越相似，通常 < 0.4 表示同一人）
比对耗时与图片大小有关：在未使用 GPU 加速的情况下，3 MB 以下的图片比对耗时小于 1 秒。此功能由本人亲自实现，可商用。
测试图片与结果如下：
上传部分代码，如下：
// ==============================================================================
// 文件：FaceCompare.cpp
// 功能：实现基于 Dlib 的人脸特征提取与比对，通过 JNI 提供给 Java 调用
// 用途：用于身份验证、人脸匹配等场景（1:1 验证）
// 输出：两张人脸图像之间的欧氏距离（越小越相似，通常 < 0.4 表示同一人）
// ==============================================================================


#include "FaceCompare.h"

// 标准输入输出流，用于调试打印信息（如模型加载状态）
#include <iostream>

// Dlib 深度神经网络模块，用于构建和运行 ResNet 模型
#include <dlib/dnn.h>

// Dlib 人脸检测器（基于 HOG + SVM），用于定位图像中的人脸
#include <dlib/image_processing/frontal_face_detector.h>

// Dlib 人脸关键点检测器（landmark detection），用于对齐人脸
#include <dlib/image_processing.h>

// Dlib 图像读取模块，支持 JPG、PNG、BMP 等格式
#include <dlib/image_io.h>

// Dlib 图像变换模块，用于仿射变换、旋转、裁剪等操作
#include <dlib/image_transforms.h>

// Dlib 几何模块，提供点、矩形、变换矩阵等操作
#include <dlib/geometry.h>

// C++ 标准库：动态数组
#include <vector>

// C++ 标准库：时间相关（可用于随机种子）
#include <ctime>

// C++ 数学库：提供 isfinite、sqrt 等函数
#include <cmath>

// Windows API 头文件，用于获取 DLL 模块路径
#include <windows.h>


// 哈希表，用于缓存已计算的人脸特征向量，避免重复计算
#include <unordered_map>

#include <fstream>

// Pull both namespaces into file scope to shorten the code below.
// NOTE(review): with both directives active, any name that exists in both
// namespaces must be written fully qualified (this file already spells out
// dlib::rand and std::string explicitly) — keep doing so for new code.
using namespace dlib;
using namespace std;

// =============================================================================
// 一、ResNet 网络结构定义（dlib_face_recognition_resnet_model_v1）
// =============================================================================

// 这部分定义了一个深度残差网络（ResNet），用于从人脸图像中提取 128 维特征向量。
// 该网络结构与 dlib 官方 face recognition 模型完全一致。

// Residual block: wraps `block` (stride 1) around a tag1-marked SUBNET and
// adds the block output to the tagged input via add_prev1 (skip connection).
// Template parameters:
//   block:  the convolutional building block template
//   N:      number of filters passed to the block
//   BN:     normalisation layer template (this file uses `affine`)
//   SUBNET: the network this block is stacked on
template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

// Down-sampling residual block: the main path runs `block` with stride 2,
// while the shortcut path (reached through skip1/tag1) goes through a
// 2x2 average pool with stride 2; the two paths are summed by add_prev2.
template <template <int, template<typename>class, int, typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

// Basic convolutional block, reading from the input (innermost) outwards:
//   3x3 convolution with the given stride -> BN -> ReLU
//   -> 3x3 convolution with stride 1 -> BN
// BN is the normalisation layer template; SUBNET is the input network.
template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

// Residual blocks that use `affine` as the normalisation layer, followed by ReLU.
// ares = affine + residual, ares_down = affine + residual_down.
template <int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

// Network stages. In anet_type below, alevel4 sits closest to the input and
// alevel0 closest to the output.

// Level 0: one down-sampling residual block with 256 filters.
template <typename SUBNET> using alevel0 = ares_down<256, SUBNET>;

// Level 1: one down-sampling block followed by two plain blocks, 256 filters.
template <typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;

// Level 2: one down-sampling block followed by two plain blocks, 128 filters.
template <typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;

// Level 3: one down-sampling block followed by three plain blocks, 64 filters.
template <typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;

// Level 4: three plain blocks with 32 filters, no down-sampling.
template <typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

// Complete network, listed from output (outermost) to input (innermost):
//   loss_metric                 metric-learning loss layer
//   fc_no_bias<128, ...>        fully connected layer producing the 128-D descriptor
//   avg_pool_everything         global average pooling
//   alevel0 .. alevel4          the residual stages defined above
//   max_pool<3,3,2,2>           3x3 max pooling, stride 2
//   relu<affine<con<32,7,7,2,2>>>  7x7 convolution, 32 filters, stride 2
//   input_rgb_image_sized<150>  RGB input layer sized for 150x150 images
// NOTE(review): the alias chain has to match the layout stored in
// dlib_face_recognition_resnet_model_v1.dat for deserialisation to succeed;
// do not restructure it without checking against the dlib example.
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
    alevel0<
    alevel1<
    alevel2<
    alevel3<
    alevel4<
    max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
    input_rgb_image_sized<150>
    >>>>>>>>>>>>;

// =============================================================================
// 二、全局资源定义
// =============================================================================

// Descriptor cache.
// key:   a string (intended to be the image file path)
// value: the 128-D face descriptor for that image
// Intended purpose: avoid reloading an image and recomputing its descriptor.
// NOTE(review): nothing in the visible part of this file reads or writes this
// map; it is a placeholder for a later extension.
std::unordered_map<std::string, matrix<float, 128, 1>> descriptor_cache;

// Log file stream.
// Path: C:\Temp\face_compare.log (hard-coded)
// Mode: append (std::ios::app)
// The stream is opened during static initialisation of this translation unit.
// NOTE(review): no visible code writes to it, and the open result is never
// checked — confirm whether it is used elsewhere.
std::ofstream log_file("C:\\Temp\\face_compare.log", std::ios::app);

// =============================================================================
// 三、工具函数：获取 DLL 所在目录
// =============================================================================

// 获取当前 DLL 模块所在的文件夹路径
// 为什么需要？因为模型文件（.dat）通常与 DLL 放在同一目录
// 如果硬编码路径，DLL 移动后会找不到模型
std::string getDllDirectory() {
    char path[MAX_PATH];  // Windows 最大路径长度为 260 字符
    HMODULE hModule = GetModuleHandleA("FaceCompare.dll");  // 尝试通过模块名获取句柄

    if (hModule == NULL) {
        // 若未找到，尝试通过函数地址反推模块
        HMODULE hMod = NULL;
        GetModuleHandleExA(
            GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |  // 从地址获取模块
            GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,  // 不增加引用计数
            (LPSTR)&getDllDirectory,  // 当前函数地址
            &hMod);  // 输出模块句柄

        if (hMod == NULL) return ".";  // 获取失败，返回当前目录
        GetModuleFileNameA(hMod, path, MAX_PATH);  // 获取完整路径
    } else {
        GetModuleFileNameA(hModule, path, MAX_PATH);
    }

    std::string fullPath(path);
    size_t pos = fullPath.find_last_of("\\/");  // 查找最后一个斜杠
    return pos == std::string::npos ? "." : fullPath.substr(0, pos);  // 返回目录部分
}

// =============================================================================
// 四、模型管理器（单例模式）
// =============================================================================

// Holds the three models used by the comparison pipeline: face detector,
// landmark predictor and recognition network.
//
// Exposed as a singleton so that:
//   1. the model files are deserialised only once;
//   2. repeated calls do not pay the initialisation cost again;
//   3. there is a single access point for all callers.
class FaceModelManager {
public:
    shape_predictor sp;              // landmark predictor (5-landmark model file)
    anet_type net;                   // ResNet face recognition network
    frontal_face_detector detector;  // dlib's built-in frontal face detector

    // Returns the process-wide instance. The function-local static is
    // constructed on first use (initialisation is thread-safe since C++11).
    // If model loading throws, the exception propagates to the caller.
    static FaceModelManager& getInstance() {
        static FaceModelManager instance;
        return instance;
    }

    // Non-copyable, non-assignable.
    FaceModelManager(const FaceModelManager&) = delete;
    FaceModelManager& operator=(const FaceModelManager&) = delete;

private:
    // Private constructor: instances can only be created through getInstance().
    // Loads both model files from the directory reported by getDllDirectory().
    FaceModelManager() {
        try {
            const std::string baseDir = getDllDirectory();
            const std::string landmarkModelFile =
                baseDir + "\\shape_predictor_5_face_landmarks.dat";
            const std::string recognitionModelFile =
                baseDir + "\\dlib_face_recognition_resnet_model_v1.dat";

            // Diagnostic output: where the models are being loaded from.
            std::cout << "🔍 正在从以下路径加载模型:" << std::endl
                      << " Landmarks 模型: " << landmarkModelFile << std::endl
                      << " Recognition 模型: " << recognitionModelFile << std::endl;

            // Load order: landmark predictor, recognition network, detector.
            deserialize(landmarkModelFile) >> sp;
            deserialize(recognitionModelFile) >> net;
            detector = get_frontal_face_detector();

            std::cout << "✅ 模型加载成功！" << std::endl;
        }
        catch (const std::exception& loadError) {
            std::cerr << "❌ 模型加载失败: " << loadError.what() << std::endl;
            throw;  // let the caller see the failure
        }
    }

    ~FaceModelManager() = default;
};

// =============================================================================
// 五、辅助函数
// =============================================================================

// 检查特征向量是否有效
// 防止出现 NaN 或 Inf（可能由模型损坏或输入异常引起）
bool is_finite(const matrix<float, 128, 1>& m) {
    //todo
}

// Builds a 3x3 homogeneous translation matrix for 2-D points written as
// column vectors (x, y, 1):
//
//     | 1  0  dx |
//     | 0  1  dy |
//     | 0  0  1  |
//
// dx: shift along x (columns); dy: shift along y (rows).
matrix<double> translation_matrix(double dx, double dy) {
    matrix<double> t = identity_matrix<double>(3);
    t(0, 2) = dx;
    t(1, 2) = dy;
    return t;
}

// 生成 [min, max] 范围内的随机浮点数
// 使用 dlib 的随机数生成器
double random_double(double min, double max, dlib::rand& rnd) {
    //todo
    ;
}

// Produces a slightly perturbed copy of `img`: a small random rotation,
// uniform scaling and shift applied around the image centre.
//
// Purpose: averaging descriptors over several such copies makes the final
// descriptor less sensitive to small pose and alignment differences.
//
// The output has the same dimensions as the input (150x150 for the face chips
// used in this file). Pixels that map outside the source image are black.
// An empty input is returned unchanged.
//
// The perturbation ranges below are deliberately small so the face stays
// inside the chip; tune them if a stronger augmentation is wanted.
matrix<rgb_pixel> jitter_image_custom(const matrix<rgb_pixel>& img, dlib::rand& rnd) {
    if (img.size() == 0) return img;

    const double kPi = 3.14159265358979323846;
    const double kMaxAngleDegrees = 3.0;   // rotation in [-3, 3) degrees
    const double kMinScale = 0.97;         // zoom in [0.97, 1.03)
    const double kMaxScale = 1.03;
    const double kMaxShiftPixels = 2.0;    // shift in [-2, 2) pixels per axis

    const double angle = random_double(-kMaxAngleDegrees, kMaxAngleDegrees, rnd) * kPi / 180.0;
    const double scale = random_double(kMinScale, kMaxScale, rnd);
    const double dx = random_double(-kMaxShiftPixels, kMaxShiftPixels, rnd);
    const double dy = random_double(-kMaxShiftPixels, kMaxShiftPixels, rnd);

    // Centre of the pixel grid; rotation and scaling pivot around it.
    const double cx = (img.nc() - 1) / 2.0;
    const double cy = (img.nr() - 1) / 2.0;

    // Combined rotation + uniform scale in homogeneous coordinates.
    matrix<double> rot_scale = identity_matrix<double>(3);
    rot_scale(0, 0) = scale * std::cos(angle);
    rot_scale(0, 1) = -scale * std::sin(angle);
    rot_scale(1, 0) = scale * std::sin(angle);
    rot_scale(1, 1) = scale * std::cos(angle);

    // Forward map (source -> output): move the centre to the origin,
    // rotate/scale, move back and apply the random shift.
    const matrix<double> forward =
        translation_matrix(cx + dx, cy + dy) * rot_scale * translation_matrix(-cx, -cy);

    // transform_image expects the inverse direction (output -> source).
    // scale is strictly positive, so `forward` is always invertible.
    const matrix<double> backward = inv(forward);

    matrix<double, 2, 2> linear;
    linear(0, 0) = backward(0, 0);
    linear(0, 1) = backward(0, 1);
    linear(1, 0) = backward(1, 0);
    linear(1, 1) = backward(1, 1);
    const dlib::vector<double, 2> offset(backward(0, 2), backward(1, 2));
    const point_transform_affine output_to_source(linear, offset);

    matrix<rgb_pixel> result(img.nr(), img.nc());
    transform_image(img, result, interpolate_bilinear(), output_to_source);
    return result;
}

// Computes the face descriptor for an aligned face chip, optionally averaged
// over jittered copies of the chip.
//
// net:         the recognition network (non-const: running it mutates
//              internal state)
// face_chip:   aligned face image, expected to be 150x150 as required by the
//              network's input layer
// num_jitters: number of additional jittered copies to average in; values
//              <= 0 mean "use the chip as-is"
//
// Returns the mean of the descriptors of the original chip and of each
// jittered copy. The jitter generator is default-seeded on every call, so the
// result for a given image is reproducible from run to run.
matrix<float, 128, 1> compute_avg_descriptor(anet_type& net, const matrix<rgb_pixel>& face_chip, int num_jitters = 0) {
    const int jitter_count = num_jitters > 0 ? num_jitters : 0;

    // std:: is spelled out because both dlib and std are opened at file scope.
    std::vector<matrix<rgb_pixel>> samples;
    samples.reserve(static_cast<size_t>(jitter_count) + 1);
    samples.push_back(face_chip);

    if (jitter_count > 0) {
        dlib::rand rnd;
        for (int i = 0; i < jitter_count; ++i) {
            samples.push_back(jitter_image_custom(face_chip, rnd));
        }
    }

    // One batched forward pass yields one descriptor per sample.
    const std::vector<matrix<float, 0, 1>> descriptors = net(samples);

    matrix<float, 128, 1> avg_desc;
    avg_desc = 0;
    for (const auto& descriptor : descriptors) {
        avg_desc += descriptor;
    }
    avg_desc /= static_cast<float>(descriptors.size());
    return avg_desc;
}

// =============================================================================
// 六、JNI 导出函数（Java 调用入口）
// 函数名格式：Java_类名_方法名
// =============================================================================

extern "C" __declspec(dllexport)
double Java_FaceCompare_compareImages(const char* img1_path, const char* img2_path) {
    try {
        auto& model = FaceModelManager::getInstance();
        shape_predictor& sp = model.sp;
        anet_type& net = model.net;
        frontal_face_detector& detector = model.detector;

        // --- 处理第一张图像 ---
        

        // --- 处理第二张图像 ---
        


        // --- 计算相似度 ---
        double distance = length(desc1 - desc2);
        return distance;  // 距离越小越相似
    }
    catch (const std::exception& e) {
        std::cerr << "Exception in compareImages: " << e.what() << std::endl;
        return -1.0;
    }
}
如需完整代码，请私信联系。