SIFT算法之python实现

最新推荐文章于 2025-06-19 17:52:40 发布

原创

最新推荐文章于 2025-06-19 17:52:40 发布 · 1.4w 阅读

224 ·

CC 4.0 BY-SA版权

0. 算法原理细节可参考：

https://blog.csdn.net/abcjennifer/article/details/7639681
https://blog.csdn.net/zddblog/article/details/7521424?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1
https://www.bilibili.com/video/BV1Qb411W7cK?p=1
本文参考了up主的代码，并在匹配阶段、金字塔输出方式等部分进行了重新设计，同时使代码实现完全对像素操作。

1. 算法流程

SIFT（Scale-invariant feature transform），即尺度不变特征变换，是一种检测图像局部特征并进行特征点匹配的算法。

算法流程如下：
在这里插入图片描述

2. python实现

OPENCV库中有SIFT函数，调用此函数很容易实现。下面的是直接对像素操作实现SIFT的源代码。

# coding: utf-8
import warnings

warnings.filterwarnings("ignore")  # 忽略警告
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image


def convolve(kernel, img, padding, strides):
    '''
    :param kernel:  输入的核函数
    :param img:    输入的图片
    :param padding:  需要填充的位置
    :param strides:   高斯核移动的步长
    :return:   返回卷积的结果
    '''
    result = None
    kernel_size = kernel.shape
    img_size = img.shape
    if len(img_size) == 3:  # 三通道图片就对每通道分别卷积  dstack和并
        channel = []
        for i in range(img_size[-1]):
            pad_img = np.pad(img[:, :, i], ((padding[0], padding[1]), (padding[2], padding[3])), 'constant')
            temp = []
            for j in range(0, img_size[0], strides[1]):
                temp.append([])
                for k in range(0, img_size[1], strides[0]):
                    val = (kernel * pad_img[j * strides[1]:j * strides[1] + kernel_size[0],
                                    k * strides[0]:k * strides[0] + kernel_size[1]]).sum()
                    temp[-1].append(val)
            channel.append(np.array(temp))

        channel = tuple(channel)
        result = np.dstack(channel)
    elif len(img_size) == 2:
        channel = []
        pad_img = np.pad(img, ((padding[0], padding[1]), (padding[2], padding[3])),
                         'constant')  # pad是填充函数 边界处卷积需要对边界外根据高斯核大小填0
        for j in range(0, img_size[0], strides[1]):  # 第j列 strides 是步长 本例步长为1 相当于遍历
            channel.append([])
            for k in range(0, img_size[1], strides[0]):  # 第i行
                val = (kernel * pad_img[j * strides[1]:j * strides[1] + kernel_size[0],
                                k * strides[0]:k * strides[0] + kernel_size[1]]).sum()  # 卷积的定义 相当于用高斯核做加权和
                channel[-1].append(val)

        result = np.array(channel)

    return result

# 函数1.1.1 undersampling
# 降采样,隔点取点
def undersampling(img, step=2):
    '''
    临近降采样
    :param img: 输入图片
    :param step: 降采样步长 默认为2（缩小两倍）
    :return: 返回降采样结果
    '''
    return img[::step, ::step]

# 函数1.1.2 GuassianKernel
# 产生高斯核
def GuassianKernel(sigma, dim):
    '''
    :param sigma: 标准差
    :param dim: 高斯核的纬度(必须是个奇数)
    :return: 返回高斯核
    '''
    temp = [t - (dim // 2) for t in range(dim)]  # 生成二维高斯的x与y
    assistant = []
    for i in range(dim):
        assistant.append(temp)
    assistant = np.array(assistant)
    temp = 2 * sigma * sigma
    result = (1.0 / (temp * np.pi)) * np.exp(-(assistant ** 2 + (assistant.T) ** 2) / temp)  # 二维高斯公式
    return result

# 函数1.1，getDoG
# 得到高斯金字塔和高斯差分金字塔
def getDoG(img, n, sigma0, S=None, O=None):
    '''
    :param img: 输入的图像
    :param sigma0: 输入的sigma
    :param n: 有几层用于提取特征
    :param S: 金字塔每层有几张gauss滤波后的图像
    :param O: 金字塔有几层
    :return: 返回差分高斯金字塔和高斯金字塔
    '''
    if S == None:
        S = n + 3  # 至少有4张 （第一张和最后一张高斯金字塔无法提取特征，差分以后的第一张和最后一张也无法提取特征）
    if O == None:
        O = int(np.log2(min(img.shape[0], img.shape[1]))) - 3  # 计算最大可以计算多少层 O=log2（min(img长，img宽））-3

    k = 2 ** (1.0 / n)
    sigma = [[(k ** s) * sigma0 * (1 << o) for s in range(S)] for o in range(O)]  # 每一层 sigma按照 k^1/s * sigama0  排列 下一层的sigma都要比上一层sigma大两倍
    sample = [undersampling(img, 1 << o) for o in range(O)]  # 降采样取图片作为该层的输入

    Guass_Pyramid = []
    for i in range(O):
        Guass_Pyramid.append([])  # 申明二维空数组
        for j in range(S):
            dim = int(6*sigma[i][j] + 1) # 上网查找相关信息 高斯核大小随sigma变化的效果更好
            #dim = int(9)
            if dim % 2 == 0:  # 防止输入的高斯核不是奇数
                dim += 1
            Guass_Pyramid[-1].append(convolve(GuassianKernel(sigma[i][j], dim), sample[i], [dim // 2, dim // 2, dim // 2, dim // 2], [1, 1]))  # 在第i层添加第j张 经过高斯卷积的 该图片四周扩展 5//2=2 用于高斯卷积
    DoG_Pyramid = [[Guass_Pyramid[o][s + 1] - Guass_Pyramid[o][s] for s in range(S - 1)] for o in range(O)]  #每一层中 上一张减去下一张得到高斯核

    return DoG_Pyramid, Guass_Pyramid, O  # 返回高斯金字塔和高斯差分金字塔

# 函数2.1.1 adjustLocalExtrema
# 功能：通过泰勒展开精调位置精调位置
def adjustLocalExtrema(DoG, o, s, x, y, contrastThreshold, edgeThreshold, sigma, n, SIFT_FIXPT_SCALE):
    SIFT_MAX_INTERP_STEPS = 5
    SIFT_IMG_BORDER = 5

    point = []

    img_scale = 1.0 / (255 * SIFT_FIXPT_SCALE)
    deriv_scale = img_scale * 0.5
    second_deriv_scale = img_scale
    cross_deriv_scale = img_scale * 0.25

    img = DoG[o][s]
    i = 0
    while i < SIFT_MAX_INTERP_STEPS:
        if s < 1 or s > n or y < SIFT_IMG_BORDER or y >= img