1. Reorg: rearrangement
This operator comes from YOLOv2. Like SSD, the network concatenates feature maps of different sizes from different levels for multi-scale detection; the difference is that YOLOv2 implements this with a reorg (passthrough) layer, as shown in the figure:
Given an input of size 2W*2W, we want W*W feature maps. Following the scheme above, each group of 4 elements is distributed one-per-map across 4 sub-feature-maps; traversing left to right, top to bottom yields 4 feature maps of size W*W (so with stride = 2, a 4x4xC input becomes a 2x2x4C output). The multi-channel case is analogous. The corresponding ncnn source:
int Reorg::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
// compute the output feature map size
int outw = w / stride;
int outh = h / stride;
// compute the output channel count
int outc = channels * stride * stride;
// allocate the output blob
top_blob.create(outw, outh, outc, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
// process one input channel at a time
const Mat m = bottom_blob.channel(q);
// each group of stride*stride elements is scattered to separate output channels
for (int sh = 0; sh < stride; sh++)
{
for (int sw = 0; sw < stride; sw++)
{
// each input channel expands to stride*stride output feature maps
float* outptr = top_blob.channel(q*stride*stride + sh*stride + sw);
for (int i = 0; i < outh; i++)
{
// source row for this sub-map, offset by (sh, sw)
const float* sptr = m.row(i*stride + sh) + sw;
for (int j = 0; j < outw; j++)
{
// copy one element
outptr[0] = sptr[0];
sptr += stride;
outptr++;
}
}
}
}
}
return 0;
}
With the code in hand this is quite clear; a standalone demo of the same index mapping is sketched below.
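As a quick sanity check on the index arithmetic, here is a small self-contained demo (plain C++, not ncnn code) that reorganizes a 4x4 map with stride = 2 into four 2x2 sub-maps:
#include <cstdio>
int main()
{
    const int stride = 2, w = 4, h = 4;
    const int outw = w / stride;
    const int outh = h / stride;
    float in[h * w];
    for (int i = 0; i < h * w; i++)
        in[i] = (float)i;
    // four sub-maps, one per (sh, sw) offset inside each stride x stride block
    float out[stride * stride][outh * outw];
    for (int sh = 0; sh < stride; sh++)
        for (int sw = 0; sw < stride; sw++)
            for (int i = 0; i < outh; i++)
                for (int j = 0; j < outw; j++)
                    out[sh * stride + sw][i * outw + j] = in[(i * stride + sh) * w + (j * stride + sw)];
    // prints: sub-map 0: 0 2 8 10, sub-map 1: 1 3 9 11, sub-map 2: 4 6 12 14, sub-map 3: 5 7 13 15
    for (int c = 0; c < stride * stride; c++)
    {
        printf("sub-map %d:", c);
        for (int i = 0; i < outh * outw; i++)
            printf(" %g", out[c][i]);
        printf("\n");
    }
    return 0;
}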
2. ReLU
This one is simple: values greater than 0 pass through unchanged, and values less than 0 are set to 0. ncnn additionally reads a slope parameter; when slope is nonzero the layer acts as leaky ReLU, multiplying the negative part by slope instead of zeroing it. The code:
int ReLU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
if (bottom_top_blob.elemsize == 1u)
return ReLU::forward_inplace_int8(bottom_top_blob, opt);
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
// slope == 0: plain ReLU, zero out the negative part
if (slope == 0.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
if (ptr[i] < 0)
ptr[i] = 0;
}
}
}
else
{
// slope != 0: leaky ReLU, multiply the negative part by slope
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
if (ptr[i] < 0)
ptr[i] *= slope;
}
}
}
return 0;
}
3. HardSwish
This activation comes from MobileNetV3. Reconstructed from the code below, ncnn computes the piecewise function

HardSwish(x) = 0 if x <= lower; x if x >= upper; x * (alpha*x + beta) otherwise

With the defaults noted in the comments (lower = -2.5, upper = 2.5, alpha = 0.2, beta = 0.5) this is x * ReLU5(x + 2.5) / 5 — a "ReLU5/5" variant rather than the paper's exact x * ReLU6(x + 3) / 6; see reference [2]. The code:
int HardSwish::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
// below lower (-2.5 by default): output 0
if (ptr[i] < lower)
ptr[i] = 0.f;
// above upper (2.5 by default): x passes through unchanged (note the empty statement)
else if (ptr[i] > upper) ;
// x*(x*0.2f + 0.5f)
else
ptr[i] = ptr[i] * (ptr[i] * alpha + beta);
}
}
return 0;
}
4. Interp: interpolation
When resizing an image we need to interpolate between pixel values; the usual choices are nearest-neighbor, bilinear, and bicubic interpolation — standard image-processing material.
Consider bilinear interpolation first: it is just two rounds of linear interpolation. Suppose the four neighboring pixels P1, P2, P3, P4 are known (say P1, P2 on the top row and P3, P4 on the bottom row, as in the original figure), and we want the value at Pm, which sits at fractional offset (u, v) inside that 2x2 neighborhood. The first pass interpolates along X:

Pa = (1 - u) * P1 + u * P2,  Pb = (1 - u) * P3 + u * P4

The second pass interpolates the two results along Y:

Pm = (1 - v) * Pa + v * Pb
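A minimal single-channel sketch of that computation (illustrative only, not ncnn's actual Interp implementation, which also handles scale ratios, border policy, and optimized paths):
#include <algorithm>
#include <cmath>
// Sample image src (width w, height h) at fractional position (fx, fy),
// assuming 0 <= fx <= w-1 and 0 <= fy <= h-1.
float bilinear_sample(const float* src, int w, int h, float fx, float fy)
{
    int x0 = (int)std::floor(fx);
    int y0 = (int)std::floor(fy);
    int x1 = std::min(x0 + 1, w - 1);
    int y1 = std::min(y0 + 1, h - 1);
    float u = fx - x0; // fractional offset along X
    float v = fy - y0; // fractional offset along Y
    // first pass: interpolate along X on the two neighboring rows
    float top    = (1.f - u) * src[y0 * w + x0] + u * src[y0 * w + x1];
    float bottom = (1.f - u) * src[y1 * w + x0] + u * src[y1 * w + x1];
    // second pass: interpolate along Y between the two row results
    return (1.f - v) * top + v * bottom;
}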
5. Log: logarithm
It simply takes a logarithm. Reconstructed from the code, the formula is y = log_base(shift + x * scale), with the natural logarithm used when base == -1. The code:
int Log::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
// base == -1 means natural log (base e)
if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
// take the natural logarithm
ptr[i] = log(shift + ptr[i] * scale);
}
}
}
else
{
float log_base_inv = 1.f / log(base);
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
// log in the requested base, via log_b(x) = ln(x) / ln(b)
for (int i=0; i<size; i++)
{
ptr[i] = log(shift + ptr[i] * scale) * log_base_inv;
}
}
}
return 0;
}
6. Noop: no operation
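The Noop layer passes its input through untouched. A minimal sketch of what its forward presumably looks like (the exact signature in the ncnn source may differ; this assumes the multi-blob in-place form):
int Noop::forward_inplace(std::vector<Mat>& /*bottom_top_blobs*/, const Option& /*opt*/) const
{
    // identity: leave every blob exactly as it is
    return 0;
}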
7. LRN: local response normalization
There are two cases, distinguished by the normalization region. The first is across channels: for a WxHxC Mat, at each spatial position we take a 1x1xlocal_size window along the channel axis, sum the squared values, and use that sum to normalize the value at (x, y, c):

b(x,y,c) = a(x,y,c) * (bias + (alpha / local_size) * sum of a(x,y,c')^2 over the window)^(-beta)

(formula reconstructed from the code; see reference [3]). The second region is within a channel: for each channel we slide a local_size x local_size spatial window, sum the squared values inside it, and normalize the window's center with the same formula, dividing alpha by local_size^2 instead. The code (the opening signature, missing here, is restored to match the in-place body):
int LRN::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
size_t elemsize = bottom_top_blob.elemsize;
int size = w * h;
// squared values with local_size padding
Mat square_blob;
square_blob.create(w, h, channels, elemsize, opt.workspace_allocator);
if (square_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_top_blob.channel(q);
float* outptr = square_blob.channel(q);
// square every element
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * ptr[i];
}
}
// across-channel normalization region
if (region_type == NormRegion_ACROSS_CHANNELS)
{
// sum-of-squares accumulator
Mat square_sum;
square_sum.create(w, h, channels, elemsize, opt.workspace_allocator);
if (square_sum.empty())
return -100;
square_sum.fill(0.f);
// alpha = 1.0, local_size = 5
const float alpha_div_size = alpha / local_size;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
// square sum
float* ssptr = square_sum.channel(q);
// window of local_size channels centered on q
for (int p=q - local_size / 2; p<=q + local_size / 2; p++)
{
if (p < 0 || p >= channels)
continue;
const float* sptr = square_blob.channel(p);
// accumulate squared values over the local_size channels
for (int i=0; i<size; i++)
{
ssptr[i] += sptr[i];
}
}
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
// x * (bias + alpha_div_size * ss)^(-beta), e.g. x * (1 + 0.2*ss)^(-0.75)
ptr[i] = ptr[i] * pow(bias + alpha_div_size * ssptr[i], -beta);
}
}
}
// within-channel region: borders are zero-padded so every position gets a full window
else if (region_type == NormRegion_WITHIN_CHANNEL)
{
int outw = w;
int outh = h;
Mat square_blob_bordered = square_blob;
int pad = local_size / 2;
if (pad > 0)
{
Option opt_b = opt;
opt_b.blob_allocator = opt.workspace_allocator;
copy_make_border(square_blob, square_blob_bordered, pad, local_size - pad - 1, pad, local_size - pad - 1, BORDER_CONSTANT, 0.f, opt_b);
if (square_blob_bordered.empty())
return -100;
w = square_blob_bordered.w;
h = square_blob_bordered.h;
}
// maxk = 25
const int maxk = local_size * local_size;
// alpha = 1.0
const float alpha_div_size = alpha / maxk;
// compute the offsets of the norm window elements
std::vector<int> _space_ofs(maxk);
int* space_ofs = &_space_ofs[0];
{
int p1 = 0;
int p2 = 0;
// gap to jump from the end of one window row to the next
int gap = w - local_size;
for (int i = 0; i < local_size; i++)
{
for (int j = 0; j < local_size; j++)
{
space_ofs[p1] = p2;
p1++;
p2++;
}
p2 += gap;
}
}
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
const Mat m = square_blob_bordered.channel(q);
for (int i = 0; i < outh; i++)
{
for (int j = 0; j < outw; j++)
{
const float* sptr = m.row(i) + j;
float ss = 0.f;
// sum the squared values inside this window
for (int k = 0; k < maxk; k++)
{
float val = sptr[ space_ofs[k] ];
ss += val;
}
ptr[j] = ptr[j] * pow(bias + alpha_div_size * ss, -beta);
}
ptr += outw;
}
}
}
return 0;
}
8. MemoryData: output a stored Mat
It simply outputs a Mat that was loaded along with the model, cloned into the top blob:
// clone the stored Mat into the output blob
int MemoryData::forward(const std::vector<Mat>& /*bottom_blobs*/, std::vector<Mat>& top_blobs, const Option& opt) const
{
Mat& top_blob = top_blobs[0];
top_blob = data.clone(opt.blob_allocator);
if (top_blob.empty())
return -100;
return 0;
}
9. Normalize: normalization
Intuitively, each element is divided by an L2 norm. There are three cases (the shared formula is given right after this list):
Case 1: global — the norm over all WxHxC elements;
Case 2: per channel — the norm over the WxH elements of each channel;
Case 3: per position — at each of the WxH positions, the norm over the Cx1 elements across channels.
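In every case the per-group computation, reconstructed from the code below, is the same. With s the scale (shared or per channel) and the sum running over the chosen group:

y_i = s * x_i / sqrt(sum_j x_j^2 + eps)        (eps_mode 0, caffe/mxnet)
y_i = s * x_i / max(sqrt(sum_j x_j^2), eps)    (eps_mode 1, pytorch)
y_i = s * x_i / sqrt(max(sum_j x_j^2, eps))    (eps_mode 2, tensorflow)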
The code:
int Normalize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int size = w * h;
top_blob.create(w, h, channels, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
// global normalization
if (across_spatial && across_channel)
{
// square
Mat square_sum_blob;
square_sum_blob.create(channels, elemsize, opt.workspace_allocator);
if (square_sum_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
// sum of squares within this channel
float ssum = 0.f;
for (int i=0; i<size; i++)
{
ssum += ptr[i] * ptr[i];
}
square_sum_blob[q] = ssum;
}
// total sum of squares over all elements
float ssum = 0.f;
for (int q=0; q<channels; q++)
{
ssum += square_sum_blob[q];
}
float a;
// compute the reciprocal norm
if (eps_mode == 0) // caffe/mxnet
{
a = 1.f / sqrt(ssum + eps);
}
else if (eps_mode == 1) // pytorch
{
a = 1.f / std::max((float)sqrt(ssum), eps);
}
else //if (eps_mode == 2) // tensorflow
{
a = 1.f / sqrt(std::max(ssum, eps));
}
// divide by the norm
// channel_shared: every channel uses scale_data[0]
if (channel_shared)
{
float scale = a * scale_data[0];
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * scale;
}
}
}
else
{
// each channel has its own scale
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float scale = a * scale_data[q];
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * scale;
}
}
}
return 0;
}
// per-channel normalization
if (across_spatial && !across_channel)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float ssum = 0.f;
for (int i=0; i<size; i++)
{
ssum += ptr[i] * ptr[i];
}
float a;
if (eps_mode == 0) // caffe/mxnet
{
a = 1.f / sqrt(ssum + eps);
}
else if (eps_mode == 1) // pytorch
{
a = 1.f / std::max((float)sqrt(ssum), eps);
}
else //if (eps_mode == 2) // tensorflow
{
a = 1.f / sqrt(std::max(ssum, eps));
}
// scale for this channel
float scale = a * (channel_shared ? scale_data[0] : scale_data[q]);
// normalize
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * scale;
}
}
return 0;
}
// normalize across channels at each of the w*h positions
if (!across_spatial && across_channel)
{
// square sum, 1 / sqrt(ssum)
Mat square_sum_blob;
square_sum_blob.create(size, elemsize, opt.workspace_allocator);
if (square_sum_blob.empty())
return -100;
// channel_shared case
if (channel_shared)
{
float scale = scale_data[0];
#pragma omp parallel for num_threads(opt.num_threads)
for (int i=0; i<size; i++)
{
float ssum = 0.f;
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
ssum += ptr[i] * ptr[i];
}
float a;
if (eps_mode == 0) // caffe/mxnet
{
a = 1.f / sqrt(ssum + eps);
}
else if (eps_mode == 1) // pytorch
{
a = 1.f / std::max((float)sqrt(ssum), eps);
}
else //if (eps_mode == 2) // tensorflow
{
a = 1.f / sqrt(std::max(ssum, eps));
}
square_sum_blob[i] = a * scale;
}
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * square_sum_blob[i];
}
}
}
else
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int i=0; i<size; i++)
{
float ssum = 0.f;
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
ssum += ptr[i] * ptr[i];
}
float a;
if (eps_mode == 0) // caffe/mxnet
{
a = 1.f / sqrt(ssum + eps);
}
else if (eps_mode == 1) // pytorch
{
a = 1.f / std::max((float)sqrt(ssum), eps);
}
else //if (eps_mode == 2) // tensorflow
{
a = 1.f / sqrt(std::max(ssum, eps));
}
square_sum_blob[i] = a;
}
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float scale = scale_data[q];
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] * square_sum_blob[i] * scale;
}
}
}
return 0;
}
return 0;
}
10. Permute: permutation
It rearranges the input Mat into a given order.
2-D case:
order_type
0 = w h: default layout
1 = h w: emit elements in (h, w) order, traversal order w->h
3-D case:
order_type
0 = w h c: default layout
1 = h w c: emit elements in (h, w, c) order, traversal order c->w->h
2 = w c h: emit elements in (w, c, h) order, traversal order h->c->w
3 = c w h: emit elements in (c, w, h) order, traversal order h->w->c
4 = h c w: emit elements in (h, c, w) order, traversal order w->c->h
5 = c h w: emit elements in (c, h, w) order, traversal order w->h->c
For example, order_type 3 turns a w x h x c Mat into a c x w x h Mat. The code:
// rearrange the input according to the given order
// default storage order: w x h in 2-D, w x h x c in 3-D
int Permute::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int dims = bottom_blob.dims;
if (dims == 2)
{
// order_type
// 0 = w h
// 1 = h w
if (order_type == 0)
{
top_blob = bottom_blob;
}
// transpose w x h into h x w
else if (order_type == 1)
{
top_blob.create(h, w, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
const float* ptr = bottom_blob;
float* outptr = top_blob;
// write the transposed elements
for (int i = 0; i < w; i++)
{
for (int j = 0; j < h; j++)
{
outptr[i*h + j] = ptr[j*w + i];
}
}
}
return 0;
}
// order_type
// 0 = w h c
// 1 = h w c
// 2 = w c h
// 3 = c w h
// 4 = h c w
// 5 = c h w
// 0. w x h x c (identity)
if (order_type == 0)
{
top_blob = bottom_blob;
}
// 1. (h, w, c): traversal c->w->h (per-channel transpose)
else if (order_type == 1)
{
top_blob.create(h, w, channels, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
for (int i = 0; i < w; i++)
{
for (int j = 0; j < h; j++)
{
outptr[i*h + j] = ptr[j*w + i];
}
}
}
}
// 2. (w, c, h): traversal h->c->w
else if (order_type == 2)
{
top_blob.create(w, channels, h, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<h; q++)
{
float* outptr = top_blob.channel(q);
for (int i = 0; i < channels; i++)
{
const float* ptr = bottom_blob.channel(i).row(q);
for (int j = 0; j < w; j++)
{
outptr[i*w + j] = ptr[j];
}
}
}
}
// 3. (c, w, h): traversal h->w->c
else if (order_type == 3)
{
top_blob.create(channels, w, h, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<h; q++)
{
float* outptr = top_blob.channel(q);
for (int i = 0; i < w; i++)
{
for (int j = 0; j < channels; j++)
{
const float* ptr = bottom_blob.channel(j).row(q);
outptr[i*channels + j] = ptr[i];
}
}
}
}
// 4. (h, c, w): traversal w->c->h
else if (order_type == 4)
{
top_blob.create(h, channels, w, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<w; q++)
{
float* outptr = top_blob.channel(q);
for (int i = 0; i < channels; i++)
{
const float* ptr = bottom_blob.channel(i);
for (int j = 0; j < h; j++)
{
outptr[i*h + j] = ptr[j*w + q];
}
}
}
}
// 5. (c, h, w): traversal w->h->c
else if (order_type == 5)
{
top_blob.create(channels, h, w, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<w; q++)
{
float* outptr = top_blob.channel(q);
for (int i = 0; i < h; i++)
{
for (int j = 0; j < channels; j++)
{
const float* ptr = bottom_blob.channel(j);
outptr[i*channels + j] = ptr[i*w + q];
}
}
}
}
return 0;
}
11. Power
It computes y = (shift + x * scale)^power element-wise (formula reconstructed from the code). The code:
int Power::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* ptr = bottom_top_blob.channel(q);
for (int i=0; i<size; i++)
{
ptr[i] = pow((shift + ptr[i] * scale), power);
}
}
return 0;
}
12. PReLU
The formula is f(x) = x for x >= 0 and f(x) = slope * x for x < 0, where slope is a learned parameter (shared or per channel). The core of the code:
if (ptr[i] < 0)
ptr[i] *= slope;
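For context, here is a sketch of the full per-channel loop around that core (scaffolding reconstructed in the style of the other layers above, not copied from the ncnn source; it assumes the layer's num_slope and slope_data members):
int PReLU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
    int w = bottom_top_blob.w;
    int h = bottom_top_blob.h;
    int channels = bottom_top_blob.c;
    int size = w * h;
    #pragma omp parallel for num_threads(opt.num_threads)
    for (int q=0; q<channels; q++)
    {
        float* ptr = bottom_top_blob.channel(q);
        // one learned slope per channel, or a single shared slope
        float slope = num_slope > 1 ? slope_data[q] : slope_data[0];
        for (int i=0; i<size; i++)
        {
            if (ptr[i] < 0)
                ptr[i] *= slope;
        }
    }
    return 0;
}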
13. MVN: mean-variance normalization
Subtract the mean and divide by the standard deviation, turning the input into zero mean and unit variance. Reconstructed from the code: y = (x - mean) / (sqrt(mean((x - mean)^2)) + eps).
There are two cases:
Case 1: per channel — compute each channel's mean, subtract it within that channel, then compute each channel's standard deviation and divide by it;
Case 2: all channels as a whole — compute the global mean, subtract it from every element, then compute the global standard deviation and divide by it.
The code:
int MVN::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;
size_t elemsize = bottom_blob.elemsize;
int size = w * h;
top_blob.create(w, h, channels, elemsize, opt.blob_allocator);
if (top_blob.empty())
return -100;
// prepare the sum of each channel
Mat sum(channels, elemsize, opt.workspace_allocator);
if (sum.empty())
return -100;
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float s = 0.f;
for (int i=0; i<size; i++)
{
s += ptr[i];
}
sum[q] = s;
}
// across-channel case
if (across_channels)
{
// compute the mean across all channels
float mean = 0.f;
for (int q=0; q<channels; q++)
{
mean += sum[q];
}
mean = mean / (channels * size);
// subtract the global mean
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] - mean;
}
}
}
else
{
// subtract each channel's own mean
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = bottom_blob.channel(q);
float* outptr = top_blob.channel(q);
float mean = sum[q] / size;
for (int i=0; i<size; i++)
{
outptr[i] = ptr[i] - mean;
}
}
}
// variance normalization
if (normalize_variance)
{
// prepare squared sum per channel
Mat sqsum(channels, elemsize, opt.workspace_allocator);
if (sqsum.empty())
return -100;
// accumulate per-channel squared sums (for the standard deviation)
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
const float* ptr = top_blob.channel(q);
float s = 0.f;
for (int i=0; i<size; i++)
{
s += ptr[i] * ptr[i];
}
sqsum[q] = s;
}
// across-channel case
if (across_channels)
{
// compute squared mean across channels
float sqmean = 0.f;
for (int q=0; q<channels; q++)
{
sqmean += sqsum[q];
}
// global standard deviation
sqmean = sqmean / (channels * size);
// normalize variance
float norm_var = sqrt(sqmean) + eps;
float norm_var_inv = 1.f / norm_var;
// apply normalize_variance: divide every element by the global standard deviation
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);
for (int i=0; i<size; i++)
{
outptr[i] = outptr[i] * norm_var_inv;
}
}
}
else
{
// apply normalize_variance: divide each channel by its own standard deviation
#pragma omp parallel for num_threads(opt.num_threads)
for (int q=0; q<channels; q++)
{
float* outptr = top_blob.channel(q);
float sqmean = sqsum[q] / size;
float norm_var = sqrt(sqmean) + eps;
float norm_var_inv = 1.f / norm_var;
for (int i=0; i<size; i++)
{
outptr[i] = outptr[i] * norm_var_inv;
}
}
}
}
return 0;
}
14. PriorBox: generate prior boxes
There are two cases (a sketch of the per-location box sizes follows this list):
Case 1: only num_min_size sizes (min_sizes) and num_ratios aspect ratios (aspect_ratios) are given. To avoid generating too many priors — w*h*num_min_size*num_ratios of them — only w*h*(num_min_size + num_ratios - 1) are generated, by pairing:
(1) aspect_ratios[0] with min_sizes[i], for 0 <= i < num_min_size
(2) min_sizes[0] with aspect_ratios[i], for 1 <= i < num_ratios
Case 2: num_min_size minimum sizes (min_sizes), num_max_size maximum sizes (max_sizes), and num_ratios aspect ratios are all given; w*h*(num_min_size + num_min_size*num_ratios + num_max_size) priors are generated — e.g., SSD's typical 1 min size, 1 max size and 2 extra ratios gives 1 + 2 + 1 = 4 priors per location.
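As a sketch of the second strategy, here are the SSD-style box sizes for one (min_size, max_size) pair; the helper below is illustrative, not ncnn code (offsets, variances, and clipping are omitted):
#include <cmath>
#include <vector>

struct BoxSize { float w, h; };

// One square box at min_size, one at sqrt(min*max), plus one box per extra
// aspect ratio: 1 + 1 + num_ratios sizes per (min, max) pair and location.
std::vector<BoxSize> prior_sizes(float min_size, float max_size,
                                 const std::vector<float>& aspect_ratios)
{
    std::vector<BoxSize> sizes;
    sizes.push_back({ min_size, min_size });
    float s = std::sqrt(min_size * max_size);
    sizes.push_back({ s, s });
    for (float ar : aspect_ratios)
        sizes.push_back({ min_size * std::sqrt(ar), min_size / std::sqrt(ar) });
    return sizes;
}
With one max size per min size, summing over the min sizes reproduces the num_min_size + num_min_size*num_ratios + num_max_size priors per location quoted above.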
15. Proposal: generate bounding boxes from prior anchors and the input
(1) Decode object rectangles from the anchors and the predicted regression deltas;
(2) Clip each rectangle to the image boundary;
(3) Apply non-maximum suppression to filter out overlapping boxes;
(4) Store the surviving boxes in the output blob.
A sketch of the decoding in step (1) follows.
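Step (1) uses the standard Faster R-CNN box parameterization; a standalone sketch (struct and function names are illustrative, not from the ncnn source):
#include <cmath>

struct Rect { float x0, y0, x1, y1; };

// Apply predicted deltas (dx, dy, dw, dh) to an anchor box:
// shift the center by (dx*aw, dy*ah), rescale the size by exp(dw), exp(dh).
Rect decode_box(const Rect& anchor, float dx, float dy, float dw, float dh)
{
    float aw = anchor.x1 - anchor.x0 + 1.f;
    float ah = anchor.y1 - anchor.y0 + 1.f;
    float acx = anchor.x0 + 0.5f * aw; // anchor center
    float acy = anchor.y0 + 0.5f * ah;
    float cx = acx + dx * aw;          // shifted center
    float cy = acy + dy * ah;
    float w = aw * std::exp(dw);       // rescaled size
    float h = ah * std::exp(dh);
    return { cx - 0.5f * w, cy - 0.5f * h,
             cx + 0.5f * w - 1.f, cy + 0.5f * h - 1.f };
}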
~~~ To be continued ~~~
References:
[1] https://zhuanlan.zhihu.com/p/60213361
[2] https://zhuanlan.zhihu.com/p/67487416
[3] https://zhuanlan.zhihu.com/p/87117010