matterport_MaskRCNN（4）——测试模型

最新推荐文章于 2022-09-16 13:43:58 发布

原创最新推荐文章于 2022-09-16 13:43:58 发布 · 830 阅读

6 ·

CC 4.0 BY-SA版权

tensorflow 同时被 2 个专栏收录

6 篇文章

订阅专栏

MaskRCNN

4 篇文章

订阅专栏

本文介绍了如何使用Matterport的Mask R-CNN模型进行检测，包括自定义配置、准备测试集、创建inference模型、模型检测及可视化。同时，针对源码修改不生效的问题，提出了解决方案，特别是在处理visualize.py时添加了保存图像的功能。此外，还详细解释了如何利用自定义函数计算精度（pre）、召回率（rec）、平均精度（AP）和IoU。

模型的检测过程：

（1）自定义配置信息
（2）准备测试集test
（3）创建inference模式下的模型
（4）调用model.detect进行检测
（5）可视化，或者计算评价指标（AP等）
代码如下：（与训练过程的代码重复的部分，不再注释）

# -*- coding: utf-8 -*-
"""
Created on Tue Dec 17 15:01:49 2019

@author: Lenovo
"""
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
import yaml
import skimage.io

# Directory two levels above the current working directory (presumably the
# root of the Mask_RCNN checkout -- confirm for your layout).
ROOT_DIR = os.path.abspath("../../")

# Put that directory on sys.path before the `mrcnn` imports that follow.
sys.path.append(ROOT_DIR) 
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
################################################################################
# "logs" directory and COCO weights file, both located under ROOT_DIR.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download the COCO weights only when nothing exists at that path yet.
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)   
################################################################################
class HooksConfig(Config):
    """
    Configuration for the hooks dataset (adapted from the shapes sample):
    subclass the base Config and override only the attributes that differ.
    """   
    NAME = "hooks"
    # When several images are tested on the GPU at once, the number of input
    # images must equal the batch size = GPU_COUNT * IMAGES_PER_GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    NUM_CLASSES = 1 + 1  # background + hooks
    # Both image dimension settings are 1024 (128 * 8).
    IMAGE_MIN_DIM = 128*8
    IMAGE_MAX_DIM = 128*8
    # Five anchor scales: 64, 128, 256, 512, 1024.
    RPN_ANCHOR_SCALES = (8*8, 16*8, 32*8, 64*8, 128*8)  
    TRAIN_ROIS_PER_IMAGE = 32
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 5
    
# Instantiate the configuration and print its values.
config = HooksConfig()
config.display()
################################################################################
def get_ax(rows=1, cols=1, size=8):
    """Create a rows x cols grid of matplotlib axes and return it.

    Each cell is `size` units wide and tall, so the figure measures
    (size * cols) by (size * rows). The figure object itself is discarded.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes
################################################################################
class HookDataset(utils.Dataset):
    """Dataset of hook images whose instance masks are stored as label images.

    `load_shapes` registers every image together with the path of its mask
    image and the path of a YAML file listing its label names. In the mask
    image the pixels of the k-th object carry the value k (k >= 1).
    """

    def get_obj_index(self, image):
        """Return the number of objects encoded in a label image.

        Objects are numbered 1..n in the label image, so the largest pixel
        value equals the object count.
        """
        return int(np.max(np.asarray(image)))

    def from_yaml_get_class(self, image_id):
        """Return the label names of an image, without the first entry.

        The names are read from the `label_names` list of the YAML file
        registered for `image_id`. The first entry is dropped (presumably
        the background label -- confirm against the annotation files).
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # NOTE(review): yaml.safe_load would be the safer choice if these
            # annotation files can ever come from an untrusted source.
            temp = yaml.load(f.read(), Loader=yaml.FullLoader)
        return temp['label_names'][1:]

    def draw_mask(self, num_obj, mask, image):
        """Fill `mask` with one binary layer per object and return it.

        Args:
            num_obj: number of objects; layer `k` receives the pixels whose
                value in `image` is `k + 1`.
            mask: array of shape [height, width, num_obj], modified in place.
            image: label image (PIL image or array) with one value per pixel.

        Raises:
            IndexError: if `image` is smaller than the first two dimensions
                of `mask`.
        """
        # The mask's own shape gives the target size, so no lookup through an
        # image id is needed (the old code read an undefined `image_id`).
        height, width = mask.shape[:2]
        pixels = np.asarray(image)
        if pixels.ndim != 2:
            # Multi-channel pixels never equal a scalar object index, so no
            # mask pixel would be set; same outcome as the per-pixel version.
            return mask
        region = pixels[:height, :width]
        if region.shape != (height, width):
            raise IndexError("mask image is smaller than the declared image size")
        # One vectorised comparison per object replaces the per-pixel
        # getpixel() loop.
        for index in range(num_obj):
            mask[:, :, index][region == index + 1] = 1
        return mask

    def load_shapes(self, count, height, width, img_floder, mask_floder, imglist, dataset_root_path):
        """Register the single class "hook" and the first `count` images.

        Args:
            count: number of entries of `imglist` to register.
            height, width: size stored for every registered image.
            img_floder: directory that contains the images.
            mask_floder: directory that contains "<name>_mask.png" files.
            imglist: image file names; the part before the first "." is used
                as <name>.
            dataset_root_path: prefix of "total/<name>_json/info.yaml"; it is
                concatenated as-is, so it must end with a path separator.
        """
        self.add_class("shapes", 1, "hook")
        for i in range(count):
            filename = imglist[i]
            filestr = filename.split(".")[0]
            mask_path = mask_floder + "/" + filestr + "_mask.png"
            yaml_path = dataset_root_path + "total/" + filestr + "_json/info.yaml"
            self.add_image("shapes", image_id=i, path=img_floder + "/" + filename,
                           width=width, height=height, mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Return the instance masks and class ids of one image.

        Returns:
            mask: uint8 array of shape [height, width, num_objects] holding
                one binary layer per object.
            class_ids: int32 array with the class id of "hook", repeated once
                for every label name that contains "hook".
        """
        info = self.image_info[image_id]
        # Close the mask file once its pixels have been copied into `mask`.
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img)

        # Every pixel of the label image holds exactly one object index, so
        # the layers cannot overlap and no occlusion pass is needed. The
        # previous occlusion loop never executed and its setup crashed on
        # images without objects.

        labels = self.from_yaml_get_class(image_id)
        class_ids = np.array(
            [self.class_names.index("hook") for label in labels if "hook" in label],
            dtype=np.int32)
        return mask, class_ids
####################### Custom configuration used for inference ############################
# Keep the settings identical to training; override only what inference needs.
class InferenceConfig(HooksConfig):
    # One image per call: batch size = GPU_COUNT * IMAGES_PER_GPU = 1.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Instantiate the inference configuration and print its values.
inference_config = InferenceConfig()
inference_config.display()

# Build the model in inference mode; nothing is saved in this mode, so
# model_dir is only a placeholder.
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=' ')

# Give the path of the weights file explicitly, or let the model look up the
# most recent one (commented-out alternative below).
model_path = 'F:/0.MaskRCNN(3times)/dataset_matterport/NormalNoise/hooks20191129T1552_Normal/mask_rcnn_hooks_0009.h5'
#model_path = model.find_last()

# Load the model weights.
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

######################### Build the test set ######################################################
# For test (the validation set is used directly here, unchanged)
dataset_Val_root_path="F:/0.MaskRCNN(3times)/dataset_matterport/NormalNoise/Val/"
img_floder_Val = dataset_Val_root_path+"rgb"
mask_floder_Val = dataset_Val_root_path+"mask"
# Every entry of the rgb folder is treated as one test image.
imglist_Val = os.listdir(img_floder_Val) 
count_Val = len(imglist_Val)

dataset_val = HookDataset()
# Positional arguments follow load_shapes(count, height, width, ...):
# height=3648, width=5472.
dataset_val.load_shapes(count_Val, 3648, 5472, img_floder_Val, mask_floder_Val, imglist_Val,dataset_Val_root_path)
dataset_val.prepare()
################################ Test a single image and save the result ###############################################
# Pick one image at random and load its data (image, ground-truth boxes, masks, ...)
image_id = random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, 
                           image_id, use_mini_mask=False)
# Log the loaded values, then display the ground-truth masks and boxes
log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                           dataset_val.class_names, figsize=(8, 8))

# Run detection; the return value is a list of dicts, one dict of results per image
results = model.detect([original_image], verbose=1)
# Take the result of the first image, visualize it and save the figure
r = results[0]
save_Dir='F:/0.MaskRCNN(3times)/dataset_matterport/NormalNoise'
# Output file is named after the dataset image id.
temp=save_Dir+'/'+str(image_id)+'.png'
# save_path is the extra parameter added to display_instances in the
# modified visualize.py; the unmodified function does not accept it.
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], ax=get_ax(),save_path=temp)
                           
########################## Test several images and save the results #####################################################
# The snippet below is disabled by wrapping it in a string literal. Before
# enabling it, set GPU_COUNT * IMAGES_PER_GPU in the config to the number of
# images passed to model.detect in one call.
# NOTE(review): inside the snippet `temp` is built once, before the saving
# loop, from the last `image_id`; every result would therefore be written to
# the same file. Build the path per image when enabling it.
'''注：打开注释后，要根据一次性进行test的图片， 修改config中的对应值
# 检测函数的输入可以是一个list列表，但是其图片数量必须等于BatchSize=GPU_COUNT*IMAGES_PER_GPU
# GPU_COUNT*IMAGES_PER_GPU在config中设置
image_ids = np.random.choice(dataset_val.image_ids, 3)
image_list=[]
for image_id in image_ids:
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    image_list.append(image)
results = model.detect(image_list, verbose=0)

save_Dir='E:/1.Maskrcnn/Mask_RCNN-master/samples/shapes'
temp=save_Dir+'/'+str(image_id)+'.png'
for i in range(0,3):
    r=results[i]
    image=image_list[i]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], ax=get_ax(),save_path=temp)
'''
########################## Detect several images and compute the AP #####################################################

# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
# NOTE: np.random.choice samples with replacement by default, so the same
# image may be evaluated more than once.
image_ids = np.random.choice(dataset_val.image_ids, 10)
APs = []

for image_id in image_ids:
    # Load one image with its ground truth and run detection on it.
    # model.detect does its own preprocessing, so the image is passed as
    # loaded (the unused mold_image call was removed).
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    results = model.detect([image], verbose=0)
    r = results[0]

    # AP of this image: detections matched against the ground truth
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)

# mAP is the mean of the per-image AP values.
print("mAP: ", np.mean(APs))

模型检测结果results以及字典元素

注意事项，关于修改源码不生效问题：

GitHub上的该项目在运行setup.py之后，会生成一个.egg文件，在我的mask虚拟环境中路径是D:\Anaconda\envs\mask\Lib\site-packages\mask_rcnn-2.1-py3.7.egg。Python实际导入的是这个.egg中的代码，而不是项目目录下的源码，所以要修改源码时，需要修改该.egg中对应的文件，并重启Python解释器（或IDE内核）才能生效。.egg文件其实是一个压缩包，所以我这里右键，使用好压打开，然后使用内部查看器找到并修改需要改动的源码。
在这里插入图片描述

注意事项，对于visualize.py的修改

由于源码中给出的display_instances函数只能显示检测结果，不能保存图像，故添加保存功能。
增加参数save_path=None，函数会将显示出的图像保存到对应的save_path中，修改后的代码如下。

def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="",
                      figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True,
                      colors=None, captions=None,save_path=None):
    """Draw detected instances on an image and optionally save the figure.

    image: array whose first two dimensions are (height, width).
    boxes: [N, 4] array of (y1, x1, y2, x2) per instance.
    masks: [height, width, N] array, one mask per instance.
    class_ids: [N] array of indices into class_names.
    class_names: list of class names.
    scores: (optional) per-instance confidence, appended to the caption.
    title: (optional) title set on the axes.
    figsize: (optional) figure size used when no axes is passed.
    ax: (optional) axes to draw on; when omitted, a figure is created and
        shown automatically.
    show_mask, show_bbox: whether to draw the masks / the bounding boxes.
    colors: (optional) one color per instance; random colors otherwise.
    captions: (optional) one caption per instance, replacing the default
        "class name + score" caption.
    save_path: (optional) file path; when given, the figure that owns `ax`
        is written there.
    """
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # If no axis is passed, create one and automatically call show()
    auto_show = False
    if not ax:
        _, ax = plt.subplots(1, figsize=figsize)
        auto_show = True

    # Generate random colors
    colors = colors or random_colors(N)

    # Show area outside image boundaries (10 extra pixels on every side).
    height, width = image.shape[:2]
    ax.set_ylim(height + 10, -10)
    ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

    # Work on a copy so the caller's image is left untouched.
    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        color = colors[i]
        # Bounding box
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        y1, x1, y2, x2 = boxes[i]
        if show_bbox:
            # Dashed, unfilled rectangle anchored at (x1, y1).
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                alpha=0.7, linestyle="dashed",
                                edgecolor=color, facecolor='none')
            ax.add_patch(p)
        # Label: without explicit captions, use "class name + score".
        if not captions:
            class_id = class_ids[i]
            score = scores[i] if scores is not None else None
            label = class_names[class_id]
            caption = "{} {:.3f}".format(label, score) if score else label
        else:
            caption = captions[i]
        ax.text(x1, y1 + 8, caption,
                color='w', size=11, backgroundcolor="none")
        # Mask: tint the pixels of the i-th mask with the instance color.
        mask = masks[:, :, i]
        if show_mask:
            masked_image = apply_mask(masked_image, mask, color)
        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor="none", edgecolor=color)
            ax.add_patch(p)
    ax.imshow(masked_image.astype(np.uint8))
    # Save before showing: once plt.show() returns, the figure may already
    # be closed and the saved file would be blank. Saving through ax.figure
    # writes the figure that owns `ax` rather than pyplot's current figure.
    if save_path:
        ax.figure.savefig(save_path)
    if auto_show:
        plt.show()

关于计算pre，rec，ap，IoU的自定义函数

该项目中提供了相关的工具，位于Mask_RCNN-master\mrcnn\utils.py中
计算AP值的函数compute_ap代码如下：

def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Returns:
    mAP: Average precision, i.e. the area under the precision/recall steps.
    precisions: Padded, non-increasing precision value per prediction step.
    recalls: Padded recall value per prediction step.
    overlaps: IoU overlaps as returned by compute_matches.
    """
    # Match predictions against ground truth; a pred_match entry greater
    # than -1 marks a prediction that was matched.
    gt_match, pred_match, overlaps = compute_matches(
        gt_boxes, gt_class_ids, gt_masks,
        pred_boxes, pred_class_ids, pred_scores, pred_masks,
        iou_threshold)

    # Running count of matched predictions after each prediction step.
    hits = np.cumsum(pred_match > -1)
    # Precision after k predictions = matched so far / k.
    steps = np.arange(len(pred_match)) + 1
    precisions = hits / steps
    # Recall after k predictions = matched so far / number of gt instances.
    recalls = hits.astype(np.float32) / len(gt_match)

    # Pad both curves with sentinel start and end values to simplify the
    # area computation below.
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Walk backwards so that each precision becomes the maximum of itself
    # and everything after it: the curve never increases with recall, as
    # specified by the VOC paper.
    for i in reversed(range(len(precisions) - 1)):
        precisions[i] = np.maximum(precisions[i], precisions[i + 1])

    # Positions at which recall changes; repeated recall values add no area.
    indices = np.flatnonzero(recalls[1:] != recalls[:-1]) + 1
    # Sum of rectangles: (recall increment) * (precision at that step).
    recall_steps = recalls[indices] - recalls[indices - 1]
    mAP = np.sum(recall_steps * precisions[indices])

    return mAP, precisions, recalls, overlaps