AI Learning 81 - The YOLO Prediction Class
The YOLO prediction class
The YOLO prediction class loads the trained prediction model and exposes three prediction methods: detect_image(), which runs detection on a single image; get_FPS(), which measures the average inference time per image (from which frames per second can be computed); and detect_heatmap(), which visualizes the network's objectness heat map.
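A minimal usage sketch of the class is shown below; the image file name and the test_interval value are assumptions for illustration, not part of the original project.

from PIL import Image

# Minimal usage sketch of the YOLO prediction class defined below.
# "street.jpg" is a hypothetical test image; adjust paths to your own data.
yolo = YOLO()                                        # loads weights, classes and anchors from _defaults

img = Image.open("street.jpg")
result = yolo.detect_image(img)                      # returns a PIL image with boxes and labels drawn
result.show()

tact_time = yolo.get_FPS(img, test_interval=100)     # average seconds per forward pass
print("%.4f s per image, %.2f FPS" % (tact_time, 1.0 / tact_time))

yolo.detect_heatmap(img, "heatmap.png")              # saves an objectness heat map
yolo.close_session()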
The Yolo.py class
import colorsys
import os
import time

import numpy as np
from keras import backend as K
from PIL import ImageDraw, ImageFont

from yolo_model import get_yolo_model
from utils import (cvtColor, get_anchors, get_classes, preprocess_input,resize_image, show_config)
from utils_bbox import DecodeBox


class YOLO(object):
    _defaults = {
        "model_path": '../model_data/yolo_weights.h5',      # originally: yolo_weights.h5 , best_epoch_weights.h5
        "classes_path": '../model_data/coco_classes.txt',   # originally: coco_classes.txt , voc_classes.txt
        "anchors_path": '../model_data/yolo_anchors.txt',
        "anchors_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
        "input_shape": [416, 416],
        "confidence": 0.5,
        "nms_iou": 0.3,
        "max_boxes": 100,
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)
            self._defaults[name] = value
            print("yolo.py __init__ name={},value={}".format(name, value))
        self.class_names, self.num_classes = get_classes(self.classes_path)
        self.anchors, self.num_anchors = get_anchors(self.anchors_path)  # shape (9, 2)
        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]  # e.g. (0.01, 1.0, 1.0); the first element (hue) is a fraction
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
        self.input_image_shape = K.placeholder(shape=(2,))  # placeholder for the input image size
        self.sess = K.get_session()  # uses TensorFlow 1.13.0
        self.boxes, self.scores, self.classes = self.generate()
        show_config(**self._defaults)

    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
        self.yolo_model = get_yolo_model([None, None, 3], self.anchors_mask, self.num_classes)
        self.yolo_model.load_weights(self.model_path)
        print('Loaded model: {} model, anchors, and classes loaded.'.format(model_path))
        boxes, scores, classes = DecodeBox(
            self.yolo_model.output,          # the model's outputs: the tensors [P5, P4, P3] built by yolo_body in nets\yolo.py
            self.anchors,                    # anchors (priors): [116,90],[156,198],[373,326] [30,61],[62,45],[59,119] [10,13],[16,30],[33,23]
            self.num_classes,                # number of object classes
            self.input_image_shape,          # actual input image size
            self.input_shape,                # network input size [416, 416]
            anchor_mask=self.anchors_mask,   # [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
            max_boxes=self.max_boxes,
            confidence=self.confidence,
            nms_iou=self.nms_iou,
            letterbox_image=self.letterbox_image)
        return boxes, scores, classes

    def detect_image(self, image, crop=False, count=False):
        image = cvtColor(image)
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
        print("K.learning_phase()={}".format(K.learning_phase()))
        print("self.yolo_model.input={}".format(self.yolo_model.input))
        print("self.input_image_shape={}".format(self.input_image_shape))
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0})
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        font = ImageFont.truetype(font='../model_data/simhei.ttf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
        if count:
            print("top_label:", out_classes)
            classes_nums = np.zeros([self.num_classes])
            for i in range(self.num_classes):
                num = np.sum(out_classes == i)
                if num > 0:
                    print(self.class_names[i], " : ", num)
                classes_nums[i] = num
            print("classes_nums:", classes_nums)
        if crop:
            for i, c in list(enumerate(out_boxes)):
                top, left, bottom, right = out_boxes[i]
                top = max(0, np.floor(top).astype('int32'))
                left = max(0, np.floor(left).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                right = min(image.size[0], np.floor(right).astype('int32'))
                dir_save_path = "img_crop"
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)
        for i, c in list(enumerate(out_classes)):
            predicted_class = self.class_names[int(c)]
            box = out_boxes[i]
            score = out_scores[i]
            top, left, bottom, right = box
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])
            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image

    def get_FPS(self, image, test_interval):
        image = cvtColor(image)
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0})
        t1 = time.time()
        for _ in range(test_interval):
            out_boxes, out_scores, out_classes = self.sess.run(
                [self.boxes, self.scores, self.classes],
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.size[1], image.size[0]],
                    K.learning_phase(): 0})
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def detect_heatmap(self, image, heatmap_save_path):
        import cv2
        import matplotlib.pyplot as plt

        def sigmoid(x):
            y = 1.0 / (1.0 + np.exp(-x))
            return y

        image = cvtColor(image)
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
        output = self.yolo_model.predict(image_data)
        plt.imshow(image, alpha=1)
        plt.axis('off')
        mask = np.zeros((image.size[1], image.size[0]))
        for sub_output in output:
            b, h, w, c = np.shape(sub_output)
            sub_output = np.reshape(sub_output, [b, h, w, 3, -1])[0]
            score = np.max(sigmoid(sub_output[..., 4]), -1)
            score = cv2.resize(score, (image.size[0], image.size[1]))
            normed_score = (score * 255).astype('uint8')
            mask = np.maximum(mask, normed_score)
        plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")
        plt.axis('off')
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1)
        print("Save to the " + heatmap_save_path)
        plt.show()

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
        image = cvtColor(image)
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        image_data = np.expand_dims(preprocess_input(np.array(image_data, dtype='float32')), 0)
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0})
        for i, c in enumerate(out_classes):
            predicted_class = self.class_names[int(c)]
            score = str(out_scores[i])
            top, left, bottom, right = out_boxes[i]
            if predicted_class not in class_names:
                continue
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
        f.close()
        return

    def close_session(self):
        self.sess.close()
Code explanation
The generate() method: the call to DecodeBox()
DecodeBox() returns the predicted box coordinates, the confidence that each box contains an object, and the class prediction for each box. At this point, however, these return values are only symbolic tensors, not concrete data: they can be understood as the computation graph for obtaining the box coordinates, confidences and classes, which has not yet been evaluated.
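A minimal illustration of this graph-building behaviour, using a generic placeholder and a simple comparison rather than the project's DecodeBox internals:

import tensorflow as tf
from keras import backend as K

# TF 1.x graph mode: these statements only define nodes in the computation graph.
scores = K.placeholder(shape=(None,))   # stand-in for per-box confidences
keep = tf.greater_equal(scores, 0.5)    # still a symbolic tensor, no numbers yet
print(keep)                             # prints a Tensor description, not boolean values

# Concrete values only appear once a session evaluates the node with real input.
sess = K.get_session()
print(sess.run(keep, feed_dict={scores: [0.9, 0.2, 0.7]}))   # -> [ True False  True]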
The detect_image() method: the sess.run() call
detect_image() feeds a real image into the computation returned by DecodeBox() and evaluates it with sess.run(), turning the symbolic box-coordinate, confidence and class tensors into concrete data.
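The evaluation step at the heart of detect_image() boils down to the call below; here "yolo" is an instance of the class above and "image_data" is the preprocessed (1, 416, 416, 3) array, names used purely for illustration.

from keras import backend as K

# Feed the preprocessed array and the original image size, run the graph,
# and get back concrete NumPy arrays.
out_boxes, out_scores, out_classes = yolo.sess.run(
    [yolo.boxes, yolo.scores, yolo.classes],
    feed_dict={
        yolo.yolo_model.input: image_data,                      # (1, 416, 416, 3) float32
        yolo.input_image_shape: [image.size[1], image.size[0]], # original (height, width)
        K.learning_phase(): 0,                                  # 0 = inference mode
    })
# out_boxes:   (N, 4) box corners (top, left, bottom, right) in original-image pixels
# out_scores:  (N,)   confidence of each kept box
# out_classes: (N,)   class index of each kept box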