26medias · September 2, 2020 18:25 · Sep 2, 2020 · Sep 2, 2020
diff --git a/data_processing_cv.py b/data_processing_cv.py
@@ -0,0 +1,371 @@
+#
+# Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
+#
+# NOTICE TO LICENSEE:
+#
+# This source code and/or documentation ("Licensed Deliverables") are
+# subject to NVIDIA intellectual property rights under U.S. and
+# international Copyright laws.
+#
+# These Licensed Deliverables contained herein is PROPRIETARY and
+# CONFIDENTIAL to NVIDIA and is being provided under the terms and
+# conditions of a form of NVIDIA software license agreement by and
+# between NVIDIA and Licensee ("License Agreement") or electronically
+# accepted by Licensee.  Notwithstanding any terms or conditions to
+# the contrary in the License Agreement, reproduction or disclosure
+# of the Licensed Deliverables to any third party without the express
+# written consent of NVIDIA is prohibited.
+#
+# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+# LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+# SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
+# PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+# NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+# DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+# NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+# LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+# SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THESE LICENSED DELIVERABLES.
+#
+# U.S. Government End Users.  These Licensed Deliverables are a
+# "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+# 1995), consisting of "commercial computer software" and "commercial
+# computer software documentation" as such terms are used in 48
+# C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+# only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+# 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+# U.S. Government End Users acquire the Licensed Deliverables with
+# only those rights set forth herein.
+#
+# Any use of the Licensed Deliverables in individual and commercial
+# software must include, in the user documentation and internal
+# comments to the code, the above Disclaimer and U.S. Government End
+# Users Notice.
+#
+
+import math
+from PIL import Image
+import numpy as np
+import os
+
+
+# YOLOv3-608 has been trained with the 80 COCO categories, which are read from the
+# 'coco_labels.txt' file referenced below:
+# Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context."
+# European Conference on Computer Vision. Springer, Cham, 2014.
+
+def load_label_categories(label_file_path):
+    """Read the category labels from a text file, one label per line.
+
+    Keyword arguments:
+    label_file_path -- string path of the label file
+
+    Returns a list with one string per line of the file, in file order, with the
+    trailing newline removed from each entry.
+    """
+    # The context manager closes the file handle deterministically instead of leaving
+    # it open until the garbage collector gets to it:
+    with open(label_file_path) as label_file:
+        return [line.rstrip('\n') for line in label_file]
+
+# The label file is looked up next to this module (symlinks resolved), so the lookup does
+# not depend on the current working directory:
+LABEL_FILE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'coco_labels.txt')
+# Loaded once at import time; importing this module fails if the file cannot be opened:
+ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)
+
+# Let's make sure that there are 80 classes, as expected for the COCO data set.
+# NOTE(review): `assert` statements are removed under `python -O`, so this check does not
+# run in optimized mode.
+CATEGORY_NUM = len(ALL_CATEGORIES)
+assert CATEGORY_NUM == 80
+
+
+class PreprocessYOLO(object):
+    """A simple class for resizing PIL images to the specified input resolution for
+    YOLOv3-608 and converting them to the normalized NCHW tensor fed into the network.
+    """
+
+    def __init__(self, yolo_input_resolution):
+        """Initialize with the input resolution for YOLOv3, which will stay fixed in this sample.
+
+        Keyword arguments:
+        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
+        input resolution in HW order
+        """
+        self.yolo_input_resolution = yolo_input_resolution
+
+    def process(self, input_image):
+        """Take an already loaded PIL image and return it together with a pre-processed
+        version required for feeding it into a YOLOv3 network.
+
+        Keyword arguments:
+        input_image -- PIL image to be pre-processed (no file access happens here)
+
+        Returns a tuple (image_raw, image_preprocessed): the unmodified input image and a
+        float32 NumPy array in NCHW format with values in the range [0, 1].
+        """
+        image_raw, image_resized = self._load_and_resize(input_image)
+        image_preprocessed = self._shuffle_and_normalize(image_resized)
+        return image_raw, image_preprocessed
+
+    def _load_and_resize(self, input_image):
+        """Resize the given PIL image to the input resolution of the network.
+        Return the input image before resizing as a PIL Image (required for visualization),
+        and the resized image as a NumPy float array in HWC format.
+
+        Despite its name, this method does not load anything from disk; the name is kept
+        for compatibility with the sample this class was derived from.
+
+        Keyword arguments:
+        input_image -- PIL image to be resized
+        """
+        image_raw = input_image
+        # The HWC -> CHW shuffle further down requires exactly three colour channels, so
+        # grayscale, palette or RGBA images are converted first. Only the copy used for
+        # the network is converted; the image returned to the caller stays untouched:
+        if image_raw.mode == 'RGB':
+            image_rgb = image_raw
+        else:
+            image_rgb = image_raw.convert('RGB')
+        # Expecting yolo_input_resolution in (height, width) format, adjusting to PIL
+        # convention (width, height) in PIL:
+        new_resolution = (
+            self.yolo_input_resolution[1],
+            self.yolo_input_resolution[0])
+        image_resized = image_rgb.resize(
+            new_resolution, resample=Image.BICUBIC)
+        image_resized = np.array(image_resized, dtype=np.float32, order='C')
+        return image_raw, image_resized
+
+    def _shuffle_and_normalize(self, image):
+        """Normalize a NumPy array representing an image to the range [0, 1], and
+        convert it from HWC format ("channels last") to NCHW format ("channels first"
+        with leading batch dimension).
+
+        The input array is left unmodified; a new float32 array in C order is returned.
+
+        Keyword arguments:
+        image -- image as three-dimensional NumPy array, in HWC format, with values
+        in the range [0, 255]
+        """
+        # Out-of-place division: the caller's array is not changed, and integer arrays
+        # (for which an in-place true division raises) are accepted as well:
+        image = np.asarray(image, dtype=np.float32) / 255.0
+        # HWC to CHW format:
+        image = np.transpose(image, [2, 0, 1])
+        # CHW to NCHW format
+        image = np.expand_dims(image, axis=0)
+        # Convert the image to row-major order, also known as "C order":
+        image = np.array(image, dtype=np.float32, order='C')
+        return image
+
+
+class PostprocessYOLO(object):
+    """Class for post-processing the three output tensors from YOLOv3-608."""
+
+    def __init__(self,
+                 yolo_masks,
+                 yolo_anchors,
+                 obj_threshold,
+                 nms_threshold,
+                 yolo_input_resolution):
+        """Initialize with all values that will be kept when processing several frames.
+        Assuming 3 outputs of the network in the case of (large) YOLOv3.
+
+        Keyword arguments:
+        yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
+        yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO anchors
+        obj_threshold -- threshold for object coverage, float value between 0 and 1
+        nms_threshold -- threshold for non-max suppression algorithm,
+        float value between 0 and 1
+        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
+        input resolution in HW order
+        """
+        self.masks = yolo_masks
+        self.anchors = yolo_anchors
+        self.object_threshold = obj_threshold
+        self.nms_threshold = nms_threshold
+        self.input_resolution_yolo = yolo_input_resolution
+
+    def process(self, outputs, resolution_raw):
+        """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them
+        and return a list of bounding boxes for detected object together with their category
+        and their confidences in separate lists.
+
+        Returns (None, None, None) when no candidate passes the object threshold.
+
+        Keyword arguments:
+        outputs -- outputs from a TensorRT engine in NCHW format
+        resolution_raw -- the original spatial resolution from the input PIL image in WH order
+        """
+        outputs_reshaped = [self._reshape_output(output) for output in outputs]
+        return self._process_yolo_output(outputs_reshaped, resolution_raw)
+
+    def _reshape_output(self, output):
+        """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255),
+        and then return it in (height,width,3,85) dimensionality after further reshaping.
+
+        Keyword argument:
+        output -- an output from a TensorRT engine after inference
+        """
+        output = np.transpose(output, [0, 2, 3, 1])
+        _, height, width, channels = output.shape
+        # Per anchor: 4 box coordinates, 1 objectness score and CATEGORY_NUM=80 class scores:
+        box_attributes = 4 + 1 + CATEGORY_NUM
+        # The number of anchors per cell follows from the channel count (255 // 85 == 3 for
+        # YOLOv3) instead of being hard-coded:
+        anchors_per_cell = channels // box_attributes
+        return np.reshape(output, (height, width, anchors_per_cell, box_attributes))
+
+    def _process_yolo_output(self, outputs_reshaped, resolution_raw):
+        """Take in a list of three reshaped YOLO outputs in (height,width,3,85) shape and
+        return a list of bounding boxes for detected object together with their category and
+        their confidences in separate lists.
+
+        Returns (None, None, None) when no candidate passes the object threshold.
+
+        Keyword arguments:
+        outputs_reshaped -- list of three reshaped YOLO outputs as NumPy arrays
+        with shape (height,width,3,85)
+        resolution_raw -- the original spatial resolution from the input PIL image in WH order
+        """
+
+        # E.g. in YOLOv3-608, there are three output tensors, which we associate with their
+        # respective masks. Then we iterate through all output-mask pairs and generate candidates
+        # for bounding boxes, their corresponding category predictions and their confidences:
+        boxes, categories, confidences = list(), list(), list()
+        for output, mask in zip(outputs_reshaped, self.masks):
+            box, box_confidence, class_probs = self._process_feats(output, mask)
+            box, category, confidence = self._filter_boxes(box, box_confidence, class_probs)
+            boxes.append(box)
+            categories.append(category)
+            confidences.append(confidence)
+
+        boxes = np.concatenate(boxes)
+        categories = np.concatenate(categories)
+        confidences = np.concatenate(confidences)
+
+        # Scale boxes back to original image shape:
+        width, height = resolution_raw
+        image_dims = [width, height, width, height]
+        boxes = boxes * image_dims
+
+        # Using the candidates from the previous (loop) step, we apply the non-max suppression
+        # algorithm that clusters adjacent bounding boxes to a single bounding box. This is
+        # done separately for every category that has at least one candidate:
+        nms_boxes, nms_categories, nscores = list(), list(), list()
+        for category in set(categories):
+            idxs = np.where(categories == category)
+            category_boxes = boxes[idxs]
+            category_ids = categories[idxs]
+            category_confidences = confidences[idxs]
+
+            keep = self._nms_boxes(category_boxes, category_confidences)
+
+            nms_boxes.append(category_boxes[keep])
+            nms_categories.append(category_ids[keep])
+            nscores.append(category_confidences[keep])
+
+        # Nothing survived the object threshold, so there is nothing to concatenate:
+        if not nms_boxes:
+            return None, None, None
+
+        boxes = np.concatenate(nms_boxes)
+        categories = np.concatenate(nms_categories)
+        confidences = np.concatenate(nscores)
+
+        return boxes, categories, confidences
+
+    def _process_feats(self, output_reshaped, mask):
+        """Take in a reshaped YOLO output in height,width,3,85 format together with its
+        corresponding YOLO mask and return the detected bounding boxes, the confidence,
+        and the class probability in each cell/pixel.
+
+        The returned boxes hold (x, y, width, height) with x,y being the top-left corner,
+        all relative to the network input (i.e. in the range [0, 1] for boxes inside the image).
+
+        Keyword arguments:
+        output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85)
+        mask -- three-dimensional tuple with the indexes of the anchors used for this output
+        """
+
+        def sigmoid(values):
+            """Return the element-wise sigmoid of the input array."""
+            # For strongly negative inputs exp() overflows to inf, which yields the correct
+            # limit 0.0, so the overflow warning is suppressed here:
+            with np.errstate(over='ignore'):
+                return 1.0 / (1.0 + np.exp(-values))
+
+        grid_h, grid_w, _, _ = output_reshaped.shape
+
+        anchors = [self.anchors[i] for i in mask]
+
+        # Reshape to N, height, width, num_anchors, box_params:
+        anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])
+
+        # Calculate in double precision, independent of the dtype delivered by the engine:
+        features = np.asarray(output_reshaped, dtype=np.float64)
+        box_xy = sigmoid(features[..., :2])
+        box_wh = np.exp(features[..., 2:4]) * anchors_tensor
+        # Slicing with 4:5 keeps the trailing axis, giving shape (height,width,num_anchors,1):
+        box_confidence = sigmoid(features[..., 4:5])
+        box_class_probs = sigmoid(features[..., 5:])
+
+        # Offsets of every grid cell as (column, row) == (x, y). Built for grid_h rows and
+        # grid_w columns, so that non-square grids are handled as well; the singleton anchor
+        # axis broadcasts over all anchors of a cell:
+        row, col = np.indices((grid_h, grid_w))
+        grid = np.stack((col, row), axis=-1).reshape(grid_h, grid_w, 1, 2)
+
+        box_xy += grid
+        box_xy /= (grid_w, grid_h)
+        # The input resolution is stored in HW order, whereas box_wh holds (width, height):
+        input_h, input_w = self.input_resolution_yolo
+        box_wh /= (input_w, input_h)
+        # Move from the box center to the top-left corner:
+        box_xy -= (box_wh / 2.)
+        boxes = np.concatenate((box_xy, box_wh), axis=-1)
+
+        # boxes: top-left corner plus size, box_confidence: confidence level,
+        # box_class_probs: class confidence
+        return boxes, box_confidence, box_class_probs
+
+    def _filter_boxes(self, boxes, box_confidences, box_class_probs):
+        """Take in the unfiltered bounding box descriptors and discard each cell
+        whose score is lower than the object threshold set during class initialization.
+
+        Returns the remaining boxes with shape (N,4), their category indexes with shape N
+        and their scores (object confidence times class probability) with shape N.
+
+        Keyword arguments:
+        boxes -- bounding box coordinates with shape (height,width,3,4); 4 for
+        x,y,width,height coordinates of the boxes
+        box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as
+        confidence scalar per element
+        box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM)
+
+        """
+        box_scores = box_confidences * box_class_probs
+        box_classes = np.argmax(box_scores, axis=-1)
+        box_class_scores = np.max(box_scores, axis=-1)
+        pos = np.where(box_class_scores >= self.object_threshold)
+
+        boxes = boxes[pos]
+        classes = box_classes[pos]
+        scores = box_class_scores[pos]
+
+        return boxes, classes, scores
+
+    def _nms_boxes(self, boxes, box_confidences):
+        """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their
+        confidence scores and return an array with the indexes of the bounding boxes we want to
+        keep (and display later).
+
+        Keyword arguments:
+        boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
+        with shape (N,4); 4 for x,y,width,height coordinates of the boxes
+        box_confidences -- a Numpy array containing the corresponding confidences with shape N
+        """
+        x_coord = boxes[:, 0]
+        y_coord = boxes[:, 1]
+        width = boxes[:, 2]
+        height = boxes[:, 3]
+
+        areas = width * height
+        # Indexes of the boxes, most confident first:
+        ordered = box_confidences.argsort()[::-1]
+
+        keep = list()
+        while ordered.size > 0:
+            # Index of the current element:
+            i = ordered[0]
+            keep.append(i)
+            xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
+            yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
+            xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]])
+            yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]])
+
+            # NOTE(review): the intersection uses a "+ 1" (inclusive pixel) extent while
+            # `areas` above does not. Kept as is to leave the detections unchanged --
+            # confirm whether this asymmetry is intended.
+            width1 = np.maximum(0.0, xx2 - xx1 + 1)
+            height1 = np.maximum(0.0, yy2 - yy1 + 1)
+            intersection = width1 * height1
+            union = (areas[i] + areas[ordered[1:]] - intersection)
+
+            # Compute the Intersection over Union (IoU) score:
+            iou = intersection / union
+
+            # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box
+            # candidates to a minimum. In this step, we keep only those elements whose overlap
+            # with the current bounding box is lower than the threshold:
+            indexes = np.where(iou <= self.nms_threshold)[0]
+            # `iou` was computed against ordered[1:], hence the shift by one:
+            ordered = ordered[indexes + 1]
+
+        # An explicit integer dtype keeps the result usable as an index even if it is empty:
+        keep = np.array(keep, dtype=np.intp)
+        return keep
diff --git a/tensorrt_yolov3_video.py b/tensorrt_yolov3_video.py
@@ -0,0 +1,149 @@
+import numpy as np
+import cv2 as cv
+import os
+import sys
+import tensorrt as trt
+import pycuda.driver as cuda
+import pycuda.autoinit
+from PIL import ImageDraw
+from PIL import Image
+from data_processing_cv import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES
+
+import sys, os
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+# Logger instance handed to the TensorRT builder, ONNX parser and runtime below:
+TRT_LOGGER = trt.Logger()
+
+# Print the OpenCV version in use:
+print(cv.__version__)
+
+
+# Paths are relative to the current working directory:
+onnx_file_path = 'yolov3.onnx'
+engine_file_path = "yolov3.trt"
+# Network input resolution in (height, width) order:
+input_resolution_yolov3_HW = (608, 608)
+
+# Create a pre-processor object by specifying the required input resolution for YOLOv3
+preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
+
+
+
+# Imported from onnx_to_tensorrt.py, modified for OpenCV
+def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
+    """Draw the given bounding boxes onto an OpenCV image and return that image.
+
+    The image is modified in place. Every box is drawn as a one pixel wide rectangle in
+    the fixed colour (0, 255, 255).
+
+    Keyword arguments:
+    image_raw -- OpenCV image (NumPy array) to draw on
+    bboxes -- NumPy array with shape (N,4) holding x,y,width,height of every box in pixels,
+    or None if nothing was detected
+    confidences -- confidences of the boxes (only printed)
+    categories -- category indexes of the boxes (only printed)
+    all_categories -- list of all category names (currently unused)
+    bbox_color -- currently unused, kept for compatibility with the PIL based sample
+    """
+    print(bboxes, confidences, categories)
+    # The post-processor reports "no detections" as None instead of an empty array:
+    if bboxes is None:
+        return image_raw
+
+    for box in bboxes:
+        x_coord, y_coord, width, height = box
+        # OpenCV requires plain integer pixel coordinates:
+        top_left = (int(round(float(x_coord))), int(round(float(y_coord))))
+        bottom_right = (int(round(float(x_coord + width))), int(round(float(y_coord + height))))
+        cv.rectangle(image_raw, top_left, bottom_right, (0, 255, 255), 1)
+
+    return image_raw
+
+
+
+# Imported from onnx_to_tensorrt.py 
+def get_engine(onnx_file_path, engine_file_path=""):
+    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.
+
+    Keyword arguments:
+    onnx_file_path -- string path of the ONNX model used when an engine has to be built
+    engine_file_path -- string path of the serialized engine; it is read if the file exists
+    and written after a successful build. With the default "" nothing is read or written.
+
+    Returns the engine, or None if the ONNX file could not be parsed or the build failed.
+    Terminates the process with exit status 1 if the ONNX file does not exist.
+    """
+    def build_engine():
+        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
+        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
+            builder.max_workspace_size = 1 << 28 # 256MiB
+            builder.max_batch_size = 1
+            # Parse model file
+            if not os.path.exists(onnx_file_path):
+                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
+                # A missing model is an error, so report a non-zero exit status:
+                sys.exit(1)
+            print('Loading ONNX file from path {}...'.format(onnx_file_path))
+            with open(onnx_file_path, 'rb') as model:
+                print('Beginning ONNX file parsing')
+                if not parser.parse(model.read()):
+                    print ('ERROR: Failed to parse the ONNX file.')
+                    for error in range(parser.num_errors):
+                        print (parser.get_error(error))
+                    return None
+            # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
+            network.get_input(0).shape = [1, 3, 608, 608]
+            print('Completed parsing of ONNX file')
+            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
+            engine = builder.build_cuda_engine(network)
+            # NOTE(review): presumably a failed build yields None -- confirm against the
+            # TensorRT documentation. Without this check serialize() would fail on None.
+            if engine is None:
+                print('ERROR: Failed to build the TensorRT engine.')
+                return None
+            print("Completed creating Engine")
+            # Only cache the engine if a target path was given; open("") would raise:
+            if engine_file_path:
+                with open(engine_file_path, "wb") as f:
+                    f.write(engine.serialize())
+            return engine
+
+    if os.path.exists(engine_file_path):
+        # If a serialized engine exists, use it instead of building an engine.
+        print("Reading engine from file {}".format(engine_file_path))
+        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
+            return runtime.deserialize_cuda_engine(f.read())
+    else:
+        return build_engine()
+
+
+
+
+
+# Create the engine, its execution context and the input/output buffers once at start-up,
+# so that they are shared by all frames.
+# NOTE(review): get_engine() returns None when the ONNX file cannot be parsed, in which case
+# the create_execution_context() call below raises an AttributeError.
+engine = get_engine(onnx_file_path, engine_file_path)
+context = engine.create_execution_context()
+inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+
+
+
+
+# Apply YoloV3 on an OpenCV frame, return boxes, classes, scores
+def getYolo(img):
+    """Run YOLOv3 on a single OpenCV frame.
+
+    Keyword arguments:
+    img -- OpenCV image in BGR channel order
+
+    Returns the tuple (boxes, classes, scores) produced by PostprocessYOLO.process for
+    this frame, with the boxes scaled to the resolution of the given frame.
+    """
+    # The pre-processor works on PIL images in RGB order, OpenCV delivers BGR:
+    rgb_frame = cv.cvtColor(img, cv.COLOR_BGR2RGB)
+    pil_frame = Image.fromarray(rgb_frame)
+
+    image_raw, network_input = preprocessor.process(pil_frame)
+    # Size of the original frame in WH format, needed to scale the boxes back later:
+    original_size_WH = image_raw.size
+
+    # Hand the image over as host input; common.do_inference_v2 is given the shared
+    # buffers, bindings and stream together with the execution context:
+    inputs[0].host = network_input
+    flat_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
+
+    # The inference results are reshaped to the shapes expected by the post-processor:
+    expected_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
+    shaped_outputs = [flat.reshape(shape) for flat, shape in zip(flat_outputs, expected_shapes)]
+
+    postprocessor = PostprocessYOLO(
+        # A list of 3 three-dimensional tuples for the YOLO masks
+        yolo_masks=[(6, 7, 8), (3, 4, 5), (0, 1, 2)],
+        # A list of 9 two-dimensional tuples for the YOLO anchors
+        yolo_anchors=[(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
+                      (59, 119), (116, 90), (156, 198), (373, 326)],
+        # Threshold for object coverage, float value between 0 and 1
+        obj_threshold=0.6,
+        # Threshold for non-max suppression algorithm, float value between 0 and 1
+        nms_threshold=0.5,
+        yolo_input_resolution=input_resolution_yolov3_HW)
+
+    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box
+    # details of detected objects:
+    boxes, classes, scores = postprocessor.process(shaped_outputs, original_size_WH)
+    return boxes, classes, scores
+
+
+
+
+
+# Read a video, run YoloV3 at every frame
+cap = cv.VideoCapture('traffic.mp4')
+try:
+    # The window only has to be created once, not for every frame:
+    if cap.isOpened():
+        cv.namedWindow("result", cv.WINDOW_AUTOSIZE)
+
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
+            print("Can't receive frame (stream end?). Exiting ...")
+            break
+
+        # Apply YoloV3
+        boxes, classes, scores = getYolo(frame)
+
+        # Draw the rects; the post-processor returns None when a frame has no detections,
+        # in which case the frame is shown unchanged:
+        if boxes is not None:
+            draw_bboxes(frame, boxes, scores, classes, ALL_CATEGORIES)
+
+        cv.imshow("result", frame)
+        if cv.waitKey(1) == ord('q'):
+            break
+finally:
+    # Release the capture and close the window even if processing a frame failed:
+    cap.release()
+    cv.destroyAllWindows()
No results found