/* * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include "nvdsinfer_custom_impl.h" #include "trt_utils.h" static const int NUM_CLASSES_YOLO = 6; #define NMS_THRESH 0.5 #define CONF_THRESH 0.4 #define BATCH_SIZE 1 extern "C" bool NvDsInferParseCustomYoloV5( std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList); extern "C" bool NvDsInferParseCustomYoloV4( std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList); extern "C" bool NvDsInferParseCustomYoloV3( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); extern "C" bool NvDsInferParseCustomYoloV3Tiny( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); extern "C" bool NvDsInferParseCustomYoloV2( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); extern "C" bool NvDsInferParseCustomYoloV2Tiny( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); extern "C" bool NvDsInferParseCustomYoloTLT( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// static constexpr int LOCATIONS = 4; struct alignas(float) Detection{ //center_x center_y w h float bbox[LOCATIONS]; float conf; // bbox_conf * cls_conf float class_id; }; float iou(float lbox[4], float rbox[4]) { float interBox[] = { std::max(lbox[0] - lbox[2]/2.f , rbox[0] - rbox[2]/2.f), //left std::min(lbox[0] + lbox[2]/2.f , rbox[0] + rbox[2]/2.f), //right std::max(lbox[1] - lbox[3]/2.f , rbox[1] - rbox[3]/2.f), //top std::min(lbox[1] + lbox[3]/2.f , rbox[1] + rbox[3]/2.f), //bottom }; if(interBox[2] > interBox[3] || interBox[0] > interBox[1]) return 0.0f; float interBoxS =(interBox[1]-interBox[0])*(interBox[3]-interBox[2]); return interBoxS/(lbox[2]*lbox[3] + rbox[2]*rbox[3] -interBoxS); } bool cmp(Detection& a, Detection& b) { return a.conf > b.conf; } void nms(std::vector& res, float *output, float conf_thresh, float nms_thresh = 0.5) { int det_size = sizeof(Detection) / sizeof(float); std::map> m; for (int i = 0; i < output[0] && i < 1000; i++) { if (output[1 + det_size * i + 4] <= conf_thresh) continue; Detection det; memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float)); if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector()); m[det.class_id].push_back(det); } for (auto it = m.begin(); it != m.end(); it++) { //std::cout << it->second[0].class_id << " --- " << std::endl; auto& dets = it->second; std::sort(dets.begin(), dets.end(), cmp); for (size_t m = 0; m < dets.size(); ++m) { auto& item = dets[m]; res.push_back(item); for (size_t n = m + 1; n < dets.size(); ++n) { if (iou(item.bbox, dets[n].bbox) > nms_thresh) { dets.erase(dets.begin()+n); --n; } } } } } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* This is a sample bounding box parsing function for the sample YoloV5m detector model */ static bool NvDsInferParseYoloV5( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured) { std::cerr << "WARNING: Num classes mismatch. Configured:" << detectionParams.numClassesConfigured << ", detected by network: " << NUM_CLASSES_YOLO << std::endl; } std::vector res; nms(res, (float*)(outputLayersInfo[0].buffer), CONF_THRESH, NMS_THRESH); //std::cout<<"Nms done sucessfully----"<(r.bbox[0]-r.bbox[2]*0.5f); oinfo.top = static_cast(r.bbox[1]-r.bbox[3]*0.5f); oinfo.width = static_cast(r.bbox[2]); oinfo.height = static_cast(r.bbox[3]); oinfo.detectionConfidence = r.conf; //std::cout << static_cast(r.bbox[0]) << "," << static_cast(r.bbox[1]) << "," << static_cast(r.bbox[2]) << "," // << static_cast(r.bbox[3]) << "," << static_cast(r.class_id) << "," << static_cast(r.conf) << std::endl; objectList.push_back(oinfo); } return true; } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* This is a sample bounding box parsing function for the sample YoloV4 detector model */ static bool NvDsInferParseYoloV4( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured) { std::cerr << "WARNING: Num classes mismatch. Configured:" << detectionParams.numClassesConfigured << ", detected by network: " << NUM_CLASSES_YOLO << std::endl; } std::vector res; nms(res, (float*)(outputLayersInfo[0].buffer), CONF_THRESH, NMS_THRESH); //std::cout<<"Nms done sucessfully----"<(r.bbox[0]-r.bbox[2]*0.5f); oinfo.top = static_cast(r.bbox[1]-r.bbox[3]*0.5f); oinfo.width = static_cast(r.bbox[2]); oinfo.height = static_cast(r.bbox[3]); oinfo.detectionConfidence = r.conf; //std::cout << static_cast(r.bbox[0]) << "," << static_cast(r.bbox[1]) << "," << static_cast(r.bbox[2]) << "," // << static_cast(r.bbox[3]) << "," << static_cast(r.class_id) << "," << static_cast(r.conf) << std::endl; objectList.push_back(oinfo); } return true; } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* This is a sample bounding box parsing function for the sample YoloV3 detector model */ static NvDsInferParseObjectInfo convertBBox(const float& bx, const float& by, const float& bw, const float& bh, const int& stride, const uint& netW, const uint& netH) { NvDsInferParseObjectInfo b; // Restore coordinates to network input resolution float xCenter = bx * stride; float yCenter = by * stride; float x0 = xCenter - bw / 2; float y0 = yCenter - bh / 2; float x1 = x0 + bw; float y1 = y0 + bh; x0 = clamp(x0, 0, netW); y0 = clamp(y0, 0, netH); x1 = clamp(x1, 0, netW); y1 = clamp(y1, 0, netH); b.left = x0; b.width = clamp(x1 - x0, 0, netW); b.top = y0; b.height = clamp(y1 - y0, 0, netH); return b; } static void addBBoxProposal(const float bx, const float by, const float bw, const float bh, const uint stride, const uint& netW, const uint& netH, const int maxIndex, const float maxProb, std::vector& binfo) { NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH); if (bbi.width < 1 || bbi.height < 1) return; bbi.detectionConfidence = maxProb; bbi.classId = maxIndex; binfo.push_back(bbi); } static std::vector decodeYoloV2Tensor( const float* detections, const std::vector &anchors, const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, const uint numOutputClasses, const uint& netW, const uint& netH) { std::vector binfo; for (uint y = 0; y < gridSizeH; ++y) { for (uint x = 0; x < gridSizeW; ++x) { for (uint b = 0; b < numBBoxes; ++b) { const float pw = anchors[b * 2]; const float ph = anchors[b * 2 + 1]; const int numGridCells = gridSizeH * gridSizeW; const int bbindex = y * gridSizeW + x; const float bx = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)]; const float by = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)]; const float bw = pw * exp (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]); const float bh = ph * exp (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]); const float objectness = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; float maxProb = 0.0f; int maxIndex = -1; for (uint i = 0; i < numOutputClasses; ++i) { float prob = (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]); if (prob > maxProb) { maxProb = prob; maxIndex = i; } } maxProb = objectness * maxProb; addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); } } } return binfo; } static std::vector decodeYoloV3Tensor( const float* detections, const std::vector &mask, const std::vector &anchors, const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes, const uint numOutputClasses, const uint& netW, const uint& netH) { std::vector binfo; for (uint y = 0; y < gridSizeH; ++y) { for (uint x = 0; x < gridSizeW; ++x) { for (uint b = 0; b < numBBoxes; ++b) { const float pw = anchors[mask[b] * 2]; const float ph = anchors[mask[b] * 2 + 1]; const int numGridCells = gridSizeH * gridSizeW; const int bbindex = y * gridSizeW + x; const float bx = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)]; const float by = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)]; const float bw = pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]; const float bh = ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]; const float objectness = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)]; float maxProb = 0.0f; int maxIndex = -1; for (uint i = 0; i < numOutputClasses; ++i) { float prob = (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]); if (prob > maxProb) { maxProb = prob; maxIndex = i; } } maxProb = objectness * maxProb; addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo); } } } return binfo; } static inline std::vector SortLayers(const std::vector & outputLayersInfo) { std::vector outLayers; for (auto const &layer : outputLayersInfo) { outLayers.push_back (&layer); } std::sort(outLayers.begin(), outLayers.end(), [](const NvDsInferLayerInfo* a, const NvDsInferLayerInfo* b) { return a->inferDims.d[1] < b->inferDims.d[1]; }); return outLayers; } static bool NvDsInferParseYoloV3( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList, const std::vector &anchors, const std::vector> &masks) { const uint kNUM_BBOXES = 3; const std::vector sortedLayers = SortLayers (outputLayersInfo); if (sortedLayers.size() != masks.size()) { std::cerr << "ERROR: yoloV3 output layer.size: " << sortedLayers.size() << " does not match mask.size: " << masks.size() << std::endl; return false; } if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured) { std::cerr << "WARNING: Num classes mismatch. Configured:" << detectionParams.numClassesConfigured << ", detected by network: " << NUM_CLASSES_YOLO << std::endl; } std::vector objects; for (uint idx = 0; idx < masks.size(); ++idx) { const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid assert(layer.inferDims.numDims == 3); const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); assert(stride == DIVUP(networkInfo.height, gridSizeH)); std::vector outObjs = decodeYoloV3Tensor((const float*)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES, NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height); objects.insert(objects.end(), outObjs.begin(), outObjs.end()); } objectList = objects; return true; } static bool NvDsInferParseYoloV2( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { // copy anchor data from yolov2.cfg file std::vector anchors = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828}; const uint kNUM_BBOXES = 5; if (outputLayersInfo.empty()) { std::cerr << "Could not find output layer in bbox parsing" << std::endl;; return false; } const NvDsInferLayerInfo &layer = outputLayersInfo[0]; if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured) { std::cerr << "WARNING: Num classes mismatch. Configured:" << detectionParams.numClassesConfigured << ", detected by network: " << NUM_CLASSES_YOLO << std::endl; } assert(layer.inferDims.numDims == 3); const uint gridSizeH = layer.inferDims.d[1]; const uint gridSizeW = layer.inferDims.d[2]; const uint stride = DIVUP(networkInfo.width, gridSizeW); assert(stride == DIVUP(networkInfo.height, gridSizeH)); for (auto& anchor : anchors) { anchor *= stride; } std::vector objects = decodeYoloV2Tensor((const float*)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES, NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height); objectList = objects; return true; } /* C-linkage to prevent name-mangling */ extern "C" bool NvDsInferParseCustomYoloV5( std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList) { return NvDsInferParseYoloV5( outputLayersInfo, networkInfo, detectionParams, objectList); } extern "C" bool NvDsInferParseCustomYoloV4( std::vector const &outputLayersInfo, NvDsInferNetworkInfo const &networkInfo, NvDsInferParseDetectionParams const &detectionParams, std::vector &objectList) { return NvDsInferParseYoloV4 ( outputLayersInfo, networkInfo, detectionParams, objectList); } extern "C" bool NvDsInferParseCustomYoloV3( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { static const std::vector kANCHORS = { 10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0}; static const std::vector> kMASKS = { {6, 7, 8}, {3, 4, 5}, {0, 1, 2}}; return NvDsInferParseYoloV3 ( outputLayersInfo, networkInfo, detectionParams, objectList, kANCHORS, kMASKS); } extern "C" bool NvDsInferParseCustomYoloV3Tiny( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { static const std::vector kANCHORS = { 10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}; static const std::vector> kMASKS = { {3, 4, 5}, //{0, 1, 2}}; // as per output result, select {1,2,3} {1, 2, 3}}; return NvDsInferParseYoloV3 ( outputLayersInfo, networkInfo, detectionParams, objectList, kANCHORS, kMASKS); } extern "C" bool NvDsInferParseCustomYoloV2( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { return NvDsInferParseYoloV2 ( outputLayersInfo, networkInfo, detectionParams, objectList); } extern "C" bool NvDsInferParseCustomYoloV2Tiny( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { return NvDsInferParseYoloV2 ( outputLayersInfo, networkInfo, detectionParams, objectList); } extern "C" bool NvDsInferParseCustomYoloTLT( std::vector const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo, NvDsInferParseDetectionParams const& detectionParams, std::vector& objectList) { if(outputLayersInfo.size() != 4) { std::cerr << "Mismatch in the number of output buffers." << "Expected 4 output buffers, detected in the network :" << outputLayersInfo.size() << std::endl; return false; } const int topK = 200; const int* keepCount = static_cast (outputLayersInfo.at(0).buffer); const float* boxes = static_cast (outputLayersInfo.at(1).buffer); const float* scores = static_cast (outputLayersInfo.at(2).buffer); const float* cls = static_cast (outputLayersInfo.at(3).buffer); for (int i = 0; (i < keepCount[0]) && (objectList.size() <= topK); ++i) { const float* loc = &boxes[0] + (i * 4); const float* conf = &scores[0] + i; const float* cls_id = &cls[0] + i; if(conf[0] > 1.001) continue; if((loc[0] < 0) || (loc[1] < 0) || (loc[2] < 0) || (loc[3] < 0)) continue; if((loc[0] > networkInfo.width) || (loc[2] > networkInfo.width) || (loc[1] > networkInfo.height) || (loc[3] > networkInfo.width)) continue; if((loc[2] < loc[0]) || (loc[3] < loc[1])) continue; if(((loc[3] - loc[1]) > networkInfo.height) || ((loc[2]-loc[0]) > networkInfo.width)) continue; NvDsInferParseObjectInfo curObj{static_cast(cls_id[0]), loc[0],loc[1],(loc[2]-loc[0]), (loc[3]-loc[1]), conf[0]}; objectList.push_back(curObj); } return true; } /* Check that the custom function has been defined correctly */ CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV5); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV4); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3Tiny); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2Tiny); CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloTLT);