/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "yolo.h"
#include "yoloPlugins.h"

// Standard headers used in this translation unit (assert, std::ifstream, std::cout/cerr)
#include <cassert>
#include <fstream>
#include <iostream>

Yolo::Yolo(const NetworkInfo& networkInfo)
    : m_NetworkType(networkInfo.networkType),       // yolov3
      m_ConfigFilePath(networkInfo.configFilePath), // yolov3.cfg
      m_WtsFilePath(networkInfo.wtsFilePath),       // yolov3.weights
      m_DeviceType(networkInfo.deviceType),         // kDLA, kGPU
      m_InputBlobName(networkInfo.inputBlobName),   // data
      m_InputH(0),
      m_InputW(0),
      m_InputC(0),
      m_InputSize(0)
{}

Yolo::~Yolo()
{
    destroyNetworkUtils();
}

nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
{
    assert (builder);

    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
    std::vector<nvinfer1::Weights> trtWeights;

    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(0);
    if (parseModel(*network) != NVDSINFER_SUCCESS) {
        network->destroy();
        return nullptr;
    }

    // Build the engine
    std::cout << "Building the TensorRT Engine..." << std::endl;
    nvinfer1::IBuilderConfig *config = builder->createBuilderConfig();
    nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
    if (engine) {
        std::cout << "Building complete!" << std::endl;
    } else {
        std::cerr << "Building engine failed!" << std::endl;
    }

    // destroy
    network->destroy();
    delete config;
    return engine;
}

NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
    destroyNetworkUtils();

    m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
    parseConfigBlocks();

    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
    // build yolo network
    std::cout << "Building Yolo network..." << std::endl;
    NvDsInferStatus status = buildYoloNetwork(weights, network);

    if (status == NVDSINFER_SUCCESS) {
        std::cout << "Building yolo network complete!" << std::endl;
    } else {
        std::cerr << "Building yolo network failed!" << std::endl;
    }

    return status;
}

NvDsInferStatus Yolo::buildYoloNetwork(
    std::vector<float>& weights, nvinfer1::INetworkDefinition& network) {
    int weightPtr = 0;
    int channels = m_InputC;

    nvinfer1::ITensor* data =
        network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
            nvinfer1::Dims3{static_cast<int>(m_InputC),
                static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
    assert(data != nullptr && data->getDimensions().nbDims > 0);

    nvinfer1::ITensor* previous = data;
    std::vector<nvinfer1::ITensor*> tensorOutputs;
    uint outputTensorCount = 0;

    // build the network using the network API
    for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
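        // Each darknet cfg block is translated into the corresponding TensorRT
        // layer(s). 'previous' tracks the output tensor of the last layer added,
        // and 'tensorOutputs' records every layer output so that 'shortcut' and
        // 'route' blocks can refer back to earlier layers by index.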
        // check if num. of channels is correct
        assert(getNumChannels(previous) == channels);
        std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";

        if (m_ConfigBlocks.at(i).at("type") == "net") {
            printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr");
        } else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
            std::string inputVol = dimsToString(previous->getDimensions());
            nvinfer1::ILayer* out;
            std::string layerType;
            // check if batch_norm enabled
            if (m_ConfigBlocks.at(i).find("batch_normalize") != m_ConfigBlocks.at(i).end()) {
                out = netAddConvBNLeaky(i, m_ConfigBlocks.at(i), weights,
                    m_TrtWeights, weightPtr, channels, previous, &network);
                layerType = "conv-bn-leaky";
            } else {
                out = netAddConvLinear(i, m_ConfigBlocks.at(i), weights,
                    m_TrtWeights, weightPtr, channels, previous, &network);
                layerType = "conv-linear";
            }
            previous = out->getOutput(0);
            assert(previous != nullptr);
            channels = getNumChannels(previous);
            std::string outputVol = dimsToString(previous->getDimensions());
            tensorOutputs.push_back(out->getOutput(0));
            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
        } else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
            assert(m_ConfigBlocks.at(i).at("activation") == "linear");
            assert(m_ConfigBlocks.at(i).find("from") != m_ConfigBlocks.at(i).end());
            int from = stoi(m_ConfigBlocks.at(i).at("from"));

            std::string inputVol = dimsToString(previous->getDimensions());
            // check if indexes are correct
            assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
            assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
            assert(i + from - 1 < i - 2);
            nvinfer1::IElementWiseLayer* ew = network.addElementWise(
                *tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
                nvinfer1::ElementWiseOperation::kSUM);
            assert(ew != nullptr);
            std::string ewLayerName = "shortcut_" + std::to_string(i);
            ew->setName(ewLayerName.c_str());
            previous = ew->getOutput(0);
            assert(previous != nullptr);
            std::string outputVol = dimsToString(previous->getDimensions());
            tensorOutputs.push_back(ew->getOutput(0));
            printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -");
        } else if (m_ConfigBlocks.at(i).at("type") == "yolo") {
            nvinfer1::Dims prevTensorDims = previous->getDimensions();
            assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
            TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
            curYoloTensor.gridSize = prevTensorDims.d[1];
            curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
            m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSize
                * curYoloTensor.gridSize
                * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
            std::string layerName = "yolo_" + std::to_string(i);
            curYoloTensor.blobName = layerName;
            nvinfer1::IPluginV2* yoloPlugin
                = new YoloLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes,
                                  m_OutputTensors.at(outputTensorCount).numClasses,
                                  m_OutputTensors.at(outputTensorCount).gridSize);
            assert(yoloPlugin != nullptr);
            nvinfer1::IPluginV2Layer* yolo =
                network.addPluginV2(&previous, 1, *yoloPlugin);
            assert(yolo != nullptr);
            yolo->setName(layerName.c_str());
            std::string inputVol = dimsToString(previous->getDimensions());
            previous = yolo->getOutput(0);
            assert(previous != nullptr);
            previous->setName(layerName.c_str());
            std::string outputVol = dimsToString(previous->getDimensions());
            network.markOutput(*previous);
            channels = getNumChannels(previous);
            tensorOutputs.push_back(yolo->getOutput(0));
            printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
            ++outputTensorCount;
        }
(m_ConfigBlocks.at(i).at("type") == "region") { std::string layerName = "region_" + std::to_string(i); nvinfer1::Dims prevTensorDims = previous->getDimensions(); assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount); curRegionTensor.gridSize = prevTensorDims.d[1]; curRegionTensor.stride = m_InputW / curRegionTensor.gridSize; m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSize * curRegionTensor.gridSize * (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses)); curRegionTensor.blobName = layerName; auto creator = getPluginRegistry()->getPluginCreator("Region_TRT", "1"); int num = static_cast(curRegionTensor.numBBoxes); int coords = 4; int classes = static_cast(curRegionTensor.numClasses); nvinfer1::PluginField fields[]{ {"num", &num, nvinfer1::PluginFieldType::kINT32, 1}, {"coords", &coords, nvinfer1::PluginFieldType::kINT32, 1}, {"classes", &classes, nvinfer1::PluginFieldType::kINT32, 1}, {"smTree", nullptr, nvinfer1::PluginFieldType::kINT32, 1} }; nvinfer1::PluginFieldCollection pluginData; pluginData.nbFields = 4; pluginData.fields = fields; nvinfer1::IPluginV2 *regionPlugin = creator->createPlugin(layerName.c_str(), &pluginData); assert(regionPlugin != nullptr); nvinfer1::IPluginV2Layer* region = network.addPluginV2(&previous, 1, *regionPlugin); assert(region != nullptr); std::string inputVol = dimsToString(previous->getDimensions()); previous = region->getOutput(0); assert(previous != nullptr); previous->setName(layerName.c_str()); std::string outputVol = dimsToString(previous->getDimensions()); network.markOutput(*previous); channels = getNumChannels(previous); tensorOutputs.push_back(region->getOutput(0)); printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr)); std::cout << "Anchors are being converted to network input resolution i.e. 
Anchors x " << curRegionTensor.stride << " (stride)" << std::endl; for (auto& anchor : curRegionTensor.anchors) anchor *= curRegionTensor.stride; ++outputTensorCount; } else if (m_ConfigBlocks.at(i).at("type") == "reorg") { auto creator = getPluginRegistry()->getPluginCreator("Reorg_TRT", "1"); int stride = 2; nvinfer1::PluginField strideField{"stride", &stride, nvinfer1::PluginFieldType::kINT32, 1}; nvinfer1::PluginFieldCollection pluginData; pluginData.nbFields = 1; pluginData.fields = &strideField; std::string layerName = "reorg_" + std::to_string(i); nvinfer1::IPluginV2 *reorgPlugin = creator->createPlugin(layerName.c_str(), &pluginData); assert(reorgPlugin != nullptr); nvinfer1::IPluginV2Layer* reorg = network.addPluginV2(&previous, 1, *reorgPlugin); assert(reorg != nullptr); std::string inputVol = dimsToString(previous->getDimensions()); previous = reorg->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); channels = getNumChannels(previous); tensorOutputs.push_back(reorg->getOutput(0)); printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr)); } // route layers (single or concat) else if (m_ConfigBlocks.at(i).at("type") == "route") { std::string strLayers = m_ConfigBlocks.at(i).at("layers"); std::vector idxLayers; size_t lastPos = 0, pos = 0; while ((pos = strLayers.find(',', lastPos)) != std::string::npos) { int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos))); idxLayers.push_back (vL); lastPos = pos + 1; } if (lastPos < strLayers.length()) { std::string lastV = trim(strLayers.substr(lastPos)); if (!lastV.empty()) { idxLayers.push_back (std::stoi(lastV)); } } assert (!idxLayers.empty()); std::vector concatInputs; for (int idxLayer : idxLayers) { if (idxLayer < 0) { idxLayer = tensorOutputs.size() + idxLayer; } assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size()); concatInputs.push_back (tensorOutputs[idxLayer]); } nvinfer1::IConcatenationLayer* concat = network.addConcatenation(concatInputs.data(), concatInputs.size()); assert(concat != nullptr); std::string concatLayerName = "route_" + std::to_string(i - 1); concat->setName(concatLayerName.c_str()); // concatenate along the channel dimension concat->setAxis(0); previous = concat->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); // set the output volume depth channels = getNumChannels(previous); tensorOutputs.push_back(concat->getOutput(0)); printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); } else if (m_ConfigBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddUpsample(i - 1, m_ConfigBlocks[i], weights, m_TrtWeights, channels, previous, &network); previous = out->getOutput(0); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); } else if (m_ConfigBlocks.at(i).at("type") == "maxpool") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddMaxpool(i, m_ConfigBlocks.at(i), previous, &network); previous = out->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); } else { std::cout << "Unsupported layer type --> \"" 
<< m_ConfigBlocks.at(i).at("type") << "\"" << std::endl; assert(0); } } if ((int)weights.size() != weightPtr) { std::cout << "Number of unused weights left : " << weights.size() - weightPtr << std::endl; assert(0); } std::cout << "Output yolo blob names :" << std::endl; for (auto& tensor : m_OutputTensors) { std::cout << tensor.blobName << std::endl; } int nbLayers = network.getNbLayers(); std::cout << "Total number of yolo layers: " << nbLayers << std::endl; return NVDSINFER_SUCCESS; } std::vector> Yolo::parseConfigFile (const std::string cfgFilePath) { assert(fileExists(cfgFilePath)); std::ifstream file(cfgFilePath); assert(file.good()); std::string line; std::vector> blocks; std::map block; while (getline(file, line)) { if (line.size() == 0) continue; if (line.front() == '#') continue; line = trim(line); if (line.front() == '[') { if (block.size() > 0) { blocks.push_back(block); block.clear(); } std::string key = "type"; std::string value = trim(line.substr(1, line.size() - 2)); block.insert(std::pair(key, value)); } else { int cpos = line.find('='); std::string key = trim(line.substr(0, cpos)); std::string value = trim(line.substr(cpos + 1)); block.insert(std::pair(key, value)); } } blocks.push_back(block); return blocks; } void Yolo::parseConfigBlocks() { for (auto block : m_ConfigBlocks) { if (block.at("type") == "net") { assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); m_InputH = std::stoul(block.at("height")); m_InputW = std::stoul(block.at("width")); m_InputC = std::stoul(block.at("channels")); assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; } else if ((block.at("type") == "region") || (block.at("type") == "yolo")) { assert((block.find("num") != block.end()) && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); assert((block.find("classes") != block.end()) && std::string("Missing 'classes' param in " + block.at("type") + " layer") .c_str()); assert((block.find("anchors") != block.end()) && std::string("Missing 'anchors' param in " + block.at("type") + " layer") .c_str()); TensorInfo outputTensor; std::string anchorString = block.at("anchors"); while (!anchorString.empty()) { int npos = anchorString.find_first_of(','); if (npos != -1) { float anchor = std::stof(trim(anchorString.substr(0, npos))); outputTensor.anchors.push_back(anchor); anchorString.erase(0, npos + 1); } else { float anchor = std::stof(trim(anchorString)); outputTensor.anchors.push_back(anchor); break; } } if ((m_NetworkType == "yolov3") || (m_NetworkType == "yolov3-tiny")) { assert((block.find("mask") != block.end()) && std::string("Missing 'mask' param in " + block.at("type") + " layer") .c_str()); std::string maskString = block.at("mask"); while (!maskString.empty()) { int npos = maskString.find_first_of(','); if (npos != -1) { uint mask = std::stoul(trim(maskString.substr(0, npos))); outputTensor.masks.push_back(mask); maskString.erase(0, npos + 1); } else { uint mask = std::stoul(trim(maskString)); outputTensor.masks.push_back(mask); break; } } } outputTensor.numBBoxes = outputTensor.masks.size() > 0 ? 
            outputTensor.numBBoxes = outputTensor.masks.size() > 0
                ? outputTensor.masks.size()
                : std::stoul(trim(block.at("num")));
            outputTensor.numClasses = std::stoul(block.at("classes"));
            m_OutputTensors.push_back(outputTensor);
        }
    }
}

void Yolo::destroyNetworkUtils() {
    // deallocate the weights
    for (uint i = 0; i < m_TrtWeights.size(); ++i) {
        if (m_TrtWeights[i].count > 0)
            free(const_cast<void*>(m_TrtWeights[i].values));
    }
    m_TrtWeights.clear();
}
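
// A minimal sketch of how this class is typically driven from an engine
// creation hook, kept as a comment so it does not affect this translation
// unit. The Logger class and the NetworkInfo field values are assumptions for
// illustration; only Yolo::createEngine() comes from this file.
//
//     class Logger : public nvinfer1::ILogger {
//         void log(Severity severity, const char* msg) noexcept override {
//             if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
//         }
//     };
//
//     Logger gLogger;
//     NetworkInfo info;
//     info.networkType    = "yolov3";
//     info.configFilePath = "yolov3.cfg";
//     info.wtsFilePath    = "yolov3.weights";
//     info.deviceType     = "kGPU";
//     info.inputBlobName  = "data";
//
//     nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
//     Yolo yolo(info);
//     nvinfer1::ICudaEngine* engine = yolo.createEngine(builder);
//     if (engine) {
//         nvinfer1::IHostMemory* serialized = engine->serialize();
//         // write serialized->data(), serialized->size() to an engine file
//     }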