first commit

This commit is contained in:
nqthai199@gmail.com
2022-09-15 09:26:49 +07:00
commit df3dd9a705
168 changed files with 67159 additions and 0 deletions

440
lib_ncnn/ncnn/allocator.h Executable file
View File

@@ -0,0 +1,440 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_ALLOCATOR_H
#define NCNN_ALLOCATOR_H
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "platform.h"
#include <stdlib.h>
#if NCNN_VULKAN
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
#include <android/hardware_buffer.h>
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
namespace ncnn {
// the alignment of all the allocated buffers
#if NCNN_AVX512
#define NCNN_MALLOC_ALIGN 64
#elif NCNN_AVX
#define NCNN_MALLOC_ALIGN 32
#else
#define NCNN_MALLOC_ALIGN 16
#endif
// we have some optimized kernels that may overread buffer a bit in loop
// it is common to interleave next-loop data load with arithmetic instructions
// allocating more bytes keeps us safe from SEGV_ACCERR failure
#define NCNN_MALLOC_OVERREAD 64
// Round a pointer up to the next multiple of n bytes.
// ptr  pointer to align
// n    alignment in bytes; must be a power of two
template<typename _Tp>
static NCNN_FORCEINLINE _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
    // For power-of-two n, masking with ~(n - 1) clears the low bits,
    // which is equivalent to the classic (addr + n - 1) & -n trick.
    size_t addr = (size_t)ptr;
    size_t aligned = (addr + (size_t)n - 1) & ~((size_t)n - 1);
    return (_Tp*)aligned;
}
// Round a buffer size up to the next multiple of n bytes.
// Returns the smallest value >= sz that is divisible by n.
// sz  buffer size to align
// n   alignment in bytes; must be a power of two
static NCNN_FORCEINLINE size_t alignSize(size_t sz, int n)
{
    // power-of-two alignment via bit masking; identical to (sz + n - 1) & -n
    size_t mask = (size_t)n - 1;
    return (sz + mask) & ~mask;
}
// Allocate a buffer aligned to NCNN_MALLOC_ALIGN bytes.
// On all branches except MSVC, NCNN_MALLOC_OVERREAD extra bytes are requested
// so kernels that read slightly past the logical end stay inside the allocation.
// Returns 0 on failure. Must be released with fastFree.
static NCNN_FORCEINLINE void* fastMalloc(size_t size)
{
#if _MSC_VER
    return _aligned_malloc(size, NCNN_MALLOC_ALIGN);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
    void* ptr = 0;
    // posix_memalign returns non-zero on failure and leaves ptr unspecified
    if (posix_memalign(&ptr, NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD))
        ptr = 0;
    return ptr;
#elif __ANDROID__ && __ANDROID_API__ < 17
    // old android NDK lacks posix_memalign; memalign'd memory may be free'd
    return memalign(NCNN_MALLOC_ALIGN, size + NCNN_MALLOC_OVERREAD);
#else
    // generic fallback: over-allocate, align manually, and stash the raw
    // malloc pointer just before the aligned address so fastFree can recover it
    unsigned char* udata = (unsigned char*)malloc(size + sizeof(void*) + NCNN_MALLOC_ALIGN + NCNN_MALLOC_OVERREAD);
    if (!udata)
        return 0;
    unsigned char** adata = alignPtr((unsigned char**)udata + 1, NCNN_MALLOC_ALIGN);
    adata[-1] = udata;
    return adata;
#endif
}
// Release a buffer obtained from fastMalloc.
// Each branch mirrors the corresponding allocation branch in fastMalloc;
// a null pointer is silently ignored.
static NCNN_FORCEINLINE void fastFree(void* ptr)
{
    if (ptr)
    {
#if _MSC_VER
        _aligned_free(ptr);
#elif (defined(__unix__) || defined(__APPLE__)) && _POSIX_C_SOURCE >= 200112L || (__ANDROID__ && __ANDROID_API__ >= 17)
        free(ptr);
#elif __ANDROID__ && __ANDROID_API__ < 17
        free(ptr);
#else
        // recover the raw malloc pointer stored just before the aligned address
        unsigned char* udata = ((unsigned char**)ptr)[-1];
        free(udata);
#endif
    }
}
#if NCNN_THREADS
// exchange-add operation for atomic operations on reference counters
// NCNN_XADD(addr, delta): atomically { old = *addr; *addr += delta; return old; }
// The implementation is selected per compiler/target below.
#if defined __riscv && !defined __riscv_atomic
// riscv target without A extension
// no atomic instructions available; falls back to a plain read-modify-write
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#elif defined __INTEL_COMPILER && !(defined WIN32 || defined _WIN32)
// atomic increment on the linux version of the Intel(tm) compiler
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
#if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
#ifdef __ATOMIC_ACQ_REL
#define NCNN_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
#else
// 4 is the numeric value of memory_order_acq_rel for old clang without __ATOMIC_ACQ_REL
#define NCNN_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
#endif
#else
#if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
#define NCNN_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
#else
// legacy gcc builtin with full barrier semantics
#define NCNN_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
#endif
#endif
#elif defined _MSC_VER && !defined RC_INVOKED
#define NCNN_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
// thread-unsafe branch
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif
#else // NCNN_THREADS
// single-threaded build: no atomicity required
static NCNN_FORCEINLINE int NCNN_XADD(int* addr, int delta)
{
    int tmp = *addr;
    *addr += delta;
    return tmp;
}
#endif // NCNN_THREADS
// Abstract interface for user-supplied memory allocators.
// Implementations must pair fastMalloc/fastFree consistently.
class NCNN_EXPORT Allocator
{
public:
    virtual ~Allocator();
    // allocate a buffer of at least `size` bytes; returns 0 on failure
    virtual void* fastMalloc(size_t size) = 0;
    // release a buffer previously returned by this allocator's fastMalloc
    virtual void fastFree(void* ptr) = 0;
};
class PoolAllocatorPrivate;
// Allocator that keeps freed buffers in a pool for later reuse.
// NOTE(review): locking behavior lives in the .cpp; the Unlocked variant
// below suggests this one is the synchronized version - confirm there.
class NCNN_EXPORT PoolAllocator : public Allocator
{
public:
    PoolAllocator();
    ~PoolAllocator();
    // ratio range 0 ~ 1
    // default cr = 0.75
    void set_size_compare_ratio(float scr);
    // release all budgets immediately
    void clear();
    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);
private:
    // non-copyable: declared but not defined
    PoolAllocator(const PoolAllocator&);
    PoolAllocator& operator=(const PoolAllocator&);
private:
    // pimpl: hides implementation state from the public header
    PoolAllocatorPrivate* const d;
};
class UnlockedPoolAllocatorPrivate;
// Pooling allocator variant without internal locking; the caller is
// responsible for confining it to a single thread.
class NCNN_EXPORT UnlockedPoolAllocator : public Allocator
{
public:
    UnlockedPoolAllocator();
    ~UnlockedPoolAllocator();
    // ratio range 0 ~ 1
    // default cr = 0.75
    void set_size_compare_ratio(float scr);
    // release all budgets immediately
    void clear();
    virtual void* fastMalloc(size_t size);
    virtual void fastFree(void* ptr);
private:
    // non-copyable: declared but not defined
    UnlockedPoolAllocator(const UnlockedPoolAllocator&);
    UnlockedPoolAllocator& operator=(const UnlockedPoolAllocator&);
private:
    // pimpl: hides implementation state from the public header
    UnlockedPoolAllocatorPrivate* const d;
};
#if NCNN_VULKAN
class VulkanDevice;
// Bookkeeping record for a vulkan buffer allocation handed out by a VkAllocator.
class NCNN_EXPORT VkBufferMemory
{
public:
    VkBuffer buffer;
    // the base offset assigned by allocator
    size_t offset;
    size_t capacity;
    VkDeviceMemory memory;
    // host-visible mapping of the memory, if mappable
    void* mapped_ptr;
    // buffer state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkPipelineStageFlags stage_flags;
    // initialize and modified by mat
    int refcount;
};
// Bookkeeping record for a vulkan image allocation handed out by a VkAllocator.
class NCNN_EXPORT VkImageMemory
{
public:
    VkImage image;
    VkImageView imageview;
    // underlying info assigned by allocator
    int width;
    int height;
    int depth;
    VkFormat format;
    VkDeviceMemory memory;
    // host-visible mapping of the memory, if mappable
    void* mapped_ptr;
    // the base offset assigned by allocator
    size_t bind_offset;
    size_t bind_capacity;
    // image state, modified by command functions internally
    mutable VkAccessFlags access_flags;
    mutable VkImageLayout image_layout;
    mutable VkPipelineStageFlags stage_flags;
    // in-execution state, modified by command functions internally
    mutable int command_refcount;
    // initialize and modified by mat
    int refcount;
};
// Abstract base class for vulkan device memory allocators.
// Subclasses implement the buffer and image fastMalloc/fastFree pairs.
class NCNN_EXPORT VkAllocator
{
public:
    explicit VkAllocator(const VulkanDevice* _vkdev);
    virtual ~VkAllocator();
    // release any cached allocations held by the subclass
    virtual void clear();
    // buffer allocation interface
    virtual VkBufferMemory* fastMalloc(size_t size) = 0;
    virtual void fastFree(VkBufferMemory* ptr) = 0;
    // flush host writes / invalidate host caches for the mapped range
    // NOTE(review): presumably no-ops for coherent memory - confirm in the .cpp
    virtual int flush(VkBufferMemory* ptr);
    virtual int invalidate(VkBufferMemory* ptr);
    // image allocation interface
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
    virtual void fastFree(VkImageMemory* ptr) = 0;
public:
    const VulkanDevice* vkdev;
    uint32_t buffer_memory_type_index;
    uint32_t image_memory_type_index;
    uint32_t reserved_type_index;
    // properties of the selected memory type
    bool mappable;
    bool coherent;
protected:
    // helpers for subclasses to create the underlying vulkan objects
    VkBuffer create_buffer(size_t size, VkBufferUsageFlags usage);
    VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
    VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);
    VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
    VkImageView create_imageview(VkImage image, VkFormat format);
};
class VkBlobAllocatorPrivate;
// Vulkan allocator that sub-allocates blob memory from large preferred-size blocks.
class NCNN_EXPORT VkBlobAllocator : public VkAllocator
{
public:
    explicit VkBlobAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 16 * 1024 * 1024); // 16M
    virtual ~VkBlobAllocator();
public:
    // release all budgets immediately
    virtual void clear();
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);
private:
    // non-copyable: declared but not defined
    VkBlobAllocator(const VkBlobAllocator&);
    VkBlobAllocator& operator=(const VkBlobAllocator&);
private:
    // pimpl: hides implementation state from the public header
    VkBlobAllocatorPrivate* const d;
};
class VkWeightAllocatorPrivate;
// Vulkan allocator for model weight storage, sub-allocating from preferred-size blocks.
class NCNN_EXPORT VkWeightAllocator : public VkAllocator
{
public:
    explicit VkWeightAllocator(const VulkanDevice* vkdev, size_t preferred_block_size = 8 * 1024 * 1024); // 8M
    virtual ~VkWeightAllocator();
public:
    // release all blocks immediately
    virtual void clear();
public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);
private:
    // non-copyable: declared but not defined
    VkWeightAllocator(const VkWeightAllocator&);
    VkWeightAllocator& operator=(const VkWeightAllocator&);
private:
    // pimpl: hides implementation state from the public header
    VkWeightAllocatorPrivate* const d;
};
class VkStagingAllocatorPrivate;
// Vulkan allocator for host-visible staging buffers used in upload/download,
// with pool-style reuse of freed buffers.
class NCNN_EXPORT VkStagingAllocator : public VkAllocator
{
public:
    explicit VkStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkStagingAllocator();
public:
    // ratio range 0 ~ 1
    // default cr = 0.75
    void set_size_compare_ratio(float scr);
    // release all budgets immediately
    virtual void clear();
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);
private:
    // non-copyable: declared but not defined
    VkStagingAllocator(const VkStagingAllocator&);
    VkStagingAllocator& operator=(const VkStagingAllocator&);
private:
    // pimpl: hides implementation state from the public header
    VkStagingAllocatorPrivate* const d;
};
class VkWeightStagingAllocatorPrivate;
// Vulkan staging allocator used while uploading model weights.
class NCNN_EXPORT VkWeightStagingAllocator : public VkAllocator
{
public:
    explicit VkWeightStagingAllocator(const VulkanDevice* vkdev);
    virtual ~VkWeightStagingAllocator();
public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);
private:
    // non-copyable: declared but not defined
    VkWeightStagingAllocator(const VkWeightStagingAllocator&);
    VkWeightStagingAllocator& operator=(const VkWeightStagingAllocator&);
private:
    // pimpl: hides implementation state from the public header
    VkWeightStagingAllocatorPrivate* const d;
};
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
// Allocator that imports an android AHardwareBuffer as a vulkan image,
// e.g. for zero-copy camera input (requires API level >= 26).
class NCNN_EXPORT VkAndroidHardwareBufferImageAllocator : public VkAllocator
{
public:
    VkAndroidHardwareBufferImageAllocator(const VulkanDevice* _vkdev, AHardwareBuffer* _hb);
    virtual ~VkAndroidHardwareBufferImageAllocator();
public:
    virtual VkBufferMemory* fastMalloc(size_t size);
    virtual void fastFree(VkBufferMemory* ptr);
    virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
    virtual void fastFree(VkImageMemory* ptr);
private:
    // non-copyable: declared but not defined
    VkAndroidHardwareBufferImageAllocator(const VkAndroidHardwareBufferImageAllocator&);
    VkAndroidHardwareBufferImageAllocator& operator=(const VkAndroidHardwareBufferImageAllocator&);
public:
    // query the hardware buffer's properties; returns 0 on success
    // NOTE(review): exact return convention lives in the .cpp - confirm there
    int init();
    int width() const;
    int height() const;
    uint64_t external_format() const;
public:
    AHardwareBuffer* hb;
    AHardwareBuffer_Desc bufferDesc;
    VkAndroidHardwareBufferFormatPropertiesANDROID bufferFormatProperties;
    VkAndroidHardwareBufferPropertiesANDROID bufferProperties;
    VkSamplerYcbcrConversionKHR samplerYcbcrConversion;
};
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
#endif // NCNN_VULKAN
} // namespace ncnn
#endif // NCNN_ALLOCATOR_H

36
lib_ncnn/ncnn/benchmark.h Executable file
View File

@@ -0,0 +1,36 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_BENCHMARK_H
#define NCNN_BENCHMARK_H
#include "layer.h"
#include "mat.h"
#include "platform.h"
namespace ncnn {
// get now timestamp in ms
NCNN_EXPORT double get_current_time();
#if NCNN_BENCHMARK
NCNN_EXPORT void benchmark(const Layer* layer, double start, double end);
NCNN_EXPORT void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_blob, double start, double end);
#endif // NCNN_BENCHMARK
} // namespace ncnn
#endif // NCNN_BENCHMARK_H

44
lib_ncnn/ncnn/blob.h Executable file
View File

@@ -0,0 +1,44 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_BLOB_H
#define NCNN_BLOB_H
#include "mat.h"
#include "platform.h"
namespace ncnn {
// A named edge in the network graph connecting a producing layer to a
// consuming layer, with an optional shape hint.
class NCNN_EXPORT Blob
{
public:
    // empty
    Blob();
public:
#if NCNN_STRING
    // blob name
    std::string name;
#endif // NCNN_STRING
    // layer index which produce this blob as output
    int producer;
    // layer index which need this blob as input
    int consumer;
    // shape hint
    Mat shape;
};
} // namespace ncnn
#endif // NCNN_BLOB_H

320
lib_ncnn/ncnn/c_api.h Executable file
View File

@@ -0,0 +1,320 @@
/* Tencent is pleased to support the open source community by making ncnn available.
*
* Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/BSD-3-Clause
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
#ifndef NCNN_C_API_H
#define NCNN_C_API_H
#include "platform.h"
#if NCNN_C_API
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
NCNN_EXPORT const char* ncnn_version();
/* allocator api */
typedef struct __ncnn_allocator_t* ncnn_allocator_t;
/* C wrapper for ncnn::Allocator: pthis holds the underlying C++ object,
 * and the function pointers dispatch to its virtual methods. */
struct NCNN_EXPORT __ncnn_allocator_t
{
    void* pthis;
    /* allocate at least `size` bytes; returns 0 on failure */
    void* (*fast_malloc)(ncnn_allocator_t allocator, size_t size);
    /* release a buffer previously returned by fast_malloc */
    void (*fast_free)(ncnn_allocator_t allocator, void* ptr);
};
NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_pool_allocator();
NCNN_EXPORT ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator();
NCNN_EXPORT void ncnn_allocator_destroy(ncnn_allocator_t allocator);
/* option api */
typedef struct __ncnn_option_t* ncnn_option_t;
NCNN_EXPORT ncnn_option_t ncnn_option_create();
NCNN_EXPORT void ncnn_option_destroy(ncnn_option_t opt);
NCNN_EXPORT int ncnn_option_get_num_threads(const ncnn_option_t opt);
NCNN_EXPORT void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads);
NCNN_EXPORT int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt);
NCNN_EXPORT void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int use_vulkan_compute);
/* mat api */
typedef struct __ncnn_mat_t* ncnn_mat_t;
NCNN_EXPORT ncnn_mat_t ncnn_mat_create();
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void* data, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, void* data, size_t elemsize, int elempack, ncnn_allocator_t allocator);
NCNN_EXPORT void ncnn_mat_destroy(ncnn_mat_t mat);
NCNN_EXPORT void ncnn_mat_fill_float(ncnn_mat_t mat, float v);
NCNN_EXPORT ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d, int c, ncnn_allocator_t allocator);
NCNN_EXPORT int ncnn_mat_get_dims(const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_mat_get_w(const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_mat_get_h(const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_mat_get_d(const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_mat_get_c(const ncnn_mat_t mat);
NCNN_EXPORT size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_mat_get_elempack(const ncnn_mat_t mat);
NCNN_EXPORT size_t ncnn_mat_get_cstep(const ncnn_mat_t mat);
NCNN_EXPORT void* ncnn_mat_get_data(const ncnn_mat_t mat);
NCNN_EXPORT void* ncnn_mat_get_channel_data(const ncnn_mat_t mat, int c);
#if NCNN_PIXEL
/* mat pixel api */
#define NCNN_MAT_PIXEL_RGB 1
#define NCNN_MAT_PIXEL_BGR 2
#define NCNN_MAT_PIXEL_GRAY 3
#define NCNN_MAT_PIXEL_RGBA 4
#define NCNN_MAT_PIXEL_BGRA 5
/* Pack a source->destination pixel conversion pair into one int:
 * source type in the low 16 bits, destination type in the high 16 bits.
 * Arguments are fully parenthesized so that expression arguments
 * (e.g. ternaries) expand with the intended precedence. */
#define NCNN_MAT_PIXEL_X2Y(X, Y) ((X) | ((Y) << 16))
NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, int w, int h, int stride, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int type, int w, int h, int stride, int target_width, int target_height, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, ncnn_allocator_t allocator);
NCNN_EXPORT ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, int type, int w, int h, int stride, int roix, int roiy, int roiw, int roih, int target_width, int target_height, ncnn_allocator_t allocator);
NCNN_EXPORT void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int type, int stride);
NCNN_EXPORT void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixels, int type, int target_width, int target_height, int target_stride);
#endif /* NCNN_PIXEL */
NCNN_EXPORT void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean_vals, const float* norm_vals);
NCNN_EXPORT void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int elempack, const ncnn_option_t opt);
NCNN_EXPORT void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_option_t opt);
/* blob api */
typedef struct __ncnn_blob_t* ncnn_blob_t;
#if NCNN_STRING
NCNN_EXPORT const char* ncnn_blob_get_name(const ncnn_blob_t blob);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_blob_get_producer(const ncnn_blob_t blob);
NCNN_EXPORT int ncnn_blob_get_consumer(const ncnn_blob_t blob);
NCNN_EXPORT void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int* h, int* c);
/* paramdict api */
typedef struct __ncnn_paramdict_t* ncnn_paramdict_t;
NCNN_EXPORT ncnn_paramdict_t ncnn_paramdict_create();
NCNN_EXPORT void ncnn_paramdict_destroy(ncnn_paramdict_t pd);
NCNN_EXPORT int ncnn_paramdict_get_type(const ncnn_paramdict_t pd, int id);
NCNN_EXPORT int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def);
NCNN_EXPORT float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float def);
NCNN_EXPORT ncnn_mat_t ncnn_paramdict_get_array(const ncnn_paramdict_t pd, int id, const ncnn_mat_t def);
NCNN_EXPORT void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i);
NCNN_EXPORT void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f);
NCNN_EXPORT void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, const ncnn_mat_t v);
/* datareader api */
typedef struct __ncnn_datareader_t* ncnn_datareader_t;
/* C wrapper for ncnn::DataReader: pthis holds the underlying C++ object,
 * and the function pointers dispatch to its virtual methods. */
struct NCNN_EXPORT __ncnn_datareader_t
{
    void* pthis;
#if NCNN_STRING
    /* scanf-style parse of the next token(s) from the stream */
    int (*scan)(ncnn_datareader_t dr, const char* format, void* p);
#endif /* NCNN_STRING */
    /* read `size` raw bytes into buf; returns the number of bytes read */
    size_t (*read)(ncnn_datareader_t dr, void* buf, size_t size);
};
NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create();
#if NCNN_STDIO
NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp);
#endif /* NCNN_STDIO */
NCNN_EXPORT ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char** mem);
NCNN_EXPORT void ncnn_datareader_destroy(ncnn_datareader_t dr);
/* modelbin api */
typedef struct __ncnn_modelbin_t* ncnn_modelbin_t;
/* C wrapper for ncnn::ModelBin: loads weight mats of 1, 2 or 3 dimensions.
 * `type` selects the storage conversion; see the ModelBin C++ docs. */
struct NCNN_EXPORT __ncnn_modelbin_t
{
    void* pthis;
    ncnn_mat_t (*load_1d)(const ncnn_modelbin_t mb, int w, int type);
    ncnn_mat_t (*load_2d)(const ncnn_modelbin_t mb, int w, int h, int type);
    ncnn_mat_t (*load_3d)(const ncnn_modelbin_t mb, int w, int h, int c, int type);
};
NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_datareader_t dr);
NCNN_EXPORT ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* weights, int n);
NCNN_EXPORT void ncnn_modelbin_destroy(ncnn_modelbin_t mb);
/* layer api */
typedef struct __ncnn_layer_t* ncnn_layer_t;
/* C wrapper for ncnn::Layer: pthis holds the underlying C++ object, and the
 * function pointers mirror the Layer virtual interface (param/model loading,
 * pipeline lifecycle, and the four forward variants). */
struct NCNN_EXPORT __ncnn_layer_t
{
    void* pthis;
    int (*load_param)(ncnn_layer_t layer, const ncnn_paramdict_t pd);
    int (*load_model)(ncnn_layer_t layer, const ncnn_modelbin_t mb);
    int (*create_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt);
    int (*destroy_pipeline)(ncnn_layer_t layer, const ncnn_option_t opt);
    /* single-input single-output forward */
    int (*forward_1)(const ncnn_layer_t layer, const ncnn_mat_t bottom_blob, ncnn_mat_t* top_blob, const ncnn_option_t opt);
    /* multi-input (n blobs) multi-output (n2 blobs) forward */
    int (*forward_n)(const ncnn_layer_t layer, const ncnn_mat_t* bottom_blobs, int n, ncnn_mat_t* top_blobs, int n2, const ncnn_option_t opt);
    /* in-place forward variants: bottom blob(s) are overwritten with the result */
    int (*forward_inplace_1)(const ncnn_layer_t layer, ncnn_mat_t bottom_top_blob, const ncnn_option_t opt);
    int (*forward_inplace_n)(const ncnn_layer_t layer, ncnn_mat_t* bottom_top_blobs, int n, const ncnn_option_t opt);
};
NCNN_EXPORT ncnn_layer_t ncnn_layer_create();
NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex);
#if NCNN_STRING
NCNN_EXPORT ncnn_layer_t ncnn_layer_create_by_type(const char* type);
#endif /* NCNN_STRING */
NCNN_EXPORT void ncnn_layer_destroy(ncnn_layer_t layer);
#if NCNN_STRING
NCNN_EXPORT const char* ncnn_layer_get_name(const ncnn_layer_t layer);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_layer_get_typeindex(const ncnn_layer_t layer);
#if NCNN_STRING
NCNN_EXPORT const char* ncnn_layer_get_type(const ncnn_layer_t layer);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_inplace(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_vulkan(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_packing(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_support_image_storage(const ncnn_layer_t layer);
NCNN_EXPORT void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable);
NCNN_EXPORT void ncnn_layer_set_support_image_storage(ncnn_layer_t layer, int enable);
NCNN_EXPORT int ncnn_layer_get_bottom_count(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i);
NCNN_EXPORT int ncnn_layer_get_top_count(const ncnn_layer_t layer);
NCNN_EXPORT int ncnn_layer_get_top(const ncnn_layer_t layer, int i);
NCNN_EXPORT void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c);
NCNN_EXPORT void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims, int* w, int* h, int* c);
/* layer factory function */
typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata);
typedef void (*ncnn_layer_destroyer_t)(ncnn_layer_t layer, void* userdata);
typedef struct __ncnn_net_custom_layer_factory_t* ncnn_net_custom_layer_factory_t;
/* Singly-linked list node holding one registered custom layer factory:
 * creator/destroyer callbacks plus the opaque userdata passed back to them. */
struct __ncnn_net_custom_layer_factory_t
{
    ncnn_layer_creator_t creator;
    ncnn_layer_destroyer_t destroyer;
    void* userdata;
    /* next registered factory, or 0 at the end of the list */
    ncnn_net_custom_layer_factory_t next;
};
/* net api */
typedef struct __ncnn_net_t* ncnn_net_t;
/* C wrapper for ncnn::Net: pthis holds the underlying C++ object, plus the
 * head of the custom layer factory list registered through the C api. */
struct __ncnn_net_t
{
    void* pthis;
    ncnn_net_custom_layer_factory_t custom_layer_factory;
};
NCNN_EXPORT ncnn_net_t ncnn_net_create();
NCNN_EXPORT void ncnn_net_destroy(ncnn_net_t net);
NCNN_EXPORT void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt);
#if NCNN_STRING
NCNN_EXPORT void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* type, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata);
#endif /* NCNN_STRING */
NCNN_EXPORT void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typeindex, ncnn_layer_creator_t creator, ncnn_layer_destroyer_t destroyer, void* userdata);
#if NCNN_STDIO
#if NCNN_STRING
NCNN_EXPORT int ncnn_net_load_param(ncnn_net_t net, const char* path);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_net_load_param_bin(ncnn_net_t net, const char* path);
NCNN_EXPORT int ncnn_net_load_model(ncnn_net_t net, const char* path);
#endif /* NCNN_STDIO */
#if NCNN_STDIO
#if NCNN_STRING
NCNN_EXPORT int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem);
#endif /* NCNN_STRING */
#endif /* NCNN_STDIO */
NCNN_EXPORT int ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned char* mem);
NCNN_EXPORT int ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem);
#if NCNN_STRING
NCNN_EXPORT int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
NCNN_EXPORT int ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader_t dr);
NCNN_EXPORT void ncnn_net_clear(ncnn_net_t net);
/* extractor api */
typedef struct __ncnn_extractor_t* ncnn_extractor_t;
NCNN_EXPORT ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net);
NCNN_EXPORT void ncnn_extractor_destroy(ncnn_extractor_t ex);
NCNN_EXPORT void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t opt);
#if NCNN_STRING
NCNN_EXPORT int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_mat_t* mat);
#endif /* NCNN_STRING */
NCNN_EXPORT int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncnn_mat_t mat);
NCNN_EXPORT int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_mat_t* mat);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* NCNN_C_API */
#endif /* NCNN_C_API_H */

136
lib_ncnn/ncnn/command.h Executable file
View File

@@ -0,0 +1,136 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_COMMAND_H
#define NCNN_COMMAND_H
#include "platform.h"
#if NCNN_VULKAN
#include "mat.h"
#include <vulkan/vulkan.h>
namespace ncnn {
class Pipeline;
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
class ImportAndroidHardwareBufferPipeline;
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
class VkComputePrivate;
// Records compute work (uploads, downloads, clones, pipeline dispatches)
// into a vulkan command buffer; submit_and_wait() executes the recorded work.
class NCNN_EXPORT VkCompute
{
public:
    explicit VkCompute(const VulkanDevice* vkdev);
    virtual ~VkCompute();
public:
    // host -> device transfers
    void record_upload(const Mat& src, VkMat& dst, const Option& opt);
    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);
    // device -> host transfers
    void record_download(const VkMat& src, Mat& dst, const Option& opt);
    void record_download(const VkImageMat& src, Mat& dst, const Option& opt);
    // buffer <-> image layout conversion on the device
    void record_buffer_to_image(const VkMat& src, VkImageMat& dst, const Option& opt);
    void record_image_to_buffer(const VkImageMat& src, VkMat& dst, const Option& opt);
    // copies between any combination of host/device buffer/image storage
    void record_clone(const Mat& src, VkMat& dst, const Option& opt);
    void record_clone(const Mat& src, VkImageMat& dst, const Option& opt);
    void record_clone(const VkMat& src, Mat& dst, const Option& opt);
    void record_clone(const VkImageMat& src, Mat& dst, const Option& opt);
    void record_clone(const VkMat& src, VkMat& dst, const Option& opt);
    void record_clone(const VkImageMat& src, VkImageMat& dst, const Option& opt);
    void record_clone(const VkMat& src, VkImageMat& dst, const Option& opt);
    void record_clone(const VkImageMat& src, VkMat& dst, const Option& opt);
    // dispatch a compute pipeline; `dispatcher` supplies the global work size
    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
    void record_pipeline(const Pipeline* pipeline, const std::vector<VkImageMat>& bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkMat& dispatcher);
    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const VkImageMat& dispatcher);
    void record_pipeline(const Pipeline* pipeline, const std::vector<VkMat>& buffer_bindings, const std::vector<VkImageMat>& image_bindings, const std::vector<vk_constant_type>& constants, const Mat& dispatcher);
#if NCNN_BENCHMARK
    void record_write_timestamp(uint32_t query);
#endif // NCNN_BENCHMARK
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkMat& dst);
    void record_import_android_hardware_buffer(const ImportAndroidHardwareBufferPipeline* pipeline, const VkImageMat& src, const VkImageMat& dst);
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
    // submit the recorded command buffer and block until execution completes
    int submit_and_wait();
    // reset recording state so the object can be reused
    int reset();
#if NCNN_BENCHMARK
    int create_query_pool(uint32_t query_count);
    int get_query_pool_results(uint32_t first_query, uint32_t query_count, std::vector<uint64_t>& results);
#endif // NCNN_BENCHMARK
protected:
    const VulkanDevice* vkdev;
    // insert memory barriers for the given binding's access pattern
    void barrier_readwrite(const VkMat& binding);
    void barrier_readwrite(const VkImageMat& binding);
    void barrier_readonly(const VkImageMat& binding);
private:
    // pimpl: hides command buffer state from the public header
    VkComputePrivate* const d;
};
class VkTransferPrivate;

// Upload-only command recorder; used e.g. by Layer::upload_model to move
// weight blobs from host to device, then executed with submit_and_wait().
class NCNN_EXPORT VkTransfer
{
public:
    // vkdev must outlive this recorder
    explicit VkTransfer(const VulkanDevice* vkdev);
    virtual ~VkTransfer();

public:
    // record host-to-device transfer
    // flatten: presumably collapses dims into 1-D before upload — TODO confirm in command.cpp
    void record_upload(const Mat& src, VkMat& dst, const Option& opt, bool flatten = true);
    void record_upload(const Mat& src, VkImageMat& dst, const Option& opt);

    // execute recorded transfers and block until they complete
    // return 0 if success
    int submit_and_wait();

protected:
    const VulkanDevice* vkdev; // not owned

private:
    VkTransferPrivate* const d; // private implementation, owned
};
} // namespace ncnn
#endif // NCNN_VULKAN
#endif // NCNN_COMMAND_H

131
lib_ncnn/ncnn/cpu.h Executable file
View File

@@ -0,0 +1,131 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_CPU_H
#define NCNN_CPU_H
#include <stddef.h>
#if defined __ANDROID__ || defined __linux__
#include <sched.h> // cpu_set_t
#endif
#include "platform.h"
namespace ncnn {
// Set of CPU core indices, used to express thread affinity masks.
class NCNN_EXPORT CpuSet
{
public:
    CpuSet();
    void enable(int cpu);
    void disable(int cpu);
    void disable_all();
    bool is_enabled(int cpu) const;
    // count of cpus currently enabled in the set
    int num_enabled() const;

public:
#if defined __ANDROID__ || defined __linux__
    cpu_set_t cpu_set; // native sched affinity mask
#endif
#if __APPLE__
    unsigned int policy; // affinity policy value — exact semantics live in cpu.cpp
#endif
};
// test optional cpu features
// each returns nonzero when the feature is available on the running cpu
// neon = armv7 neon or aarch64 asimd
NCNN_EXPORT int cpu_support_arm_neon();
// vfpv4 = armv7 fp16 + fma
NCNN_EXPORT int cpu_support_arm_vfpv4();
// asimdhp = aarch64 asimd half precision
NCNN_EXPORT int cpu_support_arm_asimdhp();
// asimddp = aarch64 asimd dot product
NCNN_EXPORT int cpu_support_arm_asimddp();
// avx = x86 avx
NCNN_EXPORT int cpu_support_x86_avx();
// fma = x86 fma
NCNN_EXPORT int cpu_support_x86_fma();
// xop = x86 xop
NCNN_EXPORT int cpu_support_x86_xop();
// f16c = x86 f16c
NCNN_EXPORT int cpu_support_x86_f16c();
// avx2 = x86 avx2 + fma + f16c
NCNN_EXPORT int cpu_support_x86_avx2();
// avx_vnni = x86 avx vnni
NCNN_EXPORT int cpu_support_x86_avx_vnni();
// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
NCNN_EXPORT int cpu_support_x86_avx512();
// avx512_vnni = x86 avx512 vnni
NCNN_EXPORT int cpu_support_x86_avx512_vnni();
// msa = mips msa
NCNN_EXPORT int cpu_support_mips_msa();
// mmi = loongson mmi
NCNN_EXPORT int cpu_support_loongson_mmi();
// v = riscv vector
NCNN_EXPORT int cpu_support_riscv_v();
// zfh = riscv half-precision float
NCNN_EXPORT int cpu_support_riscv_zfh();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

// cpu info
NCNN_EXPORT int get_cpu_count();
NCNN_EXPORT int get_little_cpu_count();
NCNN_EXPORT int get_big_cpu_count();

// bind all threads on little clusters if powersave enabled
// affects HMP arch cpu like ARM big.LITTLE
// only implemented on android at the moment
// switching powersave is expensive and not thread-safe
// 0 = all cores enabled(default)
// 1 = only little clusters enabled
// 2 = only big clusters enabled
// return 0 if success for setter function
NCNN_EXPORT int get_cpu_powersave();
NCNN_EXPORT int set_cpu_powersave(int powersave);

// convenient wrapper
NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);

// set explicit thread affinity
NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);

// misc function wrapper for openmp routines
NCNN_EXPORT int get_omp_num_threads();
NCNN_EXPORT void set_omp_num_threads(int num_threads);
NCNN_EXPORT int get_omp_dynamic();
NCNN_EXPORT void set_omp_dynamic(int dynamic);
NCNN_EXPORT int get_omp_thread_num();
NCNN_EXPORT int get_kmp_blocktime();
NCNN_EXPORT void set_kmp_blocktime(int time_ms);

// need to flush denormals on Intel Chipset.
// Other architectures such as ARM can be added as needed.
// 0 = DAZ OFF, FTZ OFF
// 1 = DAZ ON , FTZ OFF
// 2 = DAZ OFF, FTZ ON
// 3 = DAZ ON, FTZ ON
NCNN_EXPORT int get_flush_denormals();
NCNN_EXPORT int set_flush_denormals(int flush_denormals);
} // namespace ncnn
#endif // NCNN_CPU_H

122
lib_ncnn/ncnn/datareader.h Executable file
View File

@@ -0,0 +1,122 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_DATAREADER_H
#define NCNN_DATAREADER_H
#include "platform.h"
#if NCNN_STDIO
#include <stdio.h>
#endif
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
#include <android/asset_manager.h>
#endif
#endif // NCNN_PLATFORM_API
namespace ncnn {
// data read wrapper
// Abstract source of param/model bytes. Subclasses below adapt stdio FILEs,
// in-memory buffers and Android assets to this single interface.
// Default implementations exist (methods are non-pure); behavior of the
// defaults is defined in datareader.cpp.
class NCNN_EXPORT DataReader
{
public:
    DataReader();
    virtual ~DataReader();

#if NCNN_STRING
    // parse plain param text
    // return 1 if scan success
    virtual int scan(const char* format, void* p) const;
#endif // NCNN_STRING

    // read binary param and model data
    // return bytes read
    virtual size_t read(void* buf, size_t size) const;

    // get model data reference
    // return bytes referenced (zero-copy access when the source supports it)
    virtual size_t reference(size_t size, const void** buf) const;
};
#if NCNN_STDIO
class DataReaderFromStdioPrivate;

// DataReader over a stdio FILE*; the FILE is borrowed, not owned —
// the caller keeps it open for the reader's lifetime and closes it after.
class NCNN_EXPORT DataReaderFromStdio : public DataReader
{
public:
    explicit DataReaderFromStdio(FILE* fp);
    virtual ~DataReaderFromStdio();

#if NCNN_STRING
    virtual int scan(const char* format, void* p) const;
#endif // NCNN_STRING
    virtual size_t read(void* buf, size_t size) const;

private:
    // non-copyable (declared, not defined)
    DataReaderFromStdio(const DataReaderFromStdio&);
    DataReaderFromStdio& operator=(const DataReaderFromStdio&);

private:
    DataReaderFromStdioPrivate* const d; // private implementation, owned
};
#endif // NCNN_STDIO
class DataReaderFromMemoryPrivate;

// DataReader over a caller-owned memory buffer.
// NOTE(review): the pointer is taken by reference (const unsigned char*&),
// which suggests the caller's pointer is advanced as data is consumed —
// confirm in datareader.cpp.
class NCNN_EXPORT DataReaderFromMemory : public DataReader
{
public:
    explicit DataReaderFromMemory(const unsigned char*& mem);
    virtual ~DataReaderFromMemory();

#if NCNN_STRING
    virtual int scan(const char* format, void* p) const;
#endif // NCNN_STRING
    virtual size_t read(void* buf, size_t size) const;
    // zero-copy reference is possible for a memory source, hence the override
    virtual size_t reference(size_t size, const void** buf) const;

private:
    // non-copyable (declared, not defined)
    DataReaderFromMemory(const DataReaderFromMemory&);
    DataReaderFromMemory& operator=(const DataReaderFromMemory&);

private:
    DataReaderFromMemoryPrivate* const d; // private implementation, owned
};
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
class DataReaderFromAndroidAssetPrivate;

// DataReader over an Android AAsset; the asset handle is borrowed, not owned.
class NCNN_EXPORT DataReaderFromAndroidAsset : public DataReader
{
public:
    explicit DataReaderFromAndroidAsset(AAsset* asset);
    virtual ~DataReaderFromAndroidAsset();

#if NCNN_STRING
    virtual int scan(const char* format, void* p) const;
#endif // NCNN_STRING
    virtual size_t read(void* buf, size_t size) const;

private:
    // non-copyable (declared, not defined)
    DataReaderFromAndroidAsset(const DataReaderFromAndroidAsset&);
    DataReaderFromAndroidAsset& operator=(const DataReaderFromAndroidAsset&);

private:
    DataReaderFromAndroidAssetPrivate* const d; // private implementation, owned
};
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
} // namespace ncnn
#endif // NCNN_DATAREADER_H

359
lib_ncnn/ncnn/gpu.h Executable file
View File

@@ -0,0 +1,359 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_GPU_H
#define NCNN_GPU_H
#include "platform.h"
#if NCNN_VULKAN
#include "mat.h"
#include <vulkan/vulkan.h>
#include "vulkan_header_fix.h"
namespace ncnn {
// instance
// global Vulkan instance lifecycle; create before any gpu query, destroy last
NCNN_EXPORT int create_gpu_instance();
NCNN_EXPORT void destroy_gpu_instance();

// instance extension capability
// nonzero when the corresponding instance extension was found at create time
extern int support_VK_KHR_external_memory_capabilities;
extern int support_VK_KHR_get_physical_device_properties2;
extern int support_VK_KHR_get_surface_capabilities2;
extern int support_VK_KHR_surface;
extern int support_VK_EXT_debug_utils;
#if __ANDROID_API__ >= 26
extern int support_VK_KHR_android_surface;
#endif // __ANDROID_API__ >= 26

// instance-level extension entry points, resolved at instance creation;
// only valid while the gpu instance exists
// VK_KHR_external_memory_capabilities
extern PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR vkGetPhysicalDeviceExternalBufferPropertiesKHR;
// VK_KHR_get_physical_device_properties2
extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
extern PFN_vkGetPhysicalDeviceFormatProperties2KHR vkGetPhysicalDeviceFormatProperties2KHR;
extern PFN_vkGetPhysicalDeviceImageFormatProperties2KHR vkGetPhysicalDeviceImageFormatProperties2KHR;
extern PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR vkGetPhysicalDeviceQueueFamilyProperties2KHR;
extern PFN_vkGetPhysicalDeviceMemoryProperties2KHR vkGetPhysicalDeviceMemoryProperties2KHR;
extern PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR vkGetPhysicalDeviceSparseImageFormatProperties2KHR;
// VK_KHR_get_surface_capabilities2
extern PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR vkGetPhysicalDeviceSurfaceCapabilities2KHR;
extern PFN_vkGetPhysicalDeviceSurfaceFormats2KHR vkGetPhysicalDeviceSurfaceFormats2KHR;
// VK_KHR_surface
extern PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR;
extern PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR;
extern PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR;
extern PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR;
extern PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR;
#if __ANDROID_API__ >= 26
// VK_KHR_android_surface
extern PFN_vkCreateAndroidSurfaceKHR vkCreateAndroidSurfaceKHR;
#endif // __ANDROID_API__ >= 26
// VK_NV_cooperative_matrix
extern PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV vkGetPhysicalDeviceCooperativeMatrixPropertiesNV;

// get info
NCNN_EXPORT int get_gpu_count();
NCNN_EXPORT int get_default_gpu_index();
class GpuInfoPrivate;

// Read-only capability snapshot of one physical gpu, populated by
// create_gpu_instance() (a friend). Query limits, queue layout, feature
// and extension support before choosing a device.
class NCNN_EXPORT GpuInfo
{
public:
    explicit GpuInfo();
    virtual ~GpuInfo();

    // vulkan physical device
    VkPhysicalDevice physical_device() const;

    // memory properties
    const VkPhysicalDeviceMemoryProperties& physical_device_memory_properties() const;

    // info
    uint32_t api_version() const;
    uint32_t driver_version() const;
    uint32_t vendor_id() const;
    uint32_t device_id() const;
    const char* device_name() const;
    uint8_t* pipeline_cache_uuid() const;

    // 0 = discrete gpu
    // 1 = integrated gpu
    // 2 = virtual gpu
    // 3 = cpu
    int type() const;

    // hardware limit
    uint32_t max_shared_memory_size() const;
    uint32_t max_workgroup_count_x() const;
    uint32_t max_workgroup_count_y() const;
    uint32_t max_workgroup_count_z() const;
    uint32_t max_workgroup_invocations() const;
    uint32_t max_workgroup_size_x() const;
    uint32_t max_workgroup_size_y() const;
    uint32_t max_workgroup_size_z() const;
    size_t memory_map_alignment() const;
    size_t buffer_offset_alignment() const;
    size_t non_coherent_atom_size() const;
    size_t buffer_image_granularity() const;
    uint32_t max_image_dimension_1d() const;
    uint32_t max_image_dimension_2d() const;
    uint32_t max_image_dimension_3d() const;
    float timestamp_period() const;

    // runtime
    uint32_t compute_queue_family_index() const;
    uint32_t graphics_queue_family_index() const;
    uint32_t transfer_queue_family_index() const;
    uint32_t compute_queue_count() const;
    uint32_t graphics_queue_count() const;
    uint32_t transfer_queue_count() const;

    // property
    bool unified_compute_transfer_queue() const;

    // subgroup
    uint32_t subgroup_size() const;
    bool support_subgroup_basic() const;
    bool support_subgroup_vote() const;
    bool support_subgroup_ballot() const;
    bool support_subgroup_shuffle() const;

    // bug is not feature
    // known driver defects that the runtime must work around
    bool bug_storage_buffer_no_l1() const;
    bool bug_corrupted_online_pipeline_cache() const;
    bool bug_buffer_image_load_zero() const;

    // but sometimes bug is a feature
    bool bug_implicit_fp16_arithmetic() const;

    // fp16 and int8 feature
    bool support_fp16_packed() const;
    bool support_fp16_storage() const;
    bool support_fp16_arithmetic() const;
    bool support_int8_packed() const;
    bool support_int8_storage() const;
    bool support_int8_arithmetic() const;

    // ycbcr conversion feature
    bool support_ycbcr_conversion() const;

    // cooperative matrix feature
    bool support_cooperative_matrix() const;
    bool support_cooperative_matrix_16_8_8() const;

    // extension capability
    // nonzero when the device extension is available
    int support_VK_KHR_8bit_storage() const;
    int support_VK_KHR_16bit_storage() const;
    int support_VK_KHR_bind_memory2() const;
    int support_VK_KHR_create_renderpass2() const;
    int support_VK_KHR_dedicated_allocation() const;
    int support_VK_KHR_descriptor_update_template() const;
    int support_VK_KHR_external_memory() const;
    int support_VK_KHR_get_memory_requirements2() const;
    int support_VK_KHR_maintenance1() const;
    int support_VK_KHR_maintenance2() const;
    int support_VK_KHR_maintenance3() const;
    int support_VK_KHR_multiview() const;
    int support_VK_KHR_push_descriptor() const;
    int support_VK_KHR_sampler_ycbcr_conversion() const;
    int support_VK_KHR_shader_float16_int8() const;
    int support_VK_KHR_shader_float_controls() const;
    int support_VK_KHR_storage_buffer_storage_class() const;
    int support_VK_KHR_swapchain() const;
    int support_VK_EXT_descriptor_indexing() const;
    int support_VK_EXT_memory_budget() const;
    int support_VK_EXT_queue_family_foreign() const;
#if __ANDROID_API__ >= 26
    int support_VK_ANDROID_external_memory_android_hardware_buffer() const;
#endif // __ANDROID_API__ >= 26
    int support_VK_NV_cooperative_matrix() const;

private:
    // non-copyable (declared, not defined)
    GpuInfo(const GpuInfo&);
    GpuInfo& operator=(const GpuInfo&);

private:
    // create_gpu_instance fills the private data of every GpuInfo
    friend int create_gpu_instance();
    GpuInfoPrivate* const d; // private implementation, owned
};

// capability snapshot for a device; valid while the gpu instance exists
NCNN_EXPORT const GpuInfo& get_gpu_info(int device_index = get_default_gpu_index());
class VkAllocator;
class VkCompute;
class Option;
class PipelineCache;
class VulkanDevicePrivate;

// Logical Vulkan device wrapper: owns the VkDevice plus shared resources
// (queues, allocators, samplers, dummy mats, pipeline cache) and exposes
// helpers for building compute pipelines.
class NCNN_EXPORT VulkanDevice
{
public:
    VulkanDevice(int device_index = get_default_gpu_index());
    ~VulkanDevice();

    // capability info of the underlying physical device
    const GpuInfo& info;

    VkDevice vkdevice() const;

    // create a shader module from SPIR-V words
    VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size) const;

    // with fixed workgroup size
    VkShaderModule compile_shader_module(const uint32_t* spv_data, size_t spv_data_size, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) const;

    // helper for creating pipeline
    // return 0 if success
    int create_descriptorset_layout(int binding_count, const int* binding_types, VkDescriptorSetLayout* descriptorset_layout) const;
    int create_pipeline_layout(int push_constant_count, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout* pipeline_layout) const;
    int create_pipeline(VkShaderModule shader_module, VkPipelineLayout pipeline_layout, const std::vector<vk_specialization_type>& specializations, VkPipeline* pipeline) const;
    int create_descriptor_update_template(int binding_count, const int* binding_types, VkDescriptorSetLayout descriptorset_layout, VkPipelineLayout pipeline_layout, VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;

    // pick a memory type matching required flags, biased by preferred flags
    uint32_t find_memory_index(uint32_t memory_type_bits, VkFlags required, VkFlags preferred, VkFlags preferred_not) const;
    bool is_mappable(uint32_t memory_type_index) const;
    bool is_coherent(uint32_t memory_type_index) const;

    // queue checkout/checkin; every acquire must be paired with a reclaim
    VkQueue acquire_queue(uint32_t queue_family_index) const;
    void reclaim_queue(uint32_t queue_family_index, VkQueue queue) const;

    // allocator on this device
    // pooled checkout/checkin, pair acquire with reclaim
    VkAllocator* acquire_blob_allocator() const;
    void reclaim_blob_allocator(VkAllocator* allocator) const;
    VkAllocator* acquire_staging_allocator() const;
    void reclaim_staging_allocator(VkAllocator* allocator) const;

    // immutable sampler for texelfetch
    const VkSampler* immutable_texelfetch_sampler() const;

    // dummy buffer image
    // placeholders for unused shader bindings
    VkMat get_dummy_buffer() const;
    VkImageMat get_dummy_image() const;
    VkImageMat get_dummy_image_readonly() const;

    // pipeline cache on this device
    const PipelineCache* get_pipeline_cache() const;

    // test image allocation
    // true if a mat of this shape can be backed by image storage
    bool shape_support_image_storage(const Mat& shape) const;

    // current gpu heap memory budget in MB
    uint32_t get_heap_budget() const;

    // utility operator
    // repack elements on device (elempack conversion) via cmd
    void convert_packing(const VkMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
    void convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
    void convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;
    void convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const;

    // device-level extension entry points, resolved in init_device_extension();
    // null when the extension is unsupported
    // VK_KHR_bind_memory2
    PFN_vkBindBufferMemory2KHR vkBindBufferMemory2KHR;
    PFN_vkBindImageMemory2KHR vkBindImageMemory2KHR;

    // VK_KHR_create_renderpass2
    PFN_vkCmdBeginRenderPass2KHR vkCmdBeginRenderPass2KHR;
    PFN_vkCmdEndRenderPass2KHR vkCmdEndRenderPass2KHR;
    PFN_vkCmdNextSubpass2KHR vkCmdNextSubpass2KHR;
    PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;

    // VK_KHR_descriptor_update_template
    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
    PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;

    // VK_KHR_get_memory_requirements2
    PFN_vkGetImageMemoryRequirements2KHR vkGetImageMemoryRequirements2KHR;
    PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR;
    PFN_vkGetImageSparseMemoryRequirements2KHR vkGetImageSparseMemoryRequirements2KHR;

    // VK_KHR_maintenance1
    PFN_vkTrimCommandPoolKHR vkTrimCommandPoolKHR;

    // VK_KHR_maintenance3
    PFN_vkGetDescriptorSetLayoutSupportKHR vkGetDescriptorSetLayoutSupportKHR;

    // VK_KHR_push_descriptor
    PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR;
    PFN_vkCmdPushDescriptorSetKHR vkCmdPushDescriptorSetKHR;

    // VK_KHR_sampler_ycbcr_conversion
    PFN_vkCreateSamplerYcbcrConversionKHR vkCreateSamplerYcbcrConversionKHR;
    PFN_vkDestroySamplerYcbcrConversionKHR vkDestroySamplerYcbcrConversionKHR;

    // VK_KHR_swapchain
    PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR;
    PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR;
    PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR;
    PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR;
    PFN_vkQueuePresentKHR vkQueuePresentKHR;

#if __ANDROID_API__ >= 26
    // VK_ANDROID_external_memory_android_hardware_buffer
    PFN_vkGetAndroidHardwareBufferPropertiesANDROID vkGetAndroidHardwareBufferPropertiesANDROID;
    PFN_vkGetMemoryAndroidHardwareBufferANDROID vkGetMemoryAndroidHardwareBufferANDROID;
#endif // __ANDROID_API__ >= 26

protected:
    // device extension
    // resolve the function pointers above; return 0 if success
    int init_device_extension();

private:
    // non-copyable (declared, not defined)
    VulkanDevice(const VulkanDevice&);
    VulkanDevice& operator=(const VulkanDevice&);

private:
    VulkanDevicePrivate* const d; // private implementation, owned
};
// shared logical device for the given gpu; ownership stays with the library
NCNN_EXPORT VulkanDevice* get_gpu_device(int device_index = get_default_gpu_index());

// online spirv compilation
// compile GLSL compute source (or a built-in shader by index) to SPIR-V words
// return 0 if success
NCNN_EXPORT int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv);
NCNN_EXPORT int compile_spirv_module(const char* comp_data, int comp_data_size, const Option& opt, std::vector<uint32_t>& spirv);
NCNN_EXPORT int compile_spirv_module(int shader_type_index, const Option& opt, std::vector<uint32_t>& spirv);
// info from spirv
// binding/constant layout reflected from a SPIR-V module
class NCNN_EXPORT ShaderInfo
{
public:
    int specialization_count;
    int binding_count;
    int push_constant_count;

    // per-binding descriptor type
    // 0 = null
    // 1 = storage buffer
    // 2 = storage image
    // 3 = combined image sampler
    int binding_types[16]; // 16 is large enough I think ...

    // padding for future fields without breaking ABI
    int reserved_0;
    int reserved_1;
    int reserved_2;
    int reserved_3;
};

// reflect shader_info from SPIR-V words; return 0 if success
NCNN_EXPORT int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size, ShaderInfo& shader_info);
} // namespace ncnn
#endif // NCNN_VULKAN
#endif // NCNN_GPU_H

215
lib_ncnn/ncnn/layer.h Executable file
View File

@@ -0,0 +1,215 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_LAYER_H
#define NCNN_LAYER_H
#include "mat.h"
#include "modelbin.h"
#include "option.h"
#include "paramdict.h"
#include "platform.h"
#include <math.h>
#if NCNN_VULKAN
#include "command.h"
#include "pipeline.h"
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN
namespace ncnn {
// Base class of every network layer. The pipeline is:
// load_param -> load_model -> create_pipeline -> forward[_inplace]* -> destroy_pipeline.
// Subclasses override the virtuals they need and advertise capabilities
// via the support_* flags below.
class NCNN_EXPORT Layer
{
public:
    // empty
    Layer();
    // virtual destructor
    virtual ~Layer();

    // load layer specific parameter from parsed dict
    // return 0 if success
    virtual int load_param(const ParamDict& pd);

    // load layer specific weight data from model binary
    // return 0 if success
    virtual int load_model(const ModelBin& mb);

    // layer implementation specific setup
    // return 0 if success
    virtual int create_pipeline(const Option& opt);

    // layer implementation specific clean
    // return 0 if success
    virtual int destroy_pipeline(const Option& opt);

public:
    // capability flags; the net uses these to pick a forward overload
    // one input and one output blob
    bool one_blob_only;
    // support inplace inference
    bool support_inplace;
    // support vulkan compute
    bool support_vulkan;
    // accept input blob with packed storage
    bool support_packing;

    // accept bf16
    bool support_bf16_storage;
    // accept fp16
    bool support_fp16_storage;
    // accept int8
    bool support_int8_storage;

    // shader image storage
    bool support_image_storage;
    // shader tensor storage
    bool support_tensor_storage;

    // placeholders keeping the struct layout stable for future flags
    bool support_reserved_00;
    bool support_reserved_0;
    bool support_reserved_1;
    bool support_reserved_2;
    bool support_reserved_3;
    bool support_reserved_4;
    bool support_reserved_5;
    bool support_reserved_6;
    bool support_reserved_7;
    bool support_reserved_8;
    bool support_reserved_9;
    bool support_reserved_10;
    bool support_reserved_11;
    bool support_reserved_12;
    bool support_reserved_13;

public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

#if NCNN_VULKAN
public:
    // upload weight blob from host to device
    virtual int upload_model(VkTransfer& cmd, const Option& opt);

public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const;

    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
    virtual int forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
    virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<VkImageMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt) const;
    virtual int forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;

public:
    // assigned immediately after creating this layer
    const VulkanDevice* vkdev;
#endif // NCNN_VULKAN

public:
    // custom user data
    void* userdata;
    // layer type index
    int typeindex;

#if NCNN_STRING
    // layer type name
    std::string type;
    // layer name
    std::string name;
#endif // NCNN_STRING

    // blob index which this layer needs as input
    std::vector<int> bottoms;
    // blob index which this layer produces as output
    std::vector<int> tops;

    // shape hint
    std::vector<Mat> bottom_shapes;
    std::vector<Mat> top_shapes;
};
// layer factory function
typedef Layer* (*layer_creator_func)(void*);
typedef void (*layer_destroyer_func)(Layer*, void*);

// registry row for a built-in layer type
struct layer_registry_entry
{
#if NCNN_STRING
    // layer type name
    const char* name;
#endif // NCNN_STRING
    // layer factory entry
    layer_creator_func creator;
};

// registry row for a user-registered layer; carries its own destroyer
// and an opaque userdata pointer passed back to both callbacks
struct custom_layer_registry_entry
{
#if NCNN_STRING
    // layer type name
    const char* name;
#endif // NCNN_STRING
    // layer factory entry
    layer_creator_func creator;
    layer_destroyer_func destroyer;
    void* userdata;
};

#if NCNN_STRING
// get layer type from type name
NCNN_EXPORT int layer_to_index(const char* type);
// create layer from type name
NCNN_EXPORT Layer* create_layer(const char* type);
#endif // NCNN_STRING
// create layer from layer type
// caller owns the returned layer
NCNN_EXPORT Layer* create_layer(int index);
// defines the factory function "<name>_layer_creator" matching layer_creator_func
#define DEFINE_LAYER_CREATOR(name)                          \
    ::ncnn::Layer* name##_layer_creator(void* /*userdata*/) \
    {                                                       \
        return new name;                                    \
    }

// defines the destroyer function "<name>_layer_destroyer" matching layer_destroyer_func
#define DEFINE_LAYER_DESTROYER(name)                                      \
    void name##_layer_destroyer(::ncnn::Layer* layer, void* /*userdata*/) \
    {                                                                     \
        delete layer;                                                     \
    }
} // namespace ncnn
#endif // NCNN_LAYER_H

View File

@@ -0,0 +1,29 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_LAYER_SHADER_TYPE_H
#define NCNN_LAYER_SHADER_TYPE_H
namespace ncnn {
namespace LayerShaderType {
// enumerators come from a cmake-generated include; one entry per compiled shader
enum LayerShaderType
{
#include "layer_shader_type_enum.h"
};
} // namespace LayerShaderType
} // namespace ncnn
#endif // NCNN_LAYER_SHADER_TYPE_H

View File

@@ -0,0 +1,5 @@
// Layer Shader Enum header
//
// This file is auto-generated by cmake, don't edit it.

30
lib_ncnn/ncnn/layer_type.h Executable file
View File

@@ -0,0 +1,30 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_LAYER_TYPE_H
#define NCNN_LAYER_TYPE_H
namespace ncnn {
namespace LayerType {
// enumerators come from a cmake-generated include; one entry per built-in layer
enum LayerType
{
#include "layer_type_enum.h"
    // flag bit distinguishing user-registered custom layer indices
    CustomBit = (1 << 8),
};
} // namespace LayerType
} // namespace ncnn
#endif // NCNN_LAYER_TYPE_H

98
lib_ncnn/ncnn/layer_type_enum.h Executable file
View File

@@ -0,0 +1,98 @@
// Layer Type Enum header
//
// This file is auto-generated by cmake, don't edit it.
AbsVal = 0,
ArgMax = 1,
BatchNorm = 2,
Bias = 3,
BNLL = 4,
Concat = 5,
Convolution = 6,
Crop = 7,
Deconvolution = 8,
Dropout = 9,
Eltwise = 10,
ELU = 11,
Embed = 12,
Exp = 13,
Flatten = 14,
InnerProduct = 15,
Input = 16,
Log = 17,
LRN = 18,
MemoryData = 19,
MVN = 20,
Pooling = 21,
Power = 22,
PReLU = 23,
Proposal = 24,
Reduction = 25,
ReLU = 26,
Reshape = 27,
ROIPooling = 28,
Scale = 29,
Sigmoid = 30,
Slice = 31,
Softmax = 32,
Split = 33,
SPP = 34,
TanH = 35,
Threshold = 36,
Tile = 37,
RNN = 38,
LSTM = 39,
BinaryOp = 40,
UnaryOp = 41,
ConvolutionDepthWise = 42,
Padding = 43,
Squeeze = 44,
ExpandDims = 45,
Normalize = 46,
Permute = 47,
PriorBox = 48,
DetectionOutput = 49,
Interp = 50,
DeconvolutionDepthWise = 51,
ShuffleChannel = 52,
InstanceNorm = 53,
Clip = 54,
Reorg = 55,
YoloDetectionOutput = 56,
Quantize = 57,
Dequantize = 58,
Yolov3DetectionOutput = 59,
PSROIPooling = 60,
ROIAlign = 61,
Packing = 62,
Requantize = 63,
Cast = 64,
HardSigmoid = 65,
SELU = 66,
HardSwish = 67,
Noop = 68,
PixelShuffle = 69,
DeepCopy = 70,
Mish = 71,
StatisticsPooling = 72,
Swish = 73,
Gemm = 74,
GroupNorm = 75,
LayerNorm = 76,
Softplus = 77,
GRU = 78,
MultiHeadAttention = 79,
GELU = 80,
Convolution1D = 81,
Pooling1D = 82,
ConvolutionDepthWise1D = 83,
Convolution3D = 84,
ConvolutionDepthWise3D = 85,
Pooling3D = 86,
MatMul = 87,
Deconvolution1D = 88,
DeconvolutionDepthWise1D = 89,
Deconvolution3D = 90,
DeconvolutionDepthWise3D = 91,
Einsum = 92,

1837
lib_ncnn/ncnn/mat.h Executable file

File diff suppressed because it is too large Load Diff

78
lib_ncnn/ncnn/modelbin.h Executable file
View File

@@ -0,0 +1,78 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_MODELBIN_H
#define NCNN_MODELBIN_H
#include "mat.h"
namespace ncnn {
class DataReader;
// Abstract source of layer weights: each layer pulls its weight blobs
// through load() while the network model is being loaded.
class NCNN_EXPORT ModelBin
{
public:
ModelBin();
virtual ~ModelBin();
// element type
// 0 = auto
// 1 = float32
// 2 = float16
// 3 = int8
// load vec
virtual Mat load(int w, int type) const = 0;
// load image
virtual Mat load(int w, int h, int type) const;
// load dim
virtual Mat load(int w, int h, int c, int type) const;
};
class ModelBinFromDataReaderPrivate;
// ModelBin implementation that streams weight data from a DataReader
class NCNN_EXPORT ModelBinFromDataReader : public ModelBin
{
public:
explicit ModelBinFromDataReader(const DataReader& dr);
virtual ~ModelBinFromDataReader();
virtual Mat load(int w, int type) const;
private:
// non-copyable
ModelBinFromDataReader(const ModelBinFromDataReader&);
ModelBinFromDataReader& operator=(const ModelBinFromDataReader&);
private:
// pimpl
ModelBinFromDataReaderPrivate* const d;
};
class ModelBinFromMatArrayPrivate;
// ModelBin implementation backed by an in-memory array of pre-built Mats
class NCNN_EXPORT ModelBinFromMatArray : public ModelBin
{
public:
// construct from weight blob array
explicit ModelBinFromMatArray(const Mat* weights);
virtual ~ModelBinFromMatArray();
virtual Mat load(int w, int type) const;
private:
// non-copyable
ModelBinFromMatArray(const ModelBinFromMatArray&);
ModelBinFromMatArray& operator=(const ModelBinFromMatArray&);
private:
// pimpl
ModelBinFromMatArrayPrivate* const d;
};
} // namespace ncnn
#endif // NCNN_MODELBIN_H

42
lib_ncnn/ncnn/ncnn_export.h Executable file
View File

@@ -0,0 +1,42 @@
#ifndef NCNN_EXPORT_H
#define NCNN_EXPORT_H

/* Symbol visibility / export macros for the ncnn public API.
 * This header follows the layout produced by CMake's GenerateExportHeader;
 * in this build configuration the library exports nothing explicitly, so
 * both the "building" and "using" branches expand NCNN_EXPORT to nothing. */

#ifdef NCNN_STATIC_DEFINE
#  define NCNN_EXPORT
#  define NCNN_NO_EXPORT
#else
#  ifndef NCNN_EXPORT
#    ifdef ncnn_EXPORTS
/* We are building this library */
#      define NCNN_EXPORT
#    else
/* We are using this library */
#      define NCNN_EXPORT
#    endif
#  endif

#  ifndef NCNN_NO_EXPORT
#    define NCNN_NO_EXPORT
#  endif
#endif

/* Mark a declaration as deprecated.
 * Fix: the original unconditionally used the GCC/Clang attribute, which is
 * a compile error on MSVC (a supported platform per platform.h); use
 * __declspec(deprecated) there instead. */
#ifndef NCNN_DEPRECATED
#  if defined(_MSC_VER)
#    define NCNN_DEPRECATED __declspec(deprecated)
#  else
#    define NCNN_DEPRECATED __attribute__ ((__deprecated__))
#  endif
#endif

#ifndef NCNN_DEPRECATED_EXPORT
#  define NCNN_DEPRECATED_EXPORT NCNN_EXPORT NCNN_DEPRECATED
#endif

#ifndef NCNN_DEPRECATED_NO_EXPORT
#  define NCNN_DEPRECATED_NO_EXPORT NCNN_NO_EXPORT NCNN_DEPRECATED
#endif

#if 0 /* DEFINE_NO_DEPRECATED */
#  ifndef NCNN_NO_DEPRECATED
#    define NCNN_NO_DEPRECATED
#  endif
#endif

#endif /* NCNN_EXPORT_H */

272
lib_ncnn/ncnn/net.h Executable file
View File

@@ -0,0 +1,272 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_NET_H
#define NCNN_NET_H
#include "blob.h"
#include "layer.h"
#include "mat.h"
#include "option.h"
#include "platform.h"
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
#include <android/asset_manager.h>
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
namespace ncnn {
#if NCNN_VULKAN
class VkCompute;
#endif // NCNN_VULKAN
class DataReader;
class Extractor;
class NetPrivate;
// Net holds one loaded network: the structure (param) plus the weights
// (model). Typical use: adjust opt, load_param(), load_model(), then
// create_extractor() to run inference.
class NCNN_EXPORT Net
{
public:
// empty init
Net();
// clear and destroy
virtual ~Net();
public:
// option can be changed before loading
Option opt;
#if NCNN_VULKAN
// set gpu device by index
void set_vulkan_device(int device_index);
// set gpu device by device handle, no owner transfer
void set_vulkan_device(const VulkanDevice* vkdev);
// get the gpu device currently bound to this net
const VulkanDevice* vulkan_device() const;
#endif // NCNN_VULKAN
#if NCNN_STRING
// register custom layer by layer type name
// return 0 if success
int register_custom_layer(const char* type, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
// map a custom layer type name to its numeric index
virtual int custom_layer_to_index(const char* type);
#endif // NCNN_STRING
// register custom layer by layer type
// return 0 if success
int register_custom_layer(int index, layer_creator_func creator, layer_destroyer_func destroyer = 0, void* userdata = 0);
#if NCNN_STRING
// load network structure from a plain text param DataReader
int load_param(const DataReader& dr);
#endif // NCNN_STRING
// load network structure from a binary param DataReader
int load_param_bin(const DataReader& dr);
// load network weight data from a DataReader
int load_model(const DataReader& dr);
#if NCNN_STDIO
#if NCNN_STRING
// load network structure from plain param file
// return 0 if success
int load_param(FILE* fp);
int load_param(const char* protopath);
// load network structure from in-memory param text
int load_param_mem(const char* mem);
#endif // NCNN_STRING
// load network structure from binary param file
// return 0 if success
int load_param_bin(FILE* fp);
int load_param_bin(const char* protopath);
// load network weight data from model file
// return 0 if success
int load_model(FILE* fp);
int load_model(const char* modelpath);
#endif // NCNN_STDIO
// load network structure from external memory
// memory pointer must be 32-bit aligned
// return bytes consumed
int load_param(const unsigned char* mem);
// reference network weight data from external memory
// weight data is not copied but referenced
// so external memory should be retained when used
// memory pointer must be 32-bit aligned
// return bytes consumed
int load_model(const unsigned char* mem);
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 9
#if NCNN_STRING
// convenient load network structure from android asset plain param file
int load_param(AAsset* asset);
int load_param(AAssetManager* mgr, const char* assetpath);
#endif // NCNN_STRING
// convenient load network structure from android asset binary param file
int load_param_bin(AAsset* asset);
int load_param_bin(AAssetManager* mgr, const char* assetpath);
// convenient load network weight data from android asset model file
int load_model(AAsset* asset);
int load_model(AAssetManager* mgr, const char* assetpath);
#endif // __ANDROID_API__ >= 9
#endif // NCNN_PLATFORM_API
// unload network structure and weight data
void clear();
// construct an Extractor from network
Extractor create_extractor() const;
// get input/output indexes/names
const std::vector<int>& input_indexes() const;
const std::vector<int>& output_indexes() const;
#if NCNN_STRING
const std::vector<const char*>& input_names() const;
const std::vector<const char*>& output_names() const;
#endif
// direct access to the internal blob and layer tables
const std::vector<Blob>& blobs() const;
const std::vector<Layer*>& layers() const;
std::vector<Blob>& mutable_blobs();
std::vector<Layer*>& mutable_layers();
protected:
friend class Extractor;
#if NCNN_STRING
// look up a blob / layer index by name
// NOTE(review): presumably returns a negative value when the name is
// unknown -- confirm against net.cpp
int find_blob_index_by_name(const char* name) const;
int find_layer_index_by_name(const char* name) const;
// factory hooks for user-registered layer types; override to customize
virtual Layer* create_custom_layer(const char* type);
#endif // NCNN_STRING
virtual Layer* create_custom_layer(int index);
private:
// non-copyable
Net(const Net&);
Net& operator=(const Net&);
private:
// pimpl: all mutable state lives in NetPrivate
NetPrivate* const d;
};
class ExtractorPrivate;
// Extractor drives one inference over a Net: set blobs with input(), then
// request outputs with extract(). Obtain instances via Net::create_extractor().
class NCNN_EXPORT Extractor
{
public:
virtual ~Extractor();
// copy
Extractor(const Extractor&);
// assign
Extractor& operator=(const Extractor&);
// clear blob mats and allocators
void clear();
// enable light mode
// intermediate blob will be recycled when enabled
// enabled by default
void set_light_mode(bool enable);
// set thread count for this extractor
// this will overwrite the global setting
// default count is system depended
void set_num_threads(int num_threads);
// set blob memory allocator
void set_blob_allocator(Allocator* allocator);
// set workspace memory allocator
void set_workspace_allocator(Allocator* allocator);
#if NCNN_VULKAN
// toggle gpu inference for this extractor
void set_vulkan_compute(bool enable);
void set_blob_vkallocator(VkAllocator* allocator);
void set_workspace_vkallocator(VkAllocator* allocator);
void set_staging_vkallocator(VkAllocator* allocator);
#endif // NCNN_VULKAN
#if NCNN_STRING
// set input by blob name
// return 0 if success
int input(const char* blob_name, const Mat& in);
// get result by blob name
// return 0 if success
// type = 0, default
// type = 1, do not convert fp16/bf16 or / and packing
int extract(const char* blob_name, Mat& feat, int type = 0);
#endif // NCNN_STRING
// set input by blob index
// return 0 if success
int input(int blob_index, const Mat& in);
// get result by blob index
// return 0 if success
// type = 0, default
// type = 1, do not convert fp16/bf16 or / and packing
int extract(int blob_index, Mat& feat, int type = 0);
#if NCNN_VULKAN
#if NCNN_STRING
// set input by blob name
// return 0 if success
int input(const char* blob_name, const VkMat& in);
// get result by blob name
// return 0 if success
int extract(const char* blob_name, VkMat& feat, VkCompute& cmd);
// set input by blob name
// return 0 if success
int input(const char* blob_name, const VkImageMat& in);
// get result by blob name
// return 0 if success
int extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd);
#endif // NCNN_STRING
// set input by blob index
// return 0 if success
int input(int blob_index, const VkMat& in);
// get result by blob index
// return 0 if success
int extract(int blob_index, VkMat& feat, VkCompute& cmd);
// set input by blob index
// return 0 if success
int input(int blob_index, const VkImageMat& in);
// get result by blob index
// return 0 if success
int extract(int blob_index, VkImageMat& feat, VkCompute& cmd);
#endif // NCNN_VULKAN
protected:
// only Net can construct an extractor
friend Extractor Net::create_extractor() const;
Extractor(const Net* net, size_t blob_count);
private:
// pimpl
ExtractorPrivate* const d;
};
} // namespace ncnn
#endif // NCNN_NET_H

153
lib_ncnn/ncnn/option.h Executable file
View File

@@ -0,0 +1,153 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H
#include "platform.h"
namespace ncnn {
#if NCNN_VULKAN
class VkAllocator;
class PipelineCache;
#endif // NCNN_VULKAN
class Allocator;
// Inference-time configuration knobs shared by Net and Extractor.
// Configure before load_param()/load_model(); several groups only take
// effect in specific builds (vulkan, int8, arm).
class NCNN_EXPORT Option
{
public:
// default option
Option();
public:
// light mode
// intermediate blob will be recycled when enabled
// enabled by default
bool lightmode;
// thread count
// default value is the one returned by get_cpu_count()
int num_threads;
// blob memory allocator
Allocator* blob_allocator;
// workspace memory allocator
Allocator* workspace_allocator;
#if NCNN_VULKAN
// blob memory allocator
VkAllocator* blob_vkallocator;
// workspace memory allocator
VkAllocator* workspace_vkallocator;
// staging memory allocator
VkAllocator* staging_vkallocator;
// pipeline cache
PipelineCache* pipeline_cache;
#endif // NCNN_VULKAN
// the time openmp threads busy-wait for more work before going to sleep
// default value is 20ms to keep the cores enabled
// without too much extra power consumption afterwards
int openmp_blocktime;
// enable winograd convolution optimization
// improve convolution 3x3 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_winograd_convolution;
// enable sgemm convolution optimization
// improve convolution 1x1 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_sgemm_convolution;
// enable quantized int8 inference
// use low-precision int8 path for quantized model
// changes should be applied before loading network structure and weight
// enabled by default
bool use_int8_inference;
// enable vulkan compute
bool use_vulkan_compute;
// enable bf16 data type for storage
// improve most operator performance on all arm devices, may consume more memory
bool use_bf16_storage;
// enable options for gpu inference
bool use_fp16_packed;
bool use_fp16_storage;
bool use_fp16_arithmetic;
bool use_int8_packed;
bool use_int8_storage;
bool use_int8_arithmetic;
// enable simd-friendly packed memory layout
// improve all operator performance on all arm devices, will consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_packing_layout;
// NOTE(review): presumably enables pack8 gpu shader variants -- confirm
bool use_shader_pack8;
// subgroup option
bool use_subgroup_basic;
bool use_subgroup_vote;
bool use_subgroup_ballot;
bool use_subgroup_shuffle;
// turn on for adreno
bool use_image_storage;
bool use_tensor_storage;
// reserved placeholder (presumably kept for layout stability)
bool use_reserved_0;
// enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero)
// default value is 3
// 0 = DAZ OFF, FTZ OFF
// 1 = DAZ ON , FTZ OFF
// 2 = DAZ OFF, FTZ ON
// 3 = DAZ ON, FTZ ON
int flush_denormals;
// use a pool allocator when no allocator is set explicitly -- TODO confirm
bool use_local_pool_allocator;
// enable local memory optimization for gpu inference
bool use_shader_local_memory;
// enable cooperative matrix optimization for gpu inference
bool use_cooperative_matrix;
// more fine-grained control of winograd convolution
bool use_winograd23_convolution;
bool use_winograd43_convolution;
bool use_winograd63_convolution;
// reserved placeholders (presumably kept for layout stability)
bool use_reserved_6;
bool use_reserved_7;
bool use_reserved_8;
bool use_reserved_9;
bool use_reserved_10;
bool use_reserved_11;
};
} // namespace ncnn
#endif // NCNN_OPTION_H

73
lib_ncnn/ncnn/paramdict.h Executable file
View File

@@ -0,0 +1,73 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_PARAMDICT_H
#define NCNN_PARAMDICT_H
#include "mat.h"
// at most 32 parameters
#define NCNN_MAX_PARAM_COUNT 32
namespace ncnn {
class DataReader;
class Net;
class ParamDictPrivate;
// Per-layer key/value parameter store: int, float or Mat values addressed
// by small integer ids (at most NCNN_MAX_PARAM_COUNT entries), filled in
// while Net parses param data.
class NCNN_EXPORT ParamDict
{
public:
// empty
ParamDict();
virtual ~ParamDict();
// copy
ParamDict(const ParamDict&);
// assign
ParamDict& operator=(const ParamDict&);
// get type
int type(int id) const;
// getters return the stored value for id, falling back to def
// (presumably when the id was never set -- confirm in paramdict.cpp)
// get int
int get(int id, int def) const;
// get float
float get(int id, float def) const;
// get array
Mat get(int id, const Mat& def) const;
// set int
void set(int id, int i);
// set float
void set(int id, float f);
// set array
void set(int id, const Mat& v);
protected:
// Net fills the dict while parsing param data
friend class Net;
void clear();
int load_param(const DataReader& dr);
int load_param_bin(const DataReader& dr);
private:
// pimpl
ParamDictPrivate* const d;
};
} // namespace ncnn
#endif // NCNN_PARAMDICT_H

113
lib_ncnn/ncnn/pipeline.h Executable file
View File

@@ -0,0 +1,113 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_PIPELINE_H
#define NCNN_PIPELINE_H
#include "mat.h"
#include "platform.h"
#if NCNN_VULKAN
#include "gpu.h"
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN
namespace ncnn {
#if NCNN_VULKAN
class Option;
class PipelinePrivate;
// One vulkan compute pipeline: exposes the shader module, descriptor set
// layout, pipeline layout, pipeline handle and the local workgroup size.
class NCNN_EXPORT Pipeline
{
public:
explicit Pipeline(const VulkanDevice* vkdev);
virtual ~Pipeline();
public:
// pick a suitable local workgroup size for the given extents
void set_optimal_local_size_xyz(int w = 4, int h = 4, int c = 4);
void set_optimal_local_size_xyz(const Mat& local_size_xyz);
// force an exact local workgroup size
void set_local_size_xyz(int w, int h, int c);
// build the pipeline from raw spir-v
int create(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations);
// build the pipeline from a built-in shader index
int create(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations);
public:
// accessors for the created vulkan objects
VkShaderModule shader_module() const;
VkDescriptorSetLayout descriptorset_layout() const;
VkPipelineLayout pipeline_layout() const;
VkPipeline pipeline() const;
VkDescriptorUpdateTemplateKHR descriptor_update_template() const;
const ShaderInfo& shader_info() const;
uint32_t local_size_x() const;
uint32_t local_size_y() const;
uint32_t local_size_z() const;
protected:
// setters for subclasses that assemble the pipeline objects themselves
void set_shader_module(VkShaderModule shader_module);
void set_descriptorset_layout(VkDescriptorSetLayout descriptorset_layout);
void set_pipeline_layout(VkPipelineLayout pipeline_layout);
void set_pipeline(VkPipeline pipeline);
void set_descriptor_update_template(VkDescriptorUpdateTemplateKHR descriptor_update_template);
void set_shader_info(const ShaderInfo& shader_info);
public:
const VulkanDevice* vkdev;
private:
// non-copyable
Pipeline(const Pipeline&);
Pipeline& operator=(const Pipeline&);
private:
// pimpl
PipelinePrivate* const d;
};
#if NCNN_PLATFORM_API
#if __ANDROID_API__ >= 26
class VkCompute;
// Pipeline that imports an AHardwareBuffer-backed image and converts /
// rotates it on the gpu (android api 26+ builds only).
class NCNN_EXPORT ImportAndroidHardwareBufferPipeline : private Pipeline
{
public:
explicit ImportAndroidHardwareBufferPipeline(const VulkanDevice* vkdev);
virtual ~ImportAndroidHardwareBufferPipeline();
// type_to / rotate_from describe the requested output format and rotation
int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, const Option& opt);
int create(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator, int type_to, int rotate_from, int target_width, int target_height, const Option& opt);
void destroy();
friend class VkCompute;
protected:
int create_shader_module(const Option& opt);
int create_sampler(VkAndroidHardwareBufferImageAllocator* ahb_im_allocator);
int create_descriptorset_layout();
public:
int type_to;
int rotate_from;
// NOTE(review): presumably set by the create() overload taking
// target_width/target_height -- confirm in pipeline.cpp
bool need_resize;
VkSampler sampler;
};
#endif // __ANDROID_API__ >= 26
#endif // NCNN_PLATFORM_API
#endif // NCNN_VULKAN
} // namespace ncnn
#endif // NCNN_PIPELINE_H

85
lib_ncnn/ncnn/pipelinecache.h Executable file
View File

@@ -0,0 +1,85 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_PIPELINECACHE_H
#define NCNN_PIPELINECACHE_H
#include "platform.h"
#if NCNN_VULKAN
#include <vulkan/vulkan.h>
#endif // NCNN_VULKAN
#include "mat.h"
#include "gpu.h"
namespace ncnn {
#if NCNN_VULKAN
class VulkanDevice;
class PipelineCachePrivate;
// Builds vulkan compute pipelines on demand and keeps them so later
// requests for the same shader + specializations can reuse the objects.
class NCNN_EXPORT PipelineCache
{
public:
explicit PipelineCache(const VulkanDevice* _vkdev);
virtual ~PipelineCache();
// drop all cached pipeline objects
void clear();
// look up (or build) a pipeline from raw spir-v
int get_pipeline(const uint32_t* spv_data, size_t spv_data_size, const std::vector<vk_specialization_type>& specializations,
uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
VkShaderModule* shader_module,
VkDescriptorSetLayout* descriptorset_layout,
VkPipelineLayout* pipeline_layout,
VkPipeline* pipeline,
VkDescriptorUpdateTemplateKHR* descriptor_update_template,
ShaderInfo& shader_info) const;
// look up (or build) a pipeline from a built-in shader index
int get_pipeline(int shader_type_index, const Option& opt, const std::vector<vk_specialization_type>& specializations,
uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
VkShaderModule* shader_module,
VkDescriptorSetLayout* descriptorset_layout,
VkPipelineLayout* pipeline_layout,
VkPipeline* pipeline,
VkDescriptorUpdateTemplateKHR* descriptor_update_template,
ShaderInfo& shader_info) const;
protected:
// helpers used by get_pipeline() to construct the vulkan objects
int create_shader_module(int shader_type_index, const Option& opt, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z,
VkShaderModule* _shader_module, ShaderInfo& si) const;
int new_pipeline(VkShaderModule shader_module, const ShaderInfo& shader_info, const std::vector<vk_specialization_type>& specializations,
VkDescriptorSetLayout* descriptorset_layout,
VkPipelineLayout* pipeline_layout,
VkPipeline* pipeline,
VkDescriptorUpdateTemplateKHR* descriptor_update_template) const;
protected:
const VulkanDevice* vkdev;
private:
// non-copyable
PipelineCache(const PipelineCache&);
PipelineCache& operator=(const PipelineCache&);
private:
// pimpl
PipelineCachePrivate* const d;
};
#endif // NCNN_VULKAN
} // namespace ncnn
#endif // NCNN_PIPELINECACHE_H

273
lib_ncnn/ncnn/platform.h Executable file
View File

@@ -0,0 +1,273 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_PLATFORM_H
#define NCNN_PLATFORM_H
// Build-time feature switches for this particular ncnn build.
// NOTE(review): this block looks cmake-generated for one configuration
// (x86 with AVX/AVX512 enabled, vulkan disabled) -- change the build
// configuration rather than editing values here.
#define NCNN_STDIO 1
#define NCNN_STRING 1
#define NCNN_SIMPLEOCV 0
#define NCNN_SIMPLEOMP 0
#define NCNN_SIMPLESTL 0
#define NCNN_THREADS 1
#define NCNN_BENCHMARK 0
#define NCNN_C_API 1
#define NCNN_PLATFORM_API 1
#define NCNN_PIXEL 1
#define NCNN_PIXEL_ROTATE 1
#define NCNN_PIXEL_AFFINE 1
#define NCNN_PIXEL_DRAWING 1
#define NCNN_VULKAN 0
#define NCNN_SYSTEM_GLSLANG 0
#define NCNN_RUNTIME_CPU 1
#define NCNN_AVX 1
#define NCNN_XOP 1
#define NCNN_FMA 1
#define NCNN_F16C 1
#define NCNN_AVX2 1
#define NCNN_AVXVNNI 0
#define NCNN_AVX512 1
#define NCNN_AVX512VNNI 1
#if __aarch64__
#define NCNN_ARM82 0
#define NCNN_ARM82DOT 0
#endif // __aarch64__
#define NCNN_MSA 0
#define NCNN_MMI 0
#define NCNN_RVV 0
#define NCNN_INT8 1
#define NCNN_BF16 1
#define NCNN_FORCE_INLINE 1
#define NCNN_VERSION_STRING "1.0.20220617"
#include "ncnn_export.h"
// the threading primitives below are c++-only
#ifdef __cplusplus
#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
#else
#include <pthread.h>
#endif
#endif // NCNN_THREADS
#if __ANDROID_API__ >= 26
#define VK_USE_PLATFORM_ANDROID_KHR
#endif // __ANDROID_API__ >= 26
namespace ncnn {
#if NCNN_THREADS
#if (defined _WIN32 && !(defined __MINGW32__))
// win32 implementations of the minimal threading primitives used by ncnn
class NCNN_EXPORT Mutex
{
public:
Mutex() { InitializeSRWLock(&srwlock); }
~Mutex() {}
void lock() { AcquireSRWLockExclusive(&srwlock); }
void unlock() { ReleaseSRWLockExclusive(&srwlock); }
private:
// ConditionVariable::wait needs the raw SRWLOCK
friend class ConditionVariable;
// NOTE SRWLock is available from windows vista
SRWLOCK srwlock;
};
class NCNN_EXPORT ConditionVariable
{
public:
ConditionVariable() { InitializeConditionVariable(&condvar); }
~ConditionVariable() {}
// caller must hold mutex; it is released while waiting
void wait(Mutex& mutex) { SleepConditionVariableSRW(&condvar, &mutex.srwlock, INFINITE, 0); }
void broadcast() { WakeAllConditionVariable(&condvar); }
void signal() { WakeConditionVariable(&condvar); }
private:
CONDITION_VARIABLE condvar;
};
// adapter so a pthread-style entry (void* (*)(void*)) can run via _beginthreadex
static unsigned __stdcall start_wrapper(void* args);
class NCNN_EXPORT Thread
{
public:
Thread(void* (*start)(void*), void* args = 0) { _start = start; _args = args; handle = (HANDLE)_beginthreadex(0, 0, start_wrapper, this, 0, 0); }
~Thread() {}
void join() { WaitForSingleObject(handle, INFINITE); CloseHandle(handle); }
private:
friend unsigned __stdcall start_wrapper(void* args)
{
Thread* t = (Thread*)args;
t->_start(t->_args);
return 0;
}
HANDLE handle;
void* (*_start)(void*);
void* _args;
};
// per-thread pointer slot built on TlsAlloc/TlsGetValue
class NCNN_EXPORT ThreadLocalStorage
{
public:
ThreadLocalStorage() { key = TlsAlloc(); }
~ThreadLocalStorage() { TlsFree(key); }
void set(void* value) { TlsSetValue(key, (LPVOID)value); }
void* get() { return (void*)TlsGetValue(key); }
private:
DWORD key;
};
#else // (defined _WIN32 && !(defined __MINGW32__))
// posix implementations of the same primitives, built on pthreads
class NCNN_EXPORT Mutex
{
public:
Mutex() { pthread_mutex_init(&mutex, 0); }
~Mutex() { pthread_mutex_destroy(&mutex); }
void lock() { pthread_mutex_lock(&mutex); }
void unlock() { pthread_mutex_unlock(&mutex); }
private:
// ConditionVariable::wait needs the raw pthread_mutex_t
friend class ConditionVariable;
pthread_mutex_t mutex;
};
class NCNN_EXPORT ConditionVariable
{
public:
ConditionVariable() { pthread_cond_init(&cond, 0); }
~ConditionVariable() { pthread_cond_destroy(&cond); }
// caller must hold mutex; it is released while waiting and re-acquired
// before wait() returns (pthread_cond_wait semantics)
void wait(Mutex& mutex) { pthread_cond_wait(&cond, &mutex.mutex); }
void broadcast() { pthread_cond_broadcast(&cond); }
void signal() { pthread_cond_signal(&cond); }
private:
pthread_cond_t cond;
};
// joinable worker thread with a pthread-style entry function
class NCNN_EXPORT Thread
{
public:
Thread(void* (*start)(void*), void* args = 0) { pthread_create(&t, 0, start, args); }
~Thread() {}
void join() { pthread_join(t, 0); }
private:
pthread_t t;
};
// per-thread pointer slot built on pthread_key_create
class NCNN_EXPORT ThreadLocalStorage
{
public:
ThreadLocalStorage() { pthread_key_create(&key, 0); }
~ThreadLocalStorage() { pthread_key_delete(key); }
void set(void* value) { pthread_setspecific(key, value); }
void* get() { return pthread_getspecific(key); }
private:
pthread_key_t key;
};
#endif // (defined _WIN32 && !(defined __MINGW32__))
#else // NCNN_THREADS
// threads disabled at build time: keep the same interfaces as no-op stubs
// so calling code compiles unchanged
class NCNN_EXPORT Mutex
{
public:
Mutex() {}
~Mutex() {}
void lock() {}
void unlock() {}
};
class NCNN_EXPORT ConditionVariable
{
public:
ConditionVariable() {}
~ConditionVariable() {}
void wait(Mutex& /*mutex*/) {}
void broadcast() {}
void signal() {}
};
class NCNN_EXPORT Thread
{
public:
Thread(void* (*/*start*/)(void*), void* /*args*/ = 0) {}
~Thread() {}
void join() {}
};
// a single slot suffices: "thread local" is trivial with one thread
class NCNN_EXPORT ThreadLocalStorage
{
public:
ThreadLocalStorage() { data = 0; }
~ThreadLocalStorage() {}
void set(void* value) { data = value; }
void* get() { return data; }
private:
void* data;
};
#endif // NCNN_THREADS
// Scoped lock (RAII): acquires the mutex on construction and releases it
// when the guard leaves scope, so early returns cannot leak the lock.
class NCNN_EXPORT MutexLockGuard
{
public:
    MutexLockGuard(Mutex& _mutex)
        : held(_mutex)
    {
        held.lock();
    }
    ~MutexLockGuard()
    {
        held.unlock();
    }

private:
    // the mutex being guarded; reference only, never owned
    Mutex& held;
};
} // namespace ncnn
#if NCNN_SIMPLESTL
#include "simplestl.h"
#else
#include <algorithm>
#include <list>
#include <vector>
#include <string>
#endif
#endif // __cplusplus
// NCNN_LOGE: print an error line to stderr (and also logcat on android)
#if NCNN_STDIO
#if NCNN_PLATFORM_API && __ANDROID_API__ >= 8
#include <android/log.h>
#define NCNN_LOGE(...) do { \
fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); \
__android_log_print(ANDROID_LOG_WARN, "ncnn", ##__VA_ARGS__); } while(0)
#else // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
#include <stdio.h>
#define NCNN_LOGE(...) do { \
fprintf(stderr, ##__VA_ARGS__); fprintf(stderr, "\n"); } while(0)
#endif // NCNN_PLATFORM_API && __ANDROID_API__ >= 8
#else
#define NCNN_LOGE(...)
#endif
// NCNN_FORCEINLINE: request aggressive inlining for hot helpers
// NOTE(review): clang defines __clang__ (lowercase) and also __GNUC__, so
// it takes the __GNUC__ branch; the __CLANG__ branch below is effectively
// dead code -- harmless, but worth confirming upstream
#if NCNN_FORCE_INLINE
#ifdef _MSC_VER
#define NCNN_FORCEINLINE __forceinline
#elif defined(__GNUC__)
#define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
#elif defined(__CLANG__)
#if __has_attribute(__always_inline__)
#define NCNN_FORCEINLINE inline __attribute__((__always_inline__))
#else
#define NCNN_FORCEINLINE inline
#endif
#else
#define NCNN_FORCEINLINE inline
#endif
#else
#define NCNN_FORCEINLINE inline
#endif
#endif // NCNN_PLATFORM_H

501
lib_ncnn/ncnn/simpleocv.h Executable file
View File

@@ -0,0 +1,501 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_SIMPLEOCV_H
#define NCNN_SIMPLEOCV_H
#include "platform.h"
#if NCNN_SIMPLEOCV
#include <limits.h>
#include <string.h>
#include "allocator.h"
#include "mat.h"
// some headers (e.g. windows.h) define min/max macros; save and drop them
// so std::min/std::max below work
#if defined(_MSC_VER) || defined(__GNUC__)
#pragma push_macro("min")
#pragma push_macro("max")
#undef min
#undef max
#endif
// atomic add helper from ncnn, used for Mat refcounting below
#ifndef NCNN_XADD
using ncnn::NCNN_XADD;
#endif
// opencv-style shorthand integer typedefs
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
// imread flags, value-compatible with opencv's CV_LOAD_IMAGE_*
enum
{
CV_LOAD_IMAGE_UNCHANGED = -1,
CV_LOAD_IMAGE_GRAYSCALE = 0,
CV_LOAD_IMAGE_COLOR = 1,
};
// imwrite flags
enum
{
CV_IMWRITE_JPEG_QUALITY = 1
};
// minimal opencv style data structure implementation
namespace cv {
// opencv-style saturate_cast: convert an int to the target type, clamping
// where the target cannot represent the value.
template<typename _Tp>
static inline _Tp saturate_cast(int v)
{
    // generic case: plain conversion, no clamping
    return _Tp(v);
}
template<>
inline uchar saturate_cast<uchar>(int v)
{
    // clamp to the representable range [0, UCHAR_MAX]
    if (v < 0)
        return 0;
    if (v > UCHAR_MAX)
        return (uchar)UCHAR_MAX;
    return (uchar)v;
}
// Fixed-size holder for up to four channel values (opencv-style
// cv::Scalar_). Channels not supplied to a constructor default to zero.
template<typename _Tp>
struct Scalar_
{
    Scalar_()
    {
        v[0] = v[1] = v[2] = v[3] = 0;
    }
    Scalar_(_Tp _v0)
    {
        v[0] = _v0;
        v[1] = v[2] = v[3] = 0;
    }
    Scalar_(_Tp _v0, _Tp _v1, _Tp _v2)
    {
        v[0] = _v0;
        v[1] = _v1;
        v[2] = _v2;
        v[3] = 0;
    }
    Scalar_(_Tp _v0, _Tp _v1, _Tp _v2, _Tp _v3)
    {
        v[0] = _v0;
        v[1] = _v1;
        v[2] = _v2;
        v[3] = _v3;
    }
    // channel access
    const _Tp operator[](const int i) const
    {
        return v[i];
    }
    // NOTE(review): returns by value, so s[i] = x does not modify the scalar
    _Tp operator[](const int i)
    {
        return v[i];
    }
    // channel values
    _Tp v[4];
};
typedef Scalar_<uchar> Scalar;
// 2-d point (opencv-style cv::Point_)
template<typename _Tp>
struct Point_
{
Point_()
: x(0), y(0)
{
}
Point_(_Tp _x, _Tp _y)
: x(_x), y(_y)
{
}
// convert to a point of another element type via saturate_cast
template<typename _Tp2>
operator Point_<_Tp2>() const
{
return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y));
}
_Tp x;
_Tp y;
};
typedef Point_<int> Point;
typedef Point_<float> Point2f;
// 2-d extent: width and height (opencv-style cv::Size_)
template<typename _Tp>
struct Size_
{
Size_()
: width(0), height(0)
{
}
Size_(_Tp _w, _Tp _h)
: width(_w), height(_h)
{
}
// convert to a size of another element type via saturate_cast
template<typename _Tp2>
operator Size_<_Tp2>() const
{
return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
}
_Tp width;
_Tp height;
};
typedef Size_<int> Size;
typedef Size_<float> Size2f;
// axis-aligned rectangle: top-left corner (x, y) plus width and height
// (opencv-style cv::Rect_)
template<typename _Tp>
struct Rect_
{
Rect_()
: x(0), y(0), width(0), height(0)
{
}
Rect_(_Tp _x, _Tp _y, _Tp _w, _Tp _h)
: x(_x), y(_y), width(_w), height(_h)
{
}
Rect_(Point_<_Tp> _p, Size_<_Tp> _size)
: x(_p.x), y(_p.y), width(_size.width), height(_size.height)
{
}
// convert to a rect of another element type via saturate_cast
template<typename _Tp2>
operator Rect_<_Tp2>() const
{
return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
}
_Tp x;
_Tp y;
_Tp width;
_Tp height;
// area
// NOTE(review): plain width*height -- no overflow or negative-size guard
_Tp area() const
{
return width * height;
}
};
// Intersection-assign: shrink a to the overlap of a and b.
// An empty intersection yields a default (all-zero) rect.
template<typename _Tp>
static inline Rect_<_Tp>& operator&=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
{
    const _Tp left = std::max(a.x, b.x);
    const _Tp top = std::max(a.y, b.y);
    const _Tp right = std::min(a.x + a.width, b.x + b.width);
    const _Tp bottom = std::min(a.y + a.height, b.y + b.height);
    a.x = left;
    a.y = top;
    a.width = right - left;
    a.height = bottom - top;
    if (a.width <= 0 || a.height <= 0)
        a = Rect_<_Tp>();
    return a;
}
// Union-assign: grow a to the bounding box of a and b.
// Matches cv::Rect_ semantics: the union with an empty rect is the other
// rect. (The previous version unconditionally took min/max against an
// empty/default rect, wrongly stretching the result toward the origin.)
template<typename _Tp>
static inline Rect_<_Tp>& operator|=(Rect_<_Tp>& a, const Rect_<_Tp>& b)
{
    if (a.width <= 0 || a.height <= 0)
    {
        a = b;
        return a;
    }
    if (b.width <= 0 || b.height <= 0)
        return a;
    _Tp x1 = std::min(a.x, b.x), y1 = std::min(a.y, b.y);
    a.width = std::max(a.x + a.width, b.x + b.width) - x1;
    a.height = std::max(a.y + a.height, b.y + b.height) - y1;
    a.x = x1;
    a.y = y1;
    return a;
}
// Intersection of two rects (value-returning form of operator&=).
template<typename _Tp>
static inline Rect_<_Tp> operator&(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
{
    Rect_<_Tp> result(a);
    result &= b;
    return result;
}
// Union of two rects (value-returning form of operator|=).
template<typename _Tp>
static inline Rect_<_Tp> operator|(const Rect_<_Tp>& a, const Rect_<_Tp>& b)
{
    Rect_<_Tp> result(a);
    result |= b;
    return result;
}
typedef Rect_<int> Rect;
typedef Rect_<float> Rect2f;
// Mat "type" flags double as the interleaved channel count.
// NOTE(review): CV_32FC1 shares the value 4 with CV_8UC4, so a Mat cannot
// tell a 1-channel float image from a 4-channel uchar image by type() alone.
#define CV_8UC1 1
#define CV_8UC3 3
#define CV_8UC4 4
#define CV_32FC1 4
// Minimal cv::Mat stand-in: a reference-counted rows x cols image with c
// interleaved channels; bookkeeping (total(), ptr()) counts rows*cols*c units.
struct NCNN_EXPORT Mat
{
    // Empty matrix.
    Mat()
        : data(0), refcount(0), rows(0), cols(0), c(0)
    {
    }
    // Allocate a rows x cols matrix; flags is the channel count (CV_8UCn).
    Mat(int _rows, int _cols, int flags)
        : data(0), refcount(0)
    {
        create(_rows, _cols, flags);
    }
    // copy
    // Shallow copy: shares the pixel buffer and bumps the reference count.
    Mat(const Mat& m)
        : data(m.data), refcount(m.refcount)
    {
        if (refcount)
            NCNN_XADD(refcount, 1);
        rows = m.rows;
        cols = m.cols;
        c = m.c;
    }
    // Wrap user-allocated memory; refcount stays null, so the buffer is
    // never freed by this Mat (caller retains ownership).
    Mat(int _rows, int _cols, int flags, void* _data)
        : data((unsigned char*)_data), refcount(0)
    {
        rows = _rows;
        cols = _cols;
        c = flags;
    }
    ~Mat()
    {
        release();
    }
    // assign
    // Shallow-copy assignment; retains the source buffer before releasing
    // the current one.
    Mat& operator=(const Mat& m)
    {
        if (this == &m)
            return *this;
        if (m.refcount)
            NCNN_XADD(m.refcount, 1);
        release();
        data = m.data;
        refcount = m.refcount;
        rows = m.rows;
        cols = m.cols;
        c = m.c;
        return *this;
    }
    // Fill every pixel with the per-channel values of s.
    Mat& operator=(const Scalar& s)
    {
        if (total() > 0)
        {
            uchar* p = data;
            for (int i = 0; i < cols * rows; i++)
            {
                for (int j = 0; j < c; j++)
                {
                    *p++ = s[j];
                }
            }
        }
        return *this;
    }
    // Allocate storage; the reference counter is stored right after the
    // pixel data in the same allocation.
    void create(int _rows, int _cols, int flags)
    {
        release();
        rows = _rows;
        cols = _cols;
        c = flags;
        if (total() > 0)
        {
            // refcount address must be aligned, so we expand totalsize here
            size_t totalsize = (total() + 3) >> 2 << 2;
            data = (uchar*)ncnn::fastMalloc(totalsize + (int)sizeof(*refcount));
            refcount = (int*)(((uchar*)data) + totalsize);
            *refcount = 1;
        }
    }
    // Drop one reference; frees the buffer when this was the last owner
    // (NCNN_XADD returns the counter value before the decrement).
    void release()
    {
        if (refcount && NCNN_XADD(refcount, -1) == 1)
            ncnn::fastFree(data);
        data = 0;
        rows = 0;
        cols = 0;
        c = 0;
        refcount = 0;
    }
    // Deep copy into freshly allocated storage.
    Mat clone() const
    {
        if (empty())
            return Mat();
        Mat m(rows, cols, c);
        if (total() > 0)
        {
            memcpy(m.data, data, total());
        }
        return m;
    }
    bool empty() const
    {
        return data == 0 || total() == 0;
    }
    int channels() const
    {
        return c;
    }
    // Type and channel count are the same value in this implementation.
    int type() const
    {
        return c;
    }
    // Total unit count (bytes for 8-bit data).
    // NOTE(review): computed in int before widening to size_t -- may
    // overflow for very large images.
    size_t total() const
    {
        return cols * rows * c;
    }
    // Pointer to the start of row y (byte-typed; row stride is cols * c).
    const uchar* ptr(int y) const
    {
        return data + y * cols * c;
    }
    uchar* ptr(int y)
    {
        return data + y * cols * c;
    }
    // NOTE(review): these element-typed overloads offset by y * cols * c
    // *elements* of _Tp, i.e. the byte stride scales with sizeof(_Tp);
    // only consistent with the byte overloads for 1-byte _Tp -- confirm
    // against callers before using with wider element types.
    template<typename _Tp>
    const _Tp* ptr(int y) const
    {
        return (const _Tp*)data + y * cols * c;
    }
    template<typename _Tp>
    _Tp* ptr(int y)
    {
        return (_Tp*)data + y * cols * c;
    }
    // roi
    // Deep-copy the region of interest into a new Mat (no view aliasing).
    Mat operator()(const Rect& roi) const
    {
        if (empty())
            return Mat();
        Mat m(roi.height, roi.width, c);
        int sy = roi.y;
        for (int y = 0; y < roi.height; y++)
        {
            const uchar* sptr = ptr(sy) + roi.x * c;
            uchar* dptr = m.ptr(y);
            memcpy(dptr, sptr, roi.width * c);
            sy++;
        }
        return m;
    }
    uchar* data;
    // pointer to the reference counter;
    // when points to user-allocated data, the pointer is NULL
    int* refcount;
    int rows;
    int cols;
    int c; // interleaved channel count (a.k.a. the type flag)
};
// OpenCV-compatible imread flag values (channel layout of the decoded image).
enum ImreadModes
{
    IMREAD_UNCHANGED = -1,
    IMREAD_GRAYSCALE = 0,
    IMREAD_COLOR = 1
};
// Decode an image file; flags selects the layout (see ImreadModes).
NCNN_EXPORT Mat imread(const std::string& path, int flags = IMREAD_COLOR);
// OpenCV-compatible imwrite flag values.
enum ImwriteFlags
{
    IMWRITE_JPEG_QUALITY = 1
};
// Encode and write m to path; params is a flag/value pair list
// (e.g. {IMWRITE_JPEG_QUALITY, 90}).
NCNN_EXPORT bool imwrite(const std::string& path, const Mat& m, const std::vector<int>& params = std::vector<int>());
// Declared for cv API compatibility; NOTE(review): confirm in the
// implementation whether these actually display anything or are stubs.
NCNN_EXPORT void imshow(const std::string& name, const Mat& m);
NCNN_EXPORT int waitKey(int delay = 0);
#if NCNN_PIXEL
// Resize src into dst; NOTE(review): sw/sh look like optional scale factors
// used when size is not fully specified -- confirm in the implementation.
NCNN_EXPORT void resize(const Mat& src, Mat& dst, const Size& size, float sw = 0.f, float sh = 0.f, int flags = 0);
#endif // NCNN_PIXEL
#if NCNN_PIXEL_DRAWING
// Thickness value meaning "fill the shape".
enum
{
    FILLED = -1
};
// Basic drawing primitives operating in-place on img.
NCNN_EXPORT void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness = 1);
NCNN_EXPORT void rectangle(Mat& img, Rect rec, const Scalar& color, int thickness = 1);
NCNN_EXPORT void circle(Mat& img, Point center, int radius, const Scalar& color, int thickness = 1);
NCNN_EXPORT void line(Mat& img, Point p0, Point p1, const Scalar& color, int thickness = 1);
// Only one font face is provided.
enum
{
    FONT_HERSHEY_SIMPLEX = 0
};
NCNN_EXPORT void putText(Mat& img, const std::string& text, Point org, int fontFace, double fontScale, Scalar color, int thickness = 1);
// Measure the rendered size of text; baseLine receives the baseline offset.
NCNN_EXPORT Size getTextSize(const std::string& text, int fontFace, double fontScale, int thickness, int* baseLine);
#endif // NCNN_PIXEL_DRAWING
} // namespace cv
#if defined(_MSC_VER) || defined(__GNUC__)
#pragma pop_macro("min")
#pragma pop_macro("max")
#endif
#endif // NCNN_SIMPLEOCV
#endif // NCNN_SIMPLEOCV_H

53
lib_ncnn/ncnn/simpleomp.h Executable file
View File

@@ -0,0 +1,53 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_SIMPLEOMP_H
#define NCNN_SIMPLEOMP_H
#include "platform.h"
#if NCNN_SIMPLEOMP
#include <stdint.h>
// This minimal openmp runtime implementation only supports the llvm openmp abi
// and only supports #pragma omp parallel for num_threads(X)
#ifdef __cplusplus
extern "C" {
#endif
// Maximum number of threads a parallel region may use.
NCNN_EXPORT int omp_get_max_threads();
// Set the thread count for subsequent parallel regions.
NCNN_EXPORT void omp_set_num_threads(int num_threads);
// Dynamic thread-adjustment query/setter (OpenMP API compatibility).
NCNN_EXPORT int omp_get_dynamic();
NCNN_EXPORT void omp_set_dynamic(int dynamic);
// Thread count / calling-thread index inside the current parallel region.
NCNN_EXPORT int omp_get_num_threads();
NCNN_EXPORT int omp_get_thread_num();
// LLVM-runtime extension: worker busy-wait time before sleeping.
NCNN_EXPORT int kmp_get_blocktime();
NCNN_EXPORT void kmp_set_blocktime(int blocktime);
#ifdef __cplusplus
}
#endif
#endif // NCNN_SIMPLEOMP
#endif // NCNN_SIMPLEOMP_H

565
lib_ncnn/ncnn/simplestl.h Executable file
View File

@@ -0,0 +1,565 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_SIMPLESTL_H
#define NCNN_SIMPLESTL_H
#include <stddef.h>
#include <stdint.h>
#include <string.h>
// When the bundled STL replacement is enabled, <new> is not available, so
// the global (placement) allocation operators are declared here; their
// definitions live in ncnn's simplestl implementation -- TODO(review)
// confirm location.
#if !NCNN_SIMPLESTL
#include <new>
#else
// allocation functions
NCNN_EXPORT void* operator new(size_t size);
NCNN_EXPORT void* operator new[](size_t size);
// placement allocation functions
NCNN_EXPORT void* operator new(size_t size, void* ptr);
NCNN_EXPORT void* operator new[](size_t size, void* ptr);
// deallocation functions
NCNN_EXPORT void operator delete(void* ptr);
NCNN_EXPORT void operator delete[](void* ptr);
// deallocation functions since c++14
#if __cplusplus >= 201402L
NCNN_EXPORT void operator delete(void* ptr, size_t sz);
NCNN_EXPORT void operator delete[](void* ptr, size_t sz);
#endif
// placement deallocation functions
NCNN_EXPORT void operator delete(void* ptr, void* voidptr2);
NCNN_EXPORT void operator delete[](void* ptr, void* voidptr2);
#endif
// minimal stl data structure implementation
namespace std {
// std::max replacement; requires only operator< on T.
template<typename T>
const T& max(const T& a, const T& b)
{
    if (a < b)
        return b;
    return a;
}
// std::min replacement; requires only operator> on T.
template<typename T>
const T& min(const T& a, const T& b)
{
    if (a > b)
        return b;
    return a;
}
// std::swap replacement: exchange a and b via a copy (no move support).
template<typename T>
void swap(T& a, T& b)
{
    T tmp(a);
    a = b;
    b = tmp;
}
// Minimal std::pair replacement plus its relational operators and make_pair.
template<typename T1, typename T2>
struct pair
{
    pair()
        : first(), second()
    {
    }
    pair(const T1& t1, const T2& t2)
        : first(t1), second(t2)
    {
    }
    T1 first;
    T2 second;
};
// Equal when both members compare equal.
template<typename T1, typename T2>
bool operator==(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    return x.first == y.first && x.second == y.second;
}
// Lexicographic order built from operator< only.
template<typename T1, typename T2>
bool operator<(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    if (x.first < y.first)
        return true;
    if (y.first < x.first)
        return false;
    return x.second < y.second;
}
// Remaining comparisons are derived from == and <.
template<typename T1, typename T2>
bool operator!=(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    return !(x == y);
}
template<typename T1, typename T2>
bool operator>(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    return y < x;
}
template<typename T1, typename T2>
bool operator<=(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    return !(y < x);
}
template<typename T1, typename T2>
bool operator>=(const pair<T1, T2>& x, const pair<T1, T2>& y)
{
    return !(x < y);
}
// Deduce the pair type from the arguments.
template<typename T1, typename T2>
pair<T1, T2> make_pair(const T1& t1, const T2& t2)
{
    return pair<T1, T2>(t1, t2);
}
// Doubly-linked list node holding one value of type T.
template<typename T>
struct node
{
    node* prev_;
    node* next_;
    T data_;
    // Unlinked node with a default-constructed value.
    node()
        : prev_(0), next_(0), data_()
    {
    }
    // Unlinked node holding a copy of t.
    node(const T& t)
        : prev_(0), next_(0), data_(t)
    {
    }
};
// Bidirectional iterator over a chain of node<T>.
template<typename T>
struct iter_list
{
    iter_list()
        : curr_(0)
    {
    }
    iter_list(node<T>* n)
        : curr_(n)
    {
    }
    iter_list(const iter_list& i)
        : curr_(i.curr_)
    {
    }
    ~iter_list()
    {
    }
    iter_list& operator=(const iter_list& i)
    {
        curr_ = i.curr_;
        return *this;
    }
    // Dereference to the value stored in the current node.
    T& operator*()
    {
        return curr_->data_;
    }
    T* operator->()
    {
        return &curr_->data_;
    }
    // Two iterators are equal when they point at the same node.
    bool operator==(const iter_list& i)
    {
        return curr_ == i.curr_;
    }
    bool operator!=(const iter_list& i)
    {
        return !(curr_ == i.curr_);
    }
    iter_list& operator++()
    {
        curr_ = curr_->next_;
        return *this;
    }
    iter_list& operator--()
    {
        curr_ = curr_->prev_;
        return *this;
    }
    node<T>* curr_;
};
// Minimal std::list replacement: a doubly-linked list with a trailing
// sentinel node. tail_ always points at the sentinel, so end() iterates
// over the sentinel and begin() == end() when the list is empty.
template<typename T>
struct list
{
    typedef iter_list<T> iterator;
    list()
    {
        // start with just the sentinel; head_ == tail_ means empty
        head_ = new node<T>();
        tail_ = head_;
        count_ = 0;
    }
    ~list()
    {
        clear();
        delete head_; // at this point head_ is the sentinel again
    }
    list(const list& l)
    {
        head_ = new node<T>();
        tail_ = head_;
        count_ = 0;
        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
        {
            push_back(*i);
        }
    }
    list& operator=(const list& l)
    {
        if (this == &l)
        {
            return *this;
        }
        clear();
        for (iter_list<T> i = l.begin(); i != l.end(); ++i)
        {
            push_back(*i);
        }
        return *this;
    }
    // Remove all elements; the sentinel is kept.
    void clear()
    {
        while (count_ > 0)
        {
            pop_front();
        }
    }
    // Remove the first element (no-op when empty).
    void pop_front()
    {
        if (count_ > 0)
        {
            head_ = head_->next_;
            delete head_->prev_;
            head_->prev_ = 0;
            --count_;
        }
    }
    size_t size() const
    {
        return count_;
    }
    iter_list<T> begin() const
    {
        return iter_list<T>(head_);
    }
    // One-past-the-end iterator (points at the sentinel node).
    iter_list<T> end() const
    {
        return iter_list<T>(tail_);
    }
    bool empty() const
    {
        return count_ == 0;
    }
    // Append a copy of t (inserted just before the sentinel).
    void push_back(const T& t)
    {
        if (count_ == 0)
        {
            // first element becomes the new head, linked to the sentinel
            head_ = new node<T>(t);
            head_->prev_ = 0;
            head_->next_ = tail_;
            tail_->prev_ = head_;
            count_ = 1;
        }
        else
        {
            node<T>* temp = new node<T>(t);
            temp->prev_ = tail_->prev_;
            temp->next_ = tail_;
            tail_->prev_->next_ = temp;
            tail_->prev_ = temp;
            ++count_;
        }
    }
    // Remove the element at pos and return an iterator to the following
    // element (or end()). Erasing end() is a no-op.
    iter_list<T> erase(iter_list<T> pos)
    {
        if (pos != end())
        {
            node<T>* temp = pos.curr_;
            if (temp == head_)
            {
                ++pos;
                temp->next_->prev_ = 0;
                head_ = temp->next_;
            }
            else
            {
                // step back, unlink temp, then step forward again so pos
                // ends up on the node after the erased one
                --pos;
                temp->next_->prev_ = temp->prev_;
                temp->prev_->next_ = temp->next_;
                ++pos;
            }
            delete temp;
            --count_;
        }
        return pos;
    }
protected:
    node<T>* head_; // first element, or the sentinel when empty
    node<T>* tail_; // always the sentinel node
    size_t count_;  // number of stored elements (sentinel excluded)
};
// Function object: descending-order comparison (mirrors std::greater).
template<typename T>
struct greater
{
    bool operator()(const T& x, const T& y) const
    {
        return x > y;
    }
};
// Function object: ascending-order comparison (mirrors std::less).
template<typename T>
struct less
{
    bool operator()(const T& x, const T& y) const
    {
        return x < y;
    }
};
template<typename RandomAccessIter, typename Compare>
void partial_sort(RandomAccessIter first, RandomAccessIter middle, RandomAccessIter last, Compare comp)
{
// [TODO] heap sort should be used here, but we simply use bubble sort now
for (RandomAccessIter i = first; i < middle; ++i)
{
// bubble sort
for (RandomAccessIter j = last - 1; j > first; --j)
{
if (comp(*j, *(j - 1)))
{
swap(*j, *(j - 1));
}
}
}
}
// Minimal std::vector replacement.
// Notes:
//  - try_alloc() over-allocates (capacity = 2 * requested size) and
//    zero-fills fresh storage, then relocates elements with memmove.
//  - NOTE(review): memmove/memset relocation is only valid for trivially
//    relocatable element types; confirm ncnn only instantiates this with
//    such types.
template<typename T>
struct vector
{
    vector()
        : data_(0), size_(0), capacity_(0)
    {
    }
    // Construct with new_size copies of value.
    vector(const size_t new_size, const T& value = T())
        : data_(0), size_(0), capacity_(0)
    {
        resize(new_size, value);
    }
    ~vector()
    {
        clear();
    }
    vector(const vector& v)
        : data_(0), size_(0), capacity_(0)
    {
        resize(v.size());
        for (size_t i = 0; i < size_; i++)
        {
            data_[i] = v.data_[i];
        }
    }
    vector& operator=(const vector& v)
    {
        if (this == &v)
        {
            return *this;
        }
        // destroy current elements, then default-construct and copy-assign
        resize(0);
        resize(v.size());
        for (size_t i = 0; i < size_; i++)
        {
            data_[i] = v.data_[i];
        }
        return *this;
    }
    // Grow (placement-new copies of value) or shrink (destroy the tail).
    void resize(const size_t new_size, const T& value = T())
    {
        try_alloc(new_size);
        if (new_size > size_)
        {
            for (size_t i = size_; i < new_size; i++)
            {
                new (&data_[i]) T(value);
            }
        }
        else if (new_size < size_)
        {
            for (size_t i = new_size; i < size_; i++)
            {
                data_[i].~T();
            }
        }
        size_ = new_size;
    }
    // Destroy all elements and free the storage.
    void clear()
    {
        for (size_t i = 0; i < size_; i++)
        {
            data_[i].~T();
        }
        delete[](char*) data_;
        data_ = 0;
        size_ = 0;
        capacity_ = 0;
    }
    T* data() const
    {
        return data_;
    }
    size_t size() const
    {
        return size_;
    }
    // NOTE(review): non-const reference from a const method -- intentional
    // shortcut in this minimal implementation.
    T& operator[](size_t i) const
    {
        return data_[i];
    }
    T* begin() const
    {
        return &data_[0];
    }
    T* end() const
    {
        return &data_[size_];
    }
    bool empty() const
    {
        return size_ == 0;
    }
    void push_back(const T& t)
    {
        try_alloc(size_ + 1);
        new (&data_[size_]) T(t);
        size_++;
    }
    // Insert [b, e) before pos. Self-insertion (b/e pointing into this
    // vector) is handled by copying the source range first.
    void insert(T* pos, T* b, T* e)
    {
        vector* v = 0;
        if (b >= begin() && b < end())
        {
            //the same vector
            v = new vector(*this);
            b = v->begin() + (b - begin());
            e = v->begin() + (e - begin());
        }
        size_t diff = pos - begin();
        try_alloc(size_ + (e - b));
        pos = begin() + diff; // try_alloc may have moved the storage
        memmove(pos + (e - b), pos, (end() - pos) * sizeof(T));
        size_t len = e - b;
        size_ += len;
        for (size_t i = 0; i < len; i++)
        {
            *pos = *b;
            pos++;
            b++;
        }
        delete v;
    }
    // Destroy *pos and close the gap; returns pos (now the next element).
    T* erase(T* pos)
    {
        pos->~T();
        memmove(pos, pos + 1, (end() - pos - 1) * sizeof(T));
        size_--;
        return pos;
    }
protected:
    T* data_;
    size_t size_;
    size_t capacity_;
    // Ensure room for new_size elements; reallocates (to 2 * new_size)
    // whenever the request exceeds roughly a third of the current capacity,
    // so capacity stays well ahead of size and fresh storage is zero-filled.
    void try_alloc(size_t new_size)
    {
        if (new_size * 3 / 2 > capacity_ / 2)
        {
            capacity_ = new_size * 2;
            T* new_data = (T*)new char[capacity_ * sizeof(T)];
            memset(new_data, 0, capacity_ * sizeof(T));
            if (data_)
            {
                memmove(new_data, data_, sizeof(T) * size_);
                delete[](char*) data_;
            }
            data_ = new_data;
        }
    }
};
// Minimal std::string replacement built on vector<char>.
// Storage invariant: try_alloc() zero-fills fresh capacity and keeps
// capacity comfortably larger than size, so the byte at data_[size()] is
// always '\0' whenever data_ is non-null -- that is what makes c_str()
// work without storing an explicit terminator.
struct NCNN_EXPORT string : public vector<char>
{
    // Empty string; data_ stays null until something is stored.
    string()
    {
    }
    // Copy the bytes of a C string (the terminator comes from the
    // zero-filled over-allocation, see the invariant above).
    string(const char* str)
    {
        size_t len = strlen(str);
        resize(len);
        memcpy(data_, str, len);
    }
    // C-string view; never returns null. (A default-constructed string has
    // null data_, which previously made the comparison operators call
    // strcmp(NULL, ...) -- undefined behavior.)
    const char* c_str() const
    {
        return data_ ? data_ : "";
    }
    bool operator==(const string& str2) const
    {
        return strcmp(c_str(), str2.c_str()) == 0;
    }
    bool operator==(const char* str2) const
    {
        return strcmp(c_str(), str2) == 0;
    }
    bool operator!=(const char* str2) const
    {
        return strcmp(c_str(), str2) != 0;
    }
    // Append str1 in place.
    string& operator+=(const string& str1)
    {
        insert(end(), str1.begin(), str1.end());
        return *this;
    }
};
// Concatenate two strings into a newly built string.
inline string operator+(const string& str1, const string& str2)
{
    string result(str1);
    result.insert(result.end(), str2.begin(), str2.end());
    return result;
}
} // namespace std
#endif // NCNN_SIMPLESTL_H

251
lib_ncnn/ncnn/vulkan_header_fix.h Executable file
View File

@@ -0,0 +1,251 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef NCNN_VULKAN_HEADER_FIX_H
#define NCNN_VULKAN_HEADER_FIX_H
#include <vulkan/vulkan.h>
// This header contains new structure and function declearation to fix build with old vulkan sdk
// Backfill subgroup-properties, maintenance3 and descriptor-set-layout-
// support types for SDKs older than header version 70. These definitions
// must stay byte-compatible with the official Vulkan headers.
#if VK_HEADER_VERSION < 70
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES (VkStructureType)1000094000
typedef enum VkSubgroupFeatureFlagBits
{
    VK_SUBGROUP_FEATURE_BASIC_BIT = 0x00000001,
    VK_SUBGROUP_FEATURE_VOTE_BIT = 0x00000002,
    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT = 0x00000004,
    VK_SUBGROUP_FEATURE_BALLOT_BIT = 0x00000008,
    VK_SUBGROUP_FEATURE_SHUFFLE_BIT = 0x00000010,
    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT = 0x00000020,
    VK_SUBGROUP_FEATURE_CLUSTERED_BIT = 0x00000040,
    VK_SUBGROUP_FEATURE_QUAD_BIT = 0x00000080,
    VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV = 0x00000100,
    VK_SUBGROUP_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkSubgroupFeatureFlagBits;
typedef VkFlags VkSubgroupFeatureFlags;
typedef struct VkPhysicalDeviceSubgroupProperties
{
    VkStructureType sType;
    void* pNext;
    uint32_t subgroupSize;
    VkShaderStageFlags supportedStages;
    VkSubgroupFeatureFlags supportedOperations;
    VkBool32 quadOperationsInAllStages;
} VkPhysicalDeviceSubgroupProperties;
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES (VkStructureType)1000168000
#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_SUPPORT (VkStructureType)1000168001
typedef struct VkPhysicalDeviceMaintenance3Properties
{
    VkStructureType sType;
    void* pNext;
    uint32_t maxPerSetDescriptors;
    VkDeviceSize maxMemoryAllocationSize;
} VkPhysicalDeviceMaintenance3Properties;
typedef struct VkDescriptorSetLayoutSupport
{
    VkStructureType sType;
    void* pNext;
    VkBool32 supported;
} VkDescriptorSetLayoutSupport;
typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR;
typedef VkDescriptorSetLayoutSupport VkDescriptorSetLayoutSupportKHR;
typedef void(VKAPI_PTR* PFN_vkGetDescriptorSetLayoutSupportKHR)(VkDevice device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport);
#endif // VK_HEADER_VERSION < 70
// Backfill VK_KHR_8bit_storage and VK_KHR_create_renderpass2 types for SDKs
// older than header version 80. Must stay byte-compatible with the official
// Vulkan headers.
#if VK_HEADER_VERSION < 80
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR (VkStructureType)1000177000
typedef struct VkPhysicalDevice8BitStorageFeaturesKHR
{
    VkStructureType sType;
    void* pNext;
    VkBool32 storageBuffer8BitAccess;
    VkBool32 uniformAndStorageBuffer8BitAccess;
    VkBool32 storagePushConstant8;
} VkPhysicalDevice8BitStorageFeaturesKHR;
#define VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR (VkStructureType)1000109000
#define VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR (VkStructureType)1000109001
#define VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR (VkStructureType)1000109002
#define VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR (VkStructureType)1000109003
#define VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR (VkStructureType)1000109004
#define VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR (VkStructureType)1000109005
#define VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR (VkStructureType)1000109006
typedef struct VkAttachmentDescription2KHR
{
    VkStructureType sType;
    const void* pNext;
    VkAttachmentDescriptionFlags flags;
    VkFormat format;
    VkSampleCountFlagBits samples;
    VkAttachmentLoadOp loadOp;
    VkAttachmentStoreOp storeOp;
    VkAttachmentLoadOp stencilLoadOp;
    VkAttachmentStoreOp stencilStoreOp;
    VkImageLayout initialLayout;
    VkImageLayout finalLayout;
} VkAttachmentDescription2KHR;
typedef struct VkAttachmentReference2KHR
{
    VkStructureType sType;
    const void* pNext;
    uint32_t attachment;
    VkImageLayout layout;
    VkImageAspectFlags aspectMask;
} VkAttachmentReference2KHR;
typedef struct VkSubpassDescription2KHR
{
    VkStructureType sType;
    const void* pNext;
    VkSubpassDescriptionFlags flags;
    VkPipelineBindPoint pipelineBindPoint;
    uint32_t viewMask;
    uint32_t inputAttachmentCount;
    const VkAttachmentReference2KHR* pInputAttachments;
    uint32_t colorAttachmentCount;
    const VkAttachmentReference2KHR* pColorAttachments;
    const VkAttachmentReference2KHR* pResolveAttachments;
    const VkAttachmentReference2KHR* pDepthStencilAttachment;
    uint32_t preserveAttachmentCount;
    const uint32_t* pPreserveAttachments;
} VkSubpassDescription2KHR;
typedef struct VkSubpassDependency2KHR
{
    VkStructureType sType;
    const void* pNext;
    uint32_t srcSubpass;
    uint32_t dstSubpass;
    VkPipelineStageFlags srcStageMask;
    VkPipelineStageFlags dstStageMask;
    VkAccessFlags srcAccessMask;
    VkAccessFlags dstAccessMask;
    VkDependencyFlags dependencyFlags;
    int32_t viewOffset;
} VkSubpassDependency2KHR;
typedef struct VkRenderPassCreateInfo2KHR
{
    VkStructureType sType;
    const void* pNext;
    VkRenderPassCreateFlags flags;
    uint32_t attachmentCount;
    const VkAttachmentDescription2KHR* pAttachments;
    uint32_t subpassCount;
    const VkSubpassDescription2KHR* pSubpasses;
    uint32_t dependencyCount;
    const VkSubpassDependency2KHR* pDependencies;
    uint32_t correlatedViewMaskCount;
    const uint32_t* pCorrelatedViewMasks;
} VkRenderPassCreateInfo2KHR;
typedef struct VkSubpassBeginInfoKHR
{
    VkStructureType sType;
    const void* pNext;
    VkSubpassContents contents;
} VkSubpassBeginInfoKHR;
typedef struct VkSubpassEndInfoKHR
{
    VkStructureType sType;
    const void* pNext;
} VkSubpassEndInfoKHR;
// Function-pointer types for the renderpass2 entry points (resolved at
// runtime via vkGetDeviceProcAddr).
typedef VkResult(VKAPI_PTR* PFN_vkCreateRenderPass2KHR)(VkDevice device, const VkRenderPassCreateInfo2KHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkRenderPass* pRenderPass);
typedef void(VKAPI_PTR* PFN_vkCmdBeginRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo* pRenderPassBegin, const VkSubpassBeginInfoKHR* pSubpassBeginInfo);
typedef void(VKAPI_PTR* PFN_vkCmdNextSubpass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfoKHR* pSubpassBeginInfo, const VkSubpassEndInfoKHR* pSubpassEndInfo);
typedef void(VKAPI_PTR* PFN_vkCmdEndRenderPass2KHR)(VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR* pSubpassEndInfo);
#endif // VK_HEADER_VERSION < 80
// Backfill VK_KHR_shader_float16_int8 feature struct for SDKs older than
// header version 95.
#if VK_HEADER_VERSION < 95
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR (VkStructureType)1000082000
typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR
{
    VkStructureType sType;
    void* pNext;
    VkBool32 shaderFloat16;
    VkBool32 shaderInt8;
} VkPhysicalDeviceFloat16Int8FeaturesKHR;
#endif // VK_HEADER_VERSION < 95
// Backfill VK_EXT_memory_budget properties struct for SDKs older than
// header version 97.
#if VK_HEADER_VERSION < 97
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT (VkStructureType)1000237000
typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT
{
    VkStructureType sType;
    void* pNext;
    VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS];
    VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS];
} VkPhysicalDeviceMemoryBudgetPropertiesEXT;
#endif // VK_HEADER_VERSION < 97
// Backfill VK_NV_cooperative_matrix enums/structs for SDKs older than
// header version 101. Must stay byte-compatible with the official headers.
#if VK_HEADER_VERSION < 101
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV (VkStructureType)1000249000
#define VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249001
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV (VkStructureType)1000249002
typedef enum VkComponentTypeNV
{
    VK_COMPONENT_TYPE_FLOAT16_NV = 0,
    VK_COMPONENT_TYPE_FLOAT32_NV = 1,
    VK_COMPONENT_TYPE_FLOAT64_NV = 2,
    VK_COMPONENT_TYPE_SINT8_NV = 3,
    VK_COMPONENT_TYPE_SINT16_NV = 4,
    VK_COMPONENT_TYPE_SINT32_NV = 5,
    VK_COMPONENT_TYPE_SINT64_NV = 6,
    VK_COMPONENT_TYPE_UINT8_NV = 7,
    VK_COMPONENT_TYPE_UINT16_NV = 8,
    VK_COMPONENT_TYPE_UINT32_NV = 9,
    VK_COMPONENT_TYPE_UINT64_NV = 10,
    VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV,
    VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV,
    VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1),
    VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF
} VkComponentTypeNV;
typedef enum VkScopeNV
{
    VK_SCOPE_DEVICE_NV = 1,
    VK_SCOPE_WORKGROUP_NV = 2,
    VK_SCOPE_SUBGROUP_NV = 3,
    VK_SCOPE_QUEUE_FAMILY_NV = 5,
    VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV,
    VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV,
    VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1),
    VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF
} VkScopeNV;
// One supported (M, N, K) matrix-multiply shape and its element types.
typedef struct VkCooperativeMatrixPropertiesNV
{
    VkStructureType sType;
    void* pNext;
    uint32_t MSize;
    uint32_t NSize;
    uint32_t KSize;
    VkComponentTypeNV AType;
    VkComponentTypeNV BType;
    VkComponentTypeNV CType;
    VkComponentTypeNV DType;
    VkScopeNV scope;
} VkCooperativeMatrixPropertiesNV;
typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV
{
    VkStructureType sType;
    void* pNext;
    VkBool32 cooperativeMatrix;
    VkBool32 cooperativeMatrixRobustBufferAccess;
} VkPhysicalDeviceCooperativeMatrixFeaturesNV;
typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV
{
    VkStructureType sType;
    void* pNext;
    VkShaderStageFlags cooperativeMatrixSupportedStages;
} VkPhysicalDeviceCooperativeMatrixPropertiesNV;
typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties);
#endif // VK_HEADER_VERSION < 101
#endif // NCNN_VULKAN_HEADER_FIX_H