diff --git a/src/layer.h b/src/layer.h
index 46fed5e456c..d02f65bbca9 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -96,10 +96,9 @@ class NCNN_EXPORT Layer
     bool support_reserved_7;
     bool support_reserved_8;
     bool support_reserved_9;
-    bool support_reserved_10;
-    bool support_reserved_11;
-    bool support_reserved_12;
-    bool support_reserved_13;
+
+    // feature disabled set, a bit set here turns the matching Option feature off for this layer (0 = nothing disabled)
+    int featmask;
 
 public:
     // implement inference
diff --git a/src/layer/vulkan/convolution_vulkan.cpp b/src/layer/vulkan/convolution_vulkan.cpp
index 5a73695e7c1..22e817d34e6 100644
--- a/src/layer/vulkan/convolution_vulkan.cpp
+++ b/src/layer/vulkan/convolution_vulkan.cpp
@@ -794,7 +794,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
         convert_packing(bias_data, bias_data_packed, out_elempack, opt);
     }
 
-    if (opt.use_sgemm_convolution && !is_conv1x1s1d1 && num_input >= 16 && num_output >= 16)
+    if (opt.use_winograd_convolution && (opt.use_winograd23_convolution || opt.use_winograd43_convolution) && is_conv3x3s1d1 && num_input >= 16 && num_output >= 16)
+    {
+        // pass, forward() takes its winograd path for this case so the gemm / 1x1 branches below are skipped
+    }
+    else if (opt.use_sgemm_convolution && !is_conv1x1s1d1 && num_input >= 16 && num_output >= 16)
     {
         bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0;
 
@@ -872,7 +876,7 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
         }
         pipeline_convolution_gemm->create(shader_type_index, opt, specializations);
     }
-    if (is_conv1x1s1d1)
+    else if (is_conv1x1s1d1)
     {
         bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && num_input % 8 == 0 && num_output % 8 == 0;
 
@@ -1221,13 +1225,16 @@ int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCom
         bool use_cooperative_matrix = vkdev->info.support_cooperative_matrix_16_8_8() && opt.use_cooperative_matrix && !opt.use_image_storage && !opt.use_shader_pack8 && opt.use_fp16_storage && channels * elempack % 8 == 0 && num_output % 8 == 0;
 
         bool pre_winograd43 = opt.use_winograd43_convolution;
-        if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
-            pre_winograd43 = false;
-        if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
-            pre_winograd43 = false;
+        if (opt.use_winograd23_convolution)
+        {
+            if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
+                pre_winograd43 = false;
+            if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
+                pre_winograd43 = false;
 
-        if (use_cooperative_matrix && (w <= 18 && h <= 18))
-            pre_winograd43 = false;
+            if (use_cooperative_matrix && (w <= 18 && h <= 18))
+                pre_winograd43 = false;
+        }
 
         if (pre_winograd43)
         {
@@ -1660,10 +1667,13 @@ int Convolution_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_b
     if (opt.use_winograd_convolution && (opt.use_winograd23_convolution || opt.use_winograd43_convolution) && is_conv3x3s1d1 && channels * elempack >= 16 && num_output >= 16)
     {
         bool pre_winograd43 = opt.use_winograd43_convolution;
-        if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
-            pre_winograd43 = false;
-        if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
-            pre_winograd43 = false;
+        if (opt.use_winograd23_convolution)
+        {
+            if (vkdev->info.type() == 0 && ((w <= 18 && h <= 18) || ((w >= 23 && w <= 24) && (h >= 23 && h <= 24))))
+                pre_winograd43 = false;
+            if (vkdev->info.type() != 0 && (w <= 12 && h <= 12))
+                pre_winograd43 = false;
+        }
 
         if (pre_winograd43)
         {
diff --git a/src/net.cpp b/src/net.cpp
index baa6ebea6b9..8a09ebdc1ef 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -108,6 +108,26 @@ NetPrivate::NetPrivate(Option& _opt)
 #endif // NCNN_VULKAN
 }
 
+static Option get_masked_option(const Option& opt, int featmask)
+{
+    // return a copy of opt in which every feature whose featmask bit is set is forced off; a zero featmask returns opt unchanged
+    Option opt1 = opt;
+    opt1.use_fp16_arithmetic = opt1.use_fp16_arithmetic && !(featmask & (1 << 0)); // bit 0: fp16 arithmetic
+    opt1.use_fp16_storage = opt1.use_fp16_storage && !(featmask & (1 << 1));       // bit 1: fp16 storage + packed
+    opt1.use_fp16_packed = opt1.use_fp16_packed && !(featmask & (1 << 1));
+    opt1.use_bf16_storage = opt1.use_bf16_storage && !(featmask & (1 << 2));       // bit 2: bf16 storage
+    opt1.use_int8_packed = opt1.use_int8_packed && !(featmask & (1 << 3));         // bit 3: all int8 options
+    opt1.use_int8_storage = opt1.use_int8_storage && !(featmask & (1 << 3));
+    opt1.use_int8_arithmetic = opt1.use_int8_arithmetic && !(featmask & (1 << 3));
+    opt1.use_vulkan_compute = opt1.use_vulkan_compute && !(featmask & (1 << 4));   // bit 4: vulkan compute + image/tensor storage
+    opt1.use_image_storage = opt1.use_image_storage && !(featmask & (1 << 4));
+    opt1.use_tensor_storage = opt1.use_tensor_storage && !(featmask & (1 << 4));
+    opt1.use_sgemm_convolution = opt1.use_sgemm_convolution && !(featmask & (1 << 5));       // bit 5: sgemm convolution
+    opt1.use_winograd_convolution = opt1.use_winograd_convolution && !(featmask & (1 << 6)); // bit 6: winograd convolution
+
+    return opt1;
+}
+
 #if NCNN_VULKAN
 int NetPrivate::upload_model()
 {
@@ -132,7 +152,7 @@ int NetPrivate::upload_model()
     {
         if (layers[i]->support_vulkan)
         {
-            int uret = layers[i]->upload_model(cmd, opt_upload);
+            int uret = layers[i]->upload_model(cmd, get_masked_option(opt_upload, layers[i]->featmask));
             if (uret != 0)
             {
                 NCNN_LOGE("layer upload_model %d failed", (int)i);
@@ -195,7 +215,15 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, cons
         bottom_blob.elemsize = blob_mats[bottom_blob_index].elemsize;
     }
 #endif
-    int ret = do_forward_layer(layer, blob_mats, opt);
+    int ret = 0;
+    if (layer->featmask)
+    {
+        ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
+    }
+    else
+    {
+        ret = do_forward_layer(layer, blob_mats, opt);
+    }
 #if NCNN_BENCHMARK
     double end = get_current_time();
     if (layer->one_blob_only)
@@ -352,7 +380,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
 #if NCNN_BENCHMARK
         cmd.record_write_timestamp(layer_index * 2);
 #endif
-        ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
+        if (layer->featmask)
+        {
+            ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask));
+        }
+        else
+        {
+            ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
+        }
 #if NCNN_BENCHMARK
         cmd.record_write_timestamp(layer_index * 2 + 1);
 #endif
@@ -368,7 +403,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
             bottom_blob = blob_mats[bottom_blob_index].shape();
         }
 #endif
-        ret = do_forward_layer(layer, blob_mats, opt);
+        if (layer->featmask)
+        {
+            ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
+        }
+        else
+        {
+            ret = do_forward_layer(layer, blob_mats, opt);
+        }
 #if NCNN_BENCHMARK
         double end = get_current_time();
         if (layer->one_blob_only)
@@ -677,7 +719,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
 #endif
         if (layer->support_image_storage)
         {
-            ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, opt);
+            if (layer->featmask)
+            {
+                ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, get_masked_option(opt, layer->featmask));
+            }
+            else
+            {
+                ret = do_forward_layer(layer, blob_mats_gpu_image, cmd, opt);
+            }
             if (ret == -100)
             {
                 image_allocation_failed = true;
@@ -686,7 +735,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
         }
         else
         {
-            ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
+            if (layer->featmask)
+            {
+                ret = do_forward_layer(layer, blob_mats_gpu, cmd, get_masked_option(opt, layer->featmask));
+            }
+            else
+            {
+                ret = do_forward_layer(layer, blob_mats_gpu, cmd, opt);
+            }
         }
 #if NCNN_BENCHMARK
         cmd.record_write_timestamp(layer_index * 2 + 1);
@@ -703,7 +759,14 @@ int NetPrivate::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std:
             bottom_blob = blob_mats[bottom_blob_index].shape();
         }
 #endif
-        ret = do_forward_layer(layer, blob_mats, opt);
+        if (layer->featmask)
+        {
+            ret = do_forward_layer(layer, blob_mats, get_masked_option(opt, layer->featmask));
+        }
+        else
+        {
+            ret = do_forward_layer(layer, blob_mats, opt);
+        }
 #if NCNN_BENCHMARK
         double end = get_current_time();
         if (layer->one_blob_only)
@@ -790,6 +853,7 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     // *INDENT-ON*
     // clang-format on
 
+    int dst_elempack = 1;
     if (opt.use_packing_layout)
     {
         // resolve dst_elempack
@@ -801,7 +865,6 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 
         int elembits = bottom_blob.elembits();
 
-        int dst_elempack = 1;
         if (layer->support_packing)
         {
             if (elembits == 32)
@@ -855,13 +918,13 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 #endif
             }
         }
+    }
 
-        if (bottom_blob.elempack != dst_elempack)
-        {
-            Mat bottom_blob_packed;
-            convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt);
-            bottom_blob = bottom_blob_packed;
-        }
+    if (bottom_blob.elempack != dst_elempack)
+    {
+        Mat bottom_blob_packed;
+        convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt);
+        bottom_blob = bottom_blob_packed;
     }
 
     return 0;
@@ -1571,6 +1634,9 @@ int Net::load_param(const DataReader& dr)
             layer->top_shapes[j] = d->blobs[layer->tops[j]].shape;
         }
 
+        // pull out layer specific feature disabled set
+        layer->featmask = pd.get(31, 0);
+
         int lr = layer->load_param(pd);
         if (lr != 0)
         {
@@ -1774,6 +1840,9 @@ int Net::load_param_bin(const DataReader& dr)
             layer->top_shapes[j] = d->blobs[layer->tops[j]].shape;
         }
 
+        // pull out layer specific feature disabled set
+        layer->featmask = pd.get(31, 0);
+
         int lr = layer->load_param(pd);
         if (lr != 0)
         {
@@ -1855,12 +1924,17 @@ int Net::load_model(const DataReader& dr)
     {
         Layer* layer = d->layers[i];
 
-        Option opt1 = opt;
+        Option opt1 = get_masked_option(opt, layer->featmask);
 #if NCNN_VULKAN
-        if (opt.use_vulkan_compute)
+        if (opt1.use_vulkan_compute)
         {
             if (!layer->support_image_storage) opt1.use_image_storage = false;
         }
+        else
+        {
+            layer->vkdev = 0;
+            layer->support_vulkan = false;
+        }
 #endif // NCNN_VULKAN
 
         int cret = layer->create_pipeline(opt1);
@@ -2066,11 +2140,13 @@ void Net::clear()
     {
         Layer* layer = d->layers[i];
 
-        Option opt1 = opt;
+        Option opt1 = get_masked_option(opt, layer->featmask);
+#if NCNN_VULKAN
         if (!layer->support_image_storage)
         {
             opt1.use_image_storage = false;
         }
+#endif // NCNN_VULKAN
 
         int dret = layer->destroy_pipeline(opt1);
         if (dret != 0)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index a88c6562db2..c751f418797 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -77,7 +77,7 @@ ncnn_add_layer_test(DeconvolutionDepthWise)
 ncnn_add_layer_test(DeconvolutionDepthWise1D)
 ncnn_add_layer_test(DeconvolutionDepthWise3D)
 ncnn_add_layer_test(DeepCopy)
-ncnn_add_layer_test(DeformableConv2D)
+# ncnn_add_layer_test(DeformableConv2D) too slow :(
 ncnn_add_layer_test(Dequantize)
 ncnn_add_layer_test(Dropout)
 ncnn_add_layer_test(Einsum)
diff --git a/tests/test_squeezenet.cpp b/tests/test_squeezenet.cpp
index 81789d26a72..07788c8edd1 100644
--- a/tests/test_squeezenet.cpp
+++ b/tests/test_squeezenet.cpp
@@ -177,6 +177,16 @@ static int test_squeezenet(const ncnn::Option& opt, int load_model_type, float e
     {
         // load from plain model file
         squeezenet.load_param(MODEL_DIR "/squeezenet_v1.1.param");
+
+        // test random feature disabled bits
+        {
+            std::vector<ncnn::Layer*>& layers = squeezenet.mutable_layers();
+            for (size_t i = 0; i < layers.size(); i++)
+            {
+                layers[i]->featmask = i * 11 % 128;
+            }
+        }
+
         squeezenet.load_model(MODEL_DIR "/squeezenet_v1.1.bin");
     }
     if (load_model_type == 1)