Skip to content

Commit c01cd93

Browse files
[CPU] Convolution fixes combined (#30890)
Includes: - #30870 - #30875 - #30872 - #30871 --------- Co-authored-by: Maksim Kutakov <maksim.kutakov@intel.com>
1 parent cb34723 commit c01cd93

File tree

6 files changed

+166
-33
lines changed

6 files changed

+166
-33
lines changed

src/plugins/intel_cpu/src/nodes/executors/convolution_config.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@ struct ConvAttrs {
2020
std::vector<size_t> dilation;
2121
std::vector<ptrdiff_t> paddingL;
2222
std::vector<ptrdiff_t> paddingR;
23-
AutoPaddingType autoPadding;
23+
AutoPaddingType autoPadding = AutoPaddingType::None;
2424

25-
bool withBias;
26-
bool weightsNonTransposed;
27-
bool isGrouped;
25+
bool withBias = false;
26+
bool weightsNonTransposed = false;
27+
bool isGrouped = false;
2828
// @todo can we just check for port precisions instead?
29-
bool isGraphQuantized;
30-
bool fcSemantic;
31-
bool nonConstantWeights;
32-
ZeroPointsType inputZeroPointsType;
29+
bool isGraphQuantized = false;
30+
bool fcSemantic = false;
31+
bool nonConstantWeights = false;
32+
ZeroPointsType inputZeroPointsType = ZeroPointsType::None;
3333
std::vector<float> dqScales;
3434

3535
PostOps postOps;

src/plugins/intel_cpu/src/nodes/executors/convolution_implementations.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
#include <vector>
66

7+
#include "cpu/x64/cpu_isa_traits.hpp"
8+
#include "memory_desc/cpu_memory_desc.h"
79
#include "memory_desc/dnnl_blocked_memory_desc.h"
810
#include "memory_format_filter.hpp"
911
#include "nodes/executors/convolution_config.hpp"
@@ -63,6 +65,11 @@ template <typename PostOpType>
6365
});
6466
}
6567

68+
[[maybe_unused]] static inline bool isQuantized(const ConvConfig& config) {
69+
return one_of(config.descs.at(ARG_SRC)->getPrecision(), ov::element::u8, ov::element::i8) &&
70+
config.descs.at(ARG_WEI)->getPrecision() == ov::element::i8;
71+
};
72+
6673
template <typename Attrs>
6774
bool MatchesMemoryFormatFilter(const executor::Config<Attrs>& config,
6875
const LayoutConfig& layoutConfig,
@@ -113,7 +120,7 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
113120
}
114121

115122
VERIFY(!hasPostOp<DepthwiseConvolutionPostOp>(config), UNSUPPORTED_POST_OPS);
116-
VERIFY(DnnlConvolutionPrimitive::isBrgConvAvailable(config), "brgemm convolution is not available");
123+
VERIFY(isQuantized(config) || DnnlConvolutionPrimitive::isBrgConvAvailable(config), "is not quantized or brgemm convolution is not available");
117124

118125
return true;
119126
},
@@ -131,6 +138,7 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
131138
}
132139

133140
// fork kernel with dw conv post ops supports only src: (ncsp | nCsp8c), dst: nCsp8c
141+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
134142
VERIFY(!hasPostOp<DepthwiseConvolutionPostOp>(config), UNSUPPORTED_POST_OPS);
135143
const auto [groupNum, groupIC, IC, groupOC] = DnnlConvolutionPrimitive::getChannelParams(config);
136144

@@ -144,12 +152,15 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
144152
"convolution_dnnl_ncsp_nCsp16c", ExecutorType::Dnnl, OperationType::Convolution, ShapeTolerance::Agnostic,
145153
// supports
146154
[](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
155+
VERIFY(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core), UNSUPPORTED_ISA);
156+
147157
if (!MatchesMemoryFormatFilter(config, LayoutConfig{LayoutType::ncsp, LayoutType::ncsp, LayoutType::nCsp16c, LayoutType::nCsp16c},
148158
memoryFormatFilter)) {
149159
return false;
150160
}
151161

152162
// fork kernel with dw conv post ops supports only src: (ncsp | nCsp8c), dst: nCsp8c
163+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
153164
VERIFY(!hasPostOp<DepthwiseConvolutionPostOp>(config), UNSUPPORTED_POST_OPS);
154165

155166
const auto [groupNum, groupIC, IC, groupOC] = DnnlConvolutionPrimitive::getChannelParams(config);
@@ -169,6 +180,7 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
169180
return false;
170181
}
171182

183+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
172184
const auto [groupNum, groupIC, IC, groupOC] = DnnlConvolutionPrimitive::getChannelParams(config);
173185

174186
return IC < 4 && groupOC != 1;
@@ -181,12 +193,15 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
181193
"convolution_dnnl_nCsp16c_nCsp16c", ExecutorType::Dnnl, OperationType::Convolution, ShapeTolerance::Agnostic,
182194
// supports
183195
[](const ConvConfig& config, const MemoryFormatFilter& memoryFormatFilter) -> bool {
196+
VERIFY(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core), UNSUPPORTED_ISA);
197+
184198
if (!MatchesMemoryFormatFilter(config, LayoutConfig{LayoutType::nCsp16c, LayoutType::ncsp, LayoutType::nCsp16c, LayoutType::nCsp16c},
185199
memoryFormatFilter)) {
186200
return false;
187201
}
188202

189203
// fork kernel with dw conv post ops supports only src: (ncsp | nCsp8c), dst: nCsp8c
204+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
190205
VERIFY(!hasPostOp<DepthwiseConvolutionPostOp>(config), UNSUPPORTED_POST_OPS);
191206

192207
const auto [groupNum, groupIC, IC, groupOC] = DnnlConvolutionPrimitive::getChannelParams(config);
@@ -206,6 +221,7 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
206221
return false;
207222
}
208223

224+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
209225
const auto [groupNum, groupIC, IC, groupOC] = DnnlConvolutionPrimitive::getChannelParams(config);
210226

211227
return IC > 4;
@@ -223,6 +239,7 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
223239
return false;
224240
}
225241

242+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
226243
// fork kernel with dw conv post ops supports only src: (ncsp | nCsp8c), dst: nCsp8c
227244
VERIFY(!hasPostOp<DepthwiseConvolutionPostOp>(config), UNSUPPORTED_POST_OPS);
228245

@@ -241,6 +258,8 @@ const std::vector<ExecutorImplementation<ConvAttrs>>& getImplementations() {
241258
return false;
242259
}
243260

261+
VERIFY(!isQuantized(config), UNSUPPORTED_SRC_PRECISIONS);
262+
244263
return !one_of(srcType(config), ov::element::bf16, ov::element::f16) && DnnlConvolutionPrimitive::isNspcAvailable(config);
245264
},
246265
RequiresFallbackDefault{{LayoutType::nspc, LayoutType::ncsp, LayoutType::nspc, LayoutType::nspc}},

src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define HEURISTICS_MISMATCH " heuristics mismatch"
2121
#define UNSUPPORTED_PER_CHANNEL_QUANTIZATION " unsupported per-channel quantization"
2222

23+
// @todo implement VERIFY_OR version to support multiple conditions and error messages
2324
#define VERIFY(condition, ...) \
2425
do { \
2526
if (!(condition)) { \

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -282,27 +282,33 @@ static std::tuple<primitive_desc, size_t> selectPrimitiveDescWithMultipleAttribu
282282
};
283283

284284
PrimitiveDescWithPriority prim_desc_w_priority{dnnl::primitive_desc(), 0, implPriorities.size()};
285+
const bool first_match = implPriorities.front() == impl_desc_type::unknown;
285286

286287
// try all the provided attributes and select the one which results in a primitive desc with the highest priority
287288
for (size_t attrId = 0; attrId < attrs.size(); attrId++) {
288289
const auto& attr = attrs[attrId];
289290

290-
for (size_t priorityId = 0; priorityId < implPriorities.size(); priorityId++) {
291-
const auto preferredImplType = implPriorities[priorityId];
292-
// the only way to fully reset primitive_desc after iterating over the implementations is to re-create it
293-
auto cur_desc = createPrimitiveDescriptor(attr);
294-
const bool found = DnnlExtensionUtils::find_implementation(cur_desc, preferredImplType);
295-
296-
const size_t highestPriority = prim_desc_w_priority.priority;
297-
if (found && priorityId < highestPriority) {
298-
prim_desc_w_priority = {cur_desc, attrId, priorityId};
299-
}
300-
}
301-
}
302-
303-
auto prim_desc = prim_desc_w_priority.prim_desc;
304-
305-
return {prim_desc, prim_desc_w_priority.attrId};
291+
auto cur_desc = createPrimitiveDescriptor(attr);
292+
293+
DnnlExtensionUtils::for_each_implementation(
294+
cur_desc,
295+
first_match,
296+
[&](impl_desc_type implType) { // is acceptable implementation
297+
return contains(implPriorities, implType);
298+
},
299+
[&](dnnl::primitive_desc& desc) { // is implementation with highest priority
300+
const impl_desc_type descImplType = parse_impl_name(desc.impl_info_str());
301+
const auto it = std::find(implPriorities.begin(), implPriorities.end(), descImplType);
302+
const size_t priorityId = std::distance(implPriorities.begin(), it);
303+
const size_t highestPriority = prim_desc_w_priority.priority;
304+
if (priorityId < highestPriority) {
305+
auto desc_copy = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get(true)));
306+
prim_desc_w_priority = {desc_copy, attrId, priorityId};
307+
}
308+
});
309+
}
310+
311+
return {prim_desc_w_priority.prim_desc, prim_desc_w_priority.attrId};
306312
}
307313

308314
static primitive_desc createPrimitiveDesc(const dnnl::memory::desc& inputDesc,
@@ -881,7 +887,7 @@ bool DnnlConvolutionPrimitive::isJitPlanarAvailable(const ConvConfig& config) {
881887
const bool isAvx2FP32 = !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) &&
882888
dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2) && !config.attrs.isGraphQuantized;
883889

884-
const auto [groupNum, groupIC, groupOC, IC] = getChannelParams(config);
890+
const auto [groupNum, groupIC, IC, groupOC] = getChannelParams(config);
885891

886892
return (IC == 1 && groupOC * groupNum == 1) && isAvx2FP32;
887893
}
@@ -919,12 +925,7 @@ bool DnnlConvolutionPrimitive::isNspcAvailable(const ConvConfig& config) {
919925
auto outDims = config.descs.at(ARG_DST)->getShape().getDims();
920926
auto ndims = inpDims.size();
921927

922-
size_t groupNum;
923-
size_t groupIC;
924-
size_t groupOC;
925-
size_t IC;
926-
927-
std::tie(groupNum, groupIC, groupOC, IC) = getChannelParams(config);
928+
const auto [groupNum, groupIC, IC, groupOC] = getChannelParams(config);
928929

929930
bool isDepthWise = config.attrs.isGrouped && 1 == groupOC && 1 == groupIC;
930931

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ struct FCAttrs {
1616
bool withBias = false;
1717
bool weightsNonTransposed = false;
1818
bool sparseWeights = false;
19-
uint64_t dynamicQuantizationGroupSize;
19+
uint64_t dynamicQuantizationGroupSize = 0;
2020
bool nonConstantWeights = false;
2121

2222
ov::intel_cpu::Config::ModelType modelType = ov::intel_cpu::Config::ModelType::Unknown;
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include <memory>
6+
7+
#include "common_test_utils/node_builders/fake_quantize.hpp"
8+
#include "openvino/core/type/element_type.hpp"
9+
#include "openvino/op/constant.hpp"
10+
#include "openvino/op/convolution.hpp"
11+
#include "shared_test_classes/base/ov_subgraph.hpp"
12+
#include "utils/cpu_test_utils.hpp"
13+
#include "utils/filter_cpu_info.hpp"
14+
#include "utils/fusing_test_utils.hpp"
15+
16+
using namespace CPUTestUtils;
17+
18+
namespace ov {
19+
namespace test {
20+
21+
/* Verify simple quantized convolution subgraph.
22+
No reference implementations are expected to be used.
23+
24+
Param1
25+
|
26+
FQ_U8
27+
|
28+
Conv1
29+
|
30+
PReLU
31+
|
32+
Result
33+
34+
*/
35+
36+
// Test parameters: CPU-specific parameters paired with the fusing (post-op) parameters.
using ConvU8I8FP32Params = std::tuple<CPUSpecificParams, fusingSpecificParams>;
37+
38+
/**
 * Test fixture building a small subgraph: a Parameter passed through a FakeQuantize
 * with a [0, 2.55] range feeds a Convolution whose constant weights are passed through
 * a FakeQuantize with a [-1.28, 1.27] range. Post-ops are attached according to the
 * fusing parameters.
 */
class ConvU8I8FP32 : public testing::WithParamInterface<ConvU8I8FP32Params>,
                     virtual public SubgraphBaseStaticTest,
                     public CpuTestWithFusing {
public:
    /// Composes the test case name: "CPU_" followed by the CPU-specific and the fusing-specific name parts.
    static std::string getTestCaseName(const testing::TestParamInfo<ConvU8I8FP32Params>& obj) {
        const auto& [cpuParams, fusingParams] = obj.param;

        std::ostringstream result;
        result << "CPU_";
        result << CPUTestsBase::getTestCaseName(cpuParams);
        result << CpuTestWithFusing::getTestCaseName(fusingParams);
        return result.str();
    }

    /// Unpacks the test parameters into the fixture members and builds the tested function.
    void SetUp() override {
        const auto& [cpuParams, fusingParams] = this->GetParam();

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        std::tie(postOpMgrPtr, fusedOps) = fusingParams;

        const ov::element::Type netPrecision = ov::element::f32;

        targetDevice = ov::test::utils::DEVICE_CPU;

        // 256-level FakeQuantize with identical input and output ranges of [-1.28, 1.27]
        auto make_i8_fake_quantize = [](const std::shared_ptr<ov::Node>& input, ov::element::Type dataType) {
            return ov::test::utils::make_fake_quantize(input, dataType, 256, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f});
        };

        // 256-level FakeQuantize with identical input and output ranges of [0, 2.55]
        auto make_u8_fake_quantize = [](const std::shared_ptr<ov::Node>& input, ov::element::Type dataType) {
            return ov::test::utils::make_fake_quantize(input, dataType, 256, {}, {0.0f}, {2.55f}, {0.0f}, {2.55f});
        };

        // constant weights created from a single value and passed through the [-1.28, 1.27] FakeQuantize
        auto make_quantized_weights = [&make_i8_fake_quantize](const Shape& shape, ov::element::Type dataType) {
            auto weights = ov::op::v0::Constant::create(dataType, shape, std::vector<float>{-0.0512377955019474});
            return make_i8_fake_quantize(weights, dataType);
        };

        ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(netPrecision, ov::Shape{1, 3, 8, 8})};

        auto fq_input = make_u8_fake_quantize(params[0], netPrecision);
        auto fq_weights = make_quantized_weights({3, 3, 4, 4}, netPrecision);

        // unit strides and dilations, explicit pads set to zero, auto padding SAME_UPPER
        auto conv = std::make_shared<ov::op::v1::Convolution>(fq_input,
                                                              fq_weights,
                                                              Strides{1, 1},
                                                              CoordinateDiff{0, 0},
                                                              CoordinateDiff{0, 0},
                                                              Strides{1, 1},
                                                              ov::op::PadType::SAME_UPPER);

        // the convolution is handed over as the last node of the function being built
        function = makeNgraphFunction(netPrecision, params, conv, "Convolution");
    }
};
95+
96+
// Executes the subgraph test and then inspects the plugin-related results of the "Convolution" nodes.
// NOTE(review): run() presumably compares against reference results and CheckPluginRelatedResults()
// presumably validates the selected primitive type - confirm against the base classes.
TEST_P(ConvU8I8FP32, smoke_CompareWithRefs) {
    run();
    CheckPluginRelatedResults(compiledModel, "Convolution");
}
100+
101+
// Instantiates the suite for three implementations, after the list has been passed through filterCPUInfo().
// Each CPUSpecificParams entry is {input formats, output formats, priority, selected type}
// (the order in which SetUp() unpacks it); every entry is combined with the per-tensor PReLU fusing.
INSTANTIATE_TEST_SUITE_P(
    smoke_Conv,
    ConvU8I8FP32,
    ::testing::Combine(
        ::testing::ValuesIn(filterCPUInfo({
            CPUSpecificParams{{}, {}, {"jit_sse42"}, {"jit_sse42_I8"}},  // verify i8 SSE42 just in case
            CPUSpecificParams{{}, {}, {"jit_avx2"}, {"jit_avx2_I8"}},
            CPUSpecificParams{{}, {}, {"brgconv_avx512"}, {"brgconv_avx512_I8"}}})),
        ::testing::Values(fusingPReluPerTensor)),
    ConvU8I8FP32::getTestCaseName);
110+
111+
} // namespace test
112+
} // namespace ov

0 commit comments

Comments
 (0)