/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnrt_delegate_kernel.h"

#include <algorithm>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "tensorflow/lite/context_util.h"
#include "neural_network_runtime.h"
namespace tflite {
namespace delegate {
namespace nnrt {
constexpr int32_t SCALAR_RANK = 1;

#define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(code, callDesc) \
    do { \
        if ((code) != OH_NN_SUCCESS) { \
            const auto errorDesc = NnrtErrorDescription((code)); \
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "NN API returned error %s at line %d while %s.\n", errorDesc.c_str(), \
                __LINE__, (callDesc)); \
            m_nnrt->OH_NNCompilation_Destroy(&m_pNnCompilation); \
            return kTfLiteError; \
        } \
    } while (0)

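// Returns true if the given TF-Lite builtin operator code has a supported NNRT mapping.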
bool NnrtDelegateKernel::Validate(const int32_t builtinCode)
{
    if (TFLITE_TYPE_TO_NNRT_TYPE.count(builtinCode) &&
        TFLITE_TYPE_TO_NNRT_TYPE.at(builtinCode) != OH_NN_UNSUPPORT_OPS) {
        return true;
    }

    return false;
}

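// Initializes the kernel for the delegated partition: records the nodes to replace,
// resolves the delegate options and target NNRT device, and builds the NNRT model.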
TfLiteStatus NnrtDelegateKernel::Init(TfLiteContext* context, const TfLiteDelegateParams* params)
{
    TF_LITE_ENSURE_EQ(context, params != nullptr, true);

    if (m_initialised) {
        TFLITE_LOG_PROD(TFLITE_LOG_INFO,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel has completed initialization, no need to init again.");
        return kTfLiteOk;
    }

    for (auto nodeIndex : TfLiteIntArrayView(params->nodes_to_replace)) {
        m_delegateNodes.emplace_back(nodeIndex);
    }

    NnrtDelegate::Options delegateOptions;
    TF_LITE_ENSURE_STATUS(NnrtDelegate::GetOptions(params->delegate, delegateOptions));
    TF_LITE_ENSURE_STATUS(tflite::GetTargetDevice(context, params->delegate, m_nnrt, m_nnrtDevice));
    if (m_nnModel == nullptr) {
        m_nnModel = m_nnrt->OH_NNModel_Construct();
        if (m_nnModel == nullptr) {
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create NNRT model.");
            return kTfLiteError;
        }
        TF_LITE_ENSURE_STATUS(BuildGraph(context, delegateOptions, params->input_tensors, params->output_tensors));
    }

    m_initialised = true;

    return kTfLiteOk;
}

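// Compiles the NNRT model built in Init: creates the OH_NNCompilation instance,
// applies the device, performance and cache options, then builds the compilation.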
TfLiteStatus NnrtDelegateKernel::Prepare(TfLiteContext* context, TfLiteNode* node)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);

    if (!m_initialised) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel Prepare failed, not Init yet.");
        return kTfLiteError;
    }

    if (m_compiled) {
        return kTfLiteOk; // If the model has already been compiled, there is no need to compile it again.
    }

    // Create the OH_NNCompilation instance.
    m_pNnCompilation = m_nnrt->OH_NNCompilation_Construct(m_nnModel);
    if (m_pNnCompilation == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create OH_NNCompilation instance.");
        return kTfLiteError;
    }

    NnrtDelegate::Options delegateOptions;
    TF_LITE_ENSURE_STATUS(NnrtDelegate::GetOptions(node->delegate, delegateOptions));

    TF_LITE_ENSURE_STATUS(SetNnOptions(context, delegateOptions));
    RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(m_nnrt->OH_NNCompilation_Build(m_pNnCompilation),
        "completing NNRT compilation");

    m_compiled = true;
    return kTfLiteOk;
}

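// Runs the compiled NNRT model for one inference: creates an executor, binds the
// input and output tensor buffers, runs the computation, and destroys the executor.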
TfLiteStatus NnrtDelegateKernel::Invoke(TfLiteContext* context, TfLiteNode* node)
{
    if (!m_compiled) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel Invoke failed, not compiled yet.");
        return kTfLiteError;
    }

    // Create the OH_NNExecutor instance.
    OH_NNExecutor* pNnExecution = m_nnrt->OH_NNExecutor_Construct(m_pNnCompilation);
    if (pNnExecution == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create OH_NNExecutor instance.");
        return kTfLiteError;
    }

    // Set the input tensor buffers.
    OH_NN_Tensor inputNnTensor;
    TF_LITE_ENSURE_STATUS(SetInputTensors(context, node, pNnExecution, inputNnTensor));

    // Set the output tensor buffers.
    TF_LITE_ENSURE_STATUS(SetOutputTensors(context, node, pNnExecution));

    // Invoke the delegated subgraph.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNExecutor_Run(pNnExecution), "running computation");

    m_nnrt->OH_NNExecutor_Destroy(&pNnExecution);
    pNnExecution = nullptr;
    return kTfLiteOk;
}

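// Maps a TF-Lite builtin operator code to its NNRT operation type and adds the
// operator's parameters to the model through the op builder.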
TfLiteStatus NnrtDelegateKernel::Map(const int32_t builtinCode, const NnrtOpMappingArgs& mappingArgs,
    int32_t& nnOpType) const
{
    if (TFLITE_TYPE_TO_NNRT_TYPE.count(builtinCode) == 0) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] Unsupported TF-Lite operator, builtinCode: %d.", builtinCode);
        return kTfLiteError;
    }

    TfLiteStatus retValue = mappingArgs.builder->AddOpFuncParams(mappingArgs, builtinCode);
    if (retValue != kTfLiteOk) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Failed to add params to the operation.");
        return retValue;
    }
    nnOpType = TFLITE_TYPE_TO_NNRT_TYPE.at(builtinCode);

    return kTfLiteOk;
}

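// Builds the NNRT model graph: adds all delegated ops and tensors, maps the TF-Lite
// input/output tensor indices to NNRT indices, declares them on the model, and finalizes it.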
TfLiteStatus NnrtDelegateKernel::BuildGraph(TfLiteContext* context, const NnrtDelegate::Options& delegateOptions,
    const TfLiteIntArray* inputTensors, const TfLiteIntArray* outputTensors)
{
    if (context == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] The context is nullptr when building the graph.");
        return kTfLiteError;
    }

    TF_LITE_ENSURE_EQ(context, inputTensors != nullptr, true);
    TF_LITE_ENSURE_EQ(context, outputTensors != nullptr, true);

    // Build the ops and tensors.
    TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context, inputTensors, delegateOptions));

    // Map the TensorFlow Lite input and output tensor indices to NNRT indices.
    OH_NN_UInt32Array inputIndices;
    OH_NN_UInt32Array outputIndices;
    std::vector<uint32_t> inputsData;
    for (auto i : TfLiteIntArrayView(inputTensors)) {
        // Constant tensors are not NNRT inputs.
        if ((i != kTfLiteOptionalTensor) && (context->tensors[i].allocation_type != kTfLiteMmapRo) &&
            // The delegate might not have mapped this input (this can
            // happen if one tensor is split into several ones).
            (m_tensorMapping.LiteIndexToNn(i) != INVALID_INDEX)) {
            const int32_t inputTensorNnIndex = m_tensorMapping.LiteIndexToNn(i);
            inputsData.emplace_back(inputTensorNnIndex);
        }
    }

    std::vector<uint32_t> outputsData;
    for (auto i : TfLiteIntArrayView(outputTensors)) {
        const int32_t outputTensorNnIndex = m_tensorMapping.LiteIndexToNn(i);
        // Unmapped outputs are not added.
        if (outputTensorNnIndex != INVALID_INDEX) {
            outputsData.emplace_back(outputTensorNnIndex);
        }
    }

    inputIndices.data = inputsData.data();
    outputIndices.data = outputsData.data();
    inputIndices.size = inputsData.size();
    outputIndices.size = outputsData.size();

    // Tell NNRT which tensors are the model inputs and outputs.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNModel_SpecifyInputsAndOutputs(m_nnModel, &inputIndices,
        &outputIndices), "identifying model inputs and outputs");

    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNModel_Finish(m_nnModel), "finalizing the model");
    return kTfLiteOk;
}

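// Adds every delegated TF-Lite node and its tensors to the NNRT model: for each node,
// maps its operator type, adds the input and output tensors, and finalizes the operation.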
TfLiteStatus NnrtDelegateKernel::AddOpsAndTensors(TfLiteContext* context, const TfLiteIntArray* inputTensors,
    const NnrtDelegate::Options& delegateOptions)
{
    // The op builder allows creating a single op. It is created outside
    // the for loop to avoid reallocating the vectors.
    NnrtOpBuilderArgs opBuilderArgs = {
        .context = context,
        .nnModel = m_nnModel,
        .inputTensors = const_cast<TfLiteIntArray*>(inputTensors),
        .pTensorMapping = &m_tensorMapping,
        .delegateOptions = delegateOptions
    };
    NnrtOpBuilder builder(m_nnrt, opBuilderArgs);

    // Clear the input and output lists.
    builder.ClearInputOuputLists();

    // Add the delegated ops and their tensors.
    TfLiteNode* node = nullptr;
    TfLiteRegistration* reg = nullptr;
    for (int32_t nodeIndex : m_delegateNodes) {
        node = nullptr;
        reg = nullptr;
        TF_LITE_ENSURE_STATUS(
            context->GetNodeAndRegistration(context, nodeIndex, &node, &reg)); // Obtain the op and registration.
        if ((node == nullptr) || (reg == nullptr)) {
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Get node and registration failed.");
            return kTfLiteError;
        }

        const bool scalarAsTensor = IsScalarInputSupported(reg->builtin_code);
        int32_t inputTensorFlags = 0;
        if (scalarAsTensor) {
            inputTensorFlags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
        }

        // Get the op type and tensors; fails if the Validate function failed.
        int32_t nnOpType;
        NnrtOpMappingArgs opMappingArgs = { context, &builder, node, nodeIndex };
        TF_LITE_ENSURE_STATUS(Map(reg->builtin_code, opMappingArgs, nnOpType));

        for (int32_t inputPos = 0; inputPos < node->inputs->size; ++inputPos) {
            if ((reg->builtin_code == kTfLiteBuiltinFullyConnected) &&
                (node->inputs->data[inputPos] == kTfLiteOptionalTensor)) {
                continue; // Skip the optional bias and handle it during mapping.
            }
            const auto inputIndex = node->inputs->data[inputPos];
            TF_LITE_ENSURE_STATUS(builder.AddTensorInput(inputIndex, reg->builtin_code, inputTensorFlags));
        }
        // Map outputs to NNRT tensor indices.
        int32_t outputTensorFlags = 0;
        for (int32_t outputPos = 0; outputPos < node->outputs->size; ++outputPos) {
            auto outputIndex = node->outputs->data[outputPos];
            TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(outputIndex, reg->builtin_code, outputTensorFlags));
        }
        TF_LITE_ENSURE_STATUS(builder.FinalizeAddOperation(static_cast<OH_NN_OperationType>(nnOpType), nodeIndex));
    }

    return kTfLiteOk;
}

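// Fills an OH_NN_Tensor descriptor for the TF-Lite tensor addressed by indexPair.first:
// converts its data type, rank and dimensions, treating scalars as rank-1 tensors.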
TfLiteStatus NnrtDelegateKernel::ConvertTensorTypeToNn(TfLiteContext* context,
    const std::pair<int32_t, int32_t>& indexPair, OH_NN_QuantParam* nnQuantParam, OH_NN_Tensor& nnTensor)
{
    TF_LITE_ENSURE_EQ(context, context->tensors_size > indexPair.first, true);
    TfLiteTensor* tensor = &(context->tensors[indexPair.first]);
    TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);

    OH_NN_DataType nnType {OH_NN_UNKNOWN};
    TF_LITE_ENSURE_STATUS(m_tensorMapping.ConvertType(context, indexPair.first, 0, nnType));

    uint32_t tensorRank = static_cast<uint32_t>(tensor->dims->size);
    int32_t* tensorDims = reinterpret_cast<int32_t*>(tensor->dims->data);
    if (tensorDims == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] The tensorDims is nullptr when converting the type of tensors to nnrt.");
        return kTfLiteError;
    }

    // Treat a scalar input as a single-cell tensor in NNRT.
    if (tensorRank == 0) {
        tensorRank = SCALAR_RANK;
        *tensorDims = SCALAR_RANK;
    }

    nnTensor.dataType = nnType;
    nnTensor.dimensionCount = tensorRank;
    nnTensor.dimensions = tensorDims;
    nnTensor.quantParam = nnQuantParam;
    nnTensor.type = OH_NN_TENSOR;

    return kTfLiteOk;
}

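// Binds the data buffers of the node's non-constant input tensors to the NNRT executor.
// TF-Lite tensors are addressed by absolute index, while NNRT inputs use relative indices.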
TfLiteStatus NnrtDelegateKernel::SetInputTensors(TfLiteContext* context, TfLiteNode* node,
    OH_NNExecutor* pNnExecution, OH_NN_Tensor& nnTensor)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);
    TF_LITE_ENSURE_EQ(context, pNnExecution != nullptr, true);

    // Note: TF-Lite tensors are accessed by absolute indices, but the NN API
    // addresses inputs by relative indices.
    int32_t relativeIndex = 0;
    OH_NN_QuantParam* nnQuantParam = nullptr;
    TfLiteIntArray* tensors = node->inputs;
    TF_LITE_ENSURE_EQ(context, tensors != nullptr, true);

    for (auto absoluteIndex : TfLiteIntArrayView(tensors)) {
        if (absoluteIndex == kTfLiteOptionalTensor) {
            continue;
        }

        std::pair<int32_t, int32_t> indexPair = std::make_pair(absoluteIndex, relativeIndex);
        TF_LITE_ENSURE_STATUS(ConvertTensorTypeToNn(context, indexPair, nnQuantParam, nnTensor));

        TfLiteTensor* tensor = &context->tensors[absoluteIndex];
        TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);

        if (tensor->allocation_type != kTfLiteMmapRo) {
            RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(m_nnrt->OH_NNExecutor_SetInput(pNnExecution, relativeIndex,
                &nnTensor, tensor->data.raw, tensor->bytes),
                "associating NNRT execution input with a memory object", tensor);
            ++relativeIndex;
        } else {
            continue;
        }
    }

    return kTfLiteOk;
}

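// Binds the data buffers of the node's mapped output tensors to the NNRT executor,
// again translating absolute TF-Lite indices to relative NNRT output indices.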
TfLiteStatus NnrtDelegateKernel::SetOutputTensors(TfLiteContext* context, TfLiteNode* node,
    OH_NNExecutor* pNnExecution)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);
    TF_LITE_ENSURE_EQ(context, pNnExecution != nullptr, true);

    // Note: TF-Lite tensors are accessed by absolute indices, but the NN API
    // addresses outputs by relative indices.
    int32_t relativeIndex = 0;
    TfLiteIntArray* tensors = node->outputs;
    TF_LITE_ENSURE_EQ(context, tensors != nullptr, true);
    for (auto absoluteIndex : TfLiteIntArrayView(tensors)) {
        if (m_tensorMapping.LiteIndexToNn(absoluteIndex) == INVALID_INDEX) {
            continue;
        }

        TfLiteTensor* tensor = &context->tensors[absoluteIndex];
        TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
            m_nnrt->OH_NNExecutor_SetOutput(pNnExecution, relativeIndex, tensor->data.raw, tensor->bytes),
            "associating NNRT execution output with a memory object", tensor);
        ++relativeIndex;
    }

    return kTfLiteOk;
}

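// Applies the delegate options to the NNRT compilation: target device, performance
// mode, and the compilation cache (when a cache directory is configured).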
TfLiteStatus NnrtDelegateKernel::SetNnOptions(TfLiteContext* context, const NnrtDelegate::Options& delegateOptions)
{
    if (context == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] The context is nullptr when setting nnrt options.");
        return kTfLiteError;
    }

    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNCompilation_SetDevice(m_pNnCompilation, m_nnrtDevice),
        "setting NNRT compilation device");

    auto performance = delegateOptions.executionPerformance;
    if (performance != OH_NN_PERFORMANCE_NONE) {
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(
            m_nnrt->OH_NNCompilation_SetPerformanceMode(m_pNnCompilation, performance),
            "setting compilation performance");
    }

    // Set the cache if cacheDir, modelToken and device are valid.
    std::string cacheDir = delegateOptions.cacheDir;
    std::string modelToken = delegateOptions.modelToken;
    uint32_t version = delegateOptions.version;
    if (!cacheDir.empty() && (!IsUseTargetDevice(delegateOptions) ||
        (delegateOptions.acceleratorName == NNRT_REFERENCE_DEVICE))) {
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(
            m_nnrt->OH_NNCompilation_SetCache(m_pNnCompilation, cacheDir.c_str(), version),
            "setting compilation cache");
    } else if (cacheDir.empty()) {
        TFLITE_LOG_PROD(TFLITE_LOG_WARNING, "The cacheDir is empty, will not load or save cache.");
    }
    return kTfLiteOk;
}
} // namespace nnrt
} // namespace delegate
} // namespace tflite