/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnrt_delegate_kernel.h"

#include <algorithm>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "tensorflow/lite/context_util.h"
#include "neural_network_runtime.h"

namespace tflite {
namespace delegate {
namespace nnrt {
constexpr int32_t SCALAR_RANK = 1;

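// Checks the return code of an NNRT call made during compilation; on failure it logs
// the error, destroys the pending OH_NNCompilation and returns kTfLiteError.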
#define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(code, callDesc)                                                   \
    do {                                                                                                              \
        if ((code) != OH_NN_SUCCESS) {                                                                                \
            const auto errorDesc = NnrtErrorDescription((code));                                                      \
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "NN API returned error %s at line %d while %s.\n", errorDesc.c_str(),   \
                __LINE__, (callDesc));                                                                                \
            m_nnrt->OH_NNCompilation_Destroy(&m_pNnCompilation);                                                      \
            return kTfLiteError;                                                                                      \
        }                                                                                                             \
    } while (0)

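// Returns true if the given TFLite builtin operator code maps to a supported NNRT operation.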
bool NnrtDelegateKernel::Validate(const int32_t builtinCode)
{
    if (TFLITE_TYPE_TO_NNRT_TYPE.count(builtinCode) &&
        TFLITE_TYPE_TO_NNRT_TYPE.at(builtinCode) != OH_NN_UNSUPPORT_OPS) {
        return true;
    }

    return false;
}

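// Records the nodes delegated to NNRT, resolves the target device, and constructs the
// NNRT model graph from the partition's input and output tensors. Calling Init again
// after a successful initialization is a no-op.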
TfLiteStatus NnrtDelegateKernel::Init(TfLiteContext* context, const TfLiteDelegateParams* params)
{
    TF_LITE_ENSURE_EQ(context, params != nullptr, true);

    if (m_initialised) {
        TFLITE_LOG_PROD(TFLITE_LOG_INFO,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel has completed initialization, no need to init again.");
        return kTfLiteOk;
    }

    for (auto nodeIndex : TfLiteIntArrayView(params->nodes_to_replace)) {
        m_delegateNodes.emplace_back(nodeIndex);
    }

    NnrtDelegate::Options delegateOptions;
    TF_LITE_ENSURE_STATUS(NnrtDelegate::GetOptions(params->delegate, delegateOptions));
    TF_LITE_ENSURE_STATUS(tflite::GetTargetDevice(context, params->delegate, m_nnrt, m_nnrtDevice));
    if (m_nnModel == nullptr) {
        m_nnModel = m_nnrt->OH_NNModel_Construct();
        if (m_nnModel == nullptr) {
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create NNRT model.");
            return kTfLiteError;
        }
        TF_LITE_ENSURE_STATUS(BuildGraph(context, delegateOptions, params->input_tensors, params->output_tensors));
    }

    m_initialised = true;

    return kTfLiteOk;
}

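// Creates the OH_NNCompilation for the built model, applies the delegate options
// (device, performance mode, cache) and compiles it. Skipped if already compiled.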
TfLiteStatus NnrtDelegateKernel::Prepare(TfLiteContext* context, TfLiteNode* node)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);

    if (!m_initialised) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel Prepare failed, not initialized yet.");
        return kTfLiteError;
    }

    if (m_compiled) {
        return kTfLiteOk; // If the model has already been compiled, no need to compile again.
    }

    // Create OH_NNCompilation
    m_pNnCompilation = m_nnrt->OH_NNCompilation_Construct(m_nnModel);
    if (m_pNnCompilation == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create OH_NNCompilation instance.");
        return kTfLiteError;
    }

    NnrtDelegate::Options delegateOptions;
    TF_LITE_ENSURE_STATUS(NnrtDelegate::GetOptions(node->delegate, delegateOptions));

    TF_LITE_ENSURE_STATUS(SetNnOptions(context, delegateOptions));
    RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(m_nnrt->OH_NNCompilation_Build(m_pNnCompilation),
        "completing NNRT compilation");

    m_compiled = true;
    return kTfLiteOk;
}

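// Runs one inference: constructs an executor from the compilation, binds the input and
// output tensor buffers, executes the delegated subgraph, then destroys the executor.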
TfLiteStatus NnrtDelegateKernel::Invoke(TfLiteContext* context, TfLiteNode* node)
{
    if (!m_compiled) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] NnrtDelegateKernel Invoke failed, not compiled yet.");
        return kTfLiteError;
    }

    // Create the OH_NNExecutor instance.
    OH_NNExecutor* pNnExecution {nullptr};
    pNnExecution = m_nnrt->OH_NNExecutor_Construct(m_pNnCompilation);
    if (pNnExecution == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Fail to create OH_NNExecutor instance.");
        return kTfLiteError;
    }

    // Set the input tensor buffers.
    OH_NN_Tensor inputNnTensor;
    TF_LITE_ENSURE_STATUS(SetInputTensors(context, node, pNnExecution, inputNnTensor));

    // Set the output tensor buffers.
    TF_LITE_ENSURE_STATUS(SetOutputTensors(context, node, pNnExecution));

    // Invoke the delegated subgraph.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNExecutor_Run(pNnExecution), "running computation");

    m_nnrt->OH_NNExecutor_Destroy(&pNnExecution);
    pNnExecution = nullptr;
    return kTfLiteOk;
}

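// Maps a TFLite builtin operator code to its NNRT operation type and adds the
// operator's parameters through the op builder.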
TfLiteStatus NnrtDelegateKernel::Map(const int32_t builtinCode, const NnrtOpMappingArgs& mappingArgs,
    int32_t& nnOpType) const
{
    if (TFLITE_TYPE_TO_NNRT_TYPE.count(builtinCode) == 0) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] Unsupported TF-Lite operator, builtinCode: %d.", builtinCode);
        return kTfLiteError;
    }

    TfLiteStatus retValue = mappingArgs.builder->AddOpFuncParams(mappingArgs, builtinCode);
    if (retValue != kTfLiteOk) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Failed to add params to the operation.");
        return retValue;
    }
    nnOpType = TFLITE_TYPE_TO_NNRT_TYPE.at(builtinCode);

    return kTfLiteOk;
}

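// Adds all delegated ops and tensors to the NNRT model, declares the model inputs and
// outputs by their NNRT tensor indices, and finalizes the model.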
TfLiteStatus NnrtDelegateKernel::BuildGraph(TfLiteContext* context, const NnrtDelegate::Options& delegateOptions,
    const TfLiteIntArray* inputTensors, const TfLiteIntArray* outputTensors)
{
    if (context == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] The context is nullptr when building the graph.");
        return kTfLiteError;
    }

    TF_LITE_ENSURE_EQ(context, inputTensors != nullptr, true);
    TF_LITE_ENSURE_EQ(context, outputTensors != nullptr, true);

    // Build the ops and tensors.
    TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context, inputTensors, delegateOptions));
    // Map the TensorFlow Lite input and output tensor indices to NN indices.
    OH_NN_UInt32Array inputIndices;
    OH_NN_UInt32Array outputIndices;
    std::vector<uint32_t> inputsData;
    for (auto i : TfLiteIntArrayView(inputTensors)) {
        // Constant tensors are not NNRT inputs.
        if ((i != kTfLiteOptionalTensor) && (context->tensors[i].allocation_type != kTfLiteMmapRo) &&
            // The delegate might not have mapped this input (this can
            // happen if one tensor is split into several ones).
            (m_tensorMapping.LiteIndexToNn(i) != INVALID_INDEX)) {
            const int32_t inputTensorNnIndex = m_tensorMapping.LiteIndexToNn(i);
            inputsData.emplace_back(inputTensorNnIndex);
        }
    }

    std::vector<uint32_t> outputsData;
    for (auto i : TfLiteIntArrayView(outputTensors)) {
        const int32_t outputTensorNnIndex = m_tensorMapping.LiteIndexToNn(i);
        // Unmapped outputs are not added.
        if (outputTensorNnIndex != INVALID_INDEX) {
            outputsData.emplace_back(outputTensorNnIndex);
        }
    }

    inputIndices.data = inputsData.data();
    outputIndices.data = outputsData.data();
    inputIndices.size = inputsData.size();
    outputIndices.size = outputsData.size();
    // Tell NNRT which tensors are the model inputs and outputs.
    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNModel_SpecifyInputsAndOutputs(m_nnModel, &inputIndices,
        &outputIndices), "identifying model inputs and outputs");

    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNModel_Finish(m_nnModel), "finalizing the model");
    return kTfLiteOk;
}

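// Iterates over the delegated nodes, maps each builtin code to an NNRT operation,
// registers the node's input and output tensors, and finalizes each operation.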
TfLiteStatus NnrtDelegateKernel::AddOpsAndTensors(TfLiteContext* context, const TfLiteIntArray* inputTensors,
    const NnrtDelegate::Options& delegateOptions)
{
    // The op builder allows creating a single op. It is created outside
    // the for loop to avoid reallocating the vectors.
    NnrtOpBuilderArgs opBuilderArgs = {
        .context = context,
        .nnModel = m_nnModel,
        .inputTensors = const_cast<TfLiteIntArray*>(inputTensors),
        .pTensorMapping = &m_tensorMapping,
        .delegateOptions = delegateOptions
    };
    NnrtOpBuilder builder(m_nnrt, opBuilderArgs);

    // Clear the input and output lists.
    builder.ClearInputOuputLists();

    // Add the delegated ops and their tensors.
    TfLiteNode* node = nullptr;
    TfLiteRegistration* reg = nullptr;
    for (int32_t nodeIndex : m_delegateNodes) {
        node = nullptr;
        reg = nullptr;
        TF_LITE_ENSURE_STATUS(
            context->GetNodeAndRegistration(context, nodeIndex, &node, &reg)); // Obtain the op and registration.
        if ((node == nullptr) || (reg == nullptr)) {
            TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "[NNRT-DELEGATE_KERNEL] Get node and registration failed.");
            return kTfLiteError;
        }

        const bool scalarAsTensor = IsScalarInputSupported(reg->builtin_code);
        int32_t inputTensorFlags = 0;
        if (scalarAsTensor) {
            inputTensorFlags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
        }

        // Get the op type and tensors; fails if the Validate function failed.
        int32_t nnOpType;
        NnrtOpMappingArgs opMappingArgs = { context, &builder, node, nodeIndex };
        TF_LITE_ENSURE_STATUS(Map(reg->builtin_code, opMappingArgs, nnOpType));

        for (int32_t inputPos = 0; inputPos < node->inputs->size; ++inputPos) {
            if ((reg->builtin_code == kTfLiteBuiltinFullyConnected) &&
                (node->inputs->data[inputPos] == kTfLiteOptionalTensor)) {
                continue; // Skip the optional bias and handle it during mapping.
            }
            const auto inputIndex = node->inputs->data[inputPos];
            TF_LITE_ENSURE_STATUS(builder.AddTensorInput(inputIndex, reg->builtin_code, inputTensorFlags));
        }
        // Map outputs to NN API tensor indices.
        int32_t outputTensorFlags = 0;
        for (int32_t outputPos = 0; outputPos < node->outputs->size; ++outputPos) {
            auto outputIndex = node->outputs->data[outputPos];
            TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(outputIndex, reg->builtin_code, outputTensorFlags));
        }
        TF_LITE_ENSURE_STATUS(builder.FinalizeAddOperation(static_cast<OH_NN_OperationType>(nnOpType), nodeIndex));
    }

    return kTfLiteOk;
}

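// Fills an OH_NN_Tensor descriptor (data type, rank, dimensions, quantization) from the
// TFLite tensor at indexPair.first; scalar tensors are promoted to rank-1 tensors.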
TfLiteStatus NnrtDelegateKernel::ConvertTensorTypeToNn(TfLiteContext* context,
    const std::pair<int32_t, int32_t>& indexPair, OH_NN_QuantParam* nnQuantParam, OH_NN_Tensor& nnTensor)
{
    TF_LITE_ENSURE_EQ(context, context->tensors_size > indexPair.first, true);
    TfLiteTensor* tensor = &(context->tensors[indexPair.first]);
    TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);

    OH_NN_DataType nnType {OH_NN_UNKNOWN};
    TF_LITE_ENSURE_STATUS(m_tensorMapping.ConvertType(context, indexPair.first, 0, nnType));

    uint32_t tensorRank = static_cast<uint32_t>(tensor->dims->size);
    int32_t* tensorDims = reinterpret_cast<int32_t*>(tensor->dims->data);
    if (tensorDims == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] The tensorDims is nullptr when converting the tensor type to NNRT.");
        return kTfLiteError;
    }

    // Treat a scalar input as a single-cell tensor in NNRT.
    if (tensorRank == 0) {
        tensorRank = SCALAR_RANK;
        *tensorDims = SCALAR_RANK;
    }

    nnTensor.dataType = nnType;
    nnTensor.dimensionCount = tensorRank;
    nnTensor.dimensions = tensorDims;
    nnTensor.quantParam = nnQuantParam;
    nnTensor.type = OH_NN_TENSOR;

    return kTfLiteOk;
}

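// Binds the buffer of every non-constant, non-optional input tensor of the node to the
// executor, using the relative input indices expected by the NN API.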
TfLiteStatus NnrtDelegateKernel::SetInputTensors(TfLiteContext* context, TfLiteNode* node,
    OH_NNExecutor* pNnExecution, OH_NN_Tensor& nnTensor)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);
    TF_LITE_ENSURE_EQ(context, pNnExecution != nullptr, true);

    // Note: we access TFLite tensors by absolute index, but the NN API
    // addresses inputs by relative index.
    int32_t relativeIndex = 0;
    OH_NN_QuantParam* nnQuantParam = nullptr;
    TfLiteIntArray* tensors = node->inputs;
    TF_LITE_ENSURE_EQ(context, tensors != nullptr, true);

    for (auto absoluteIndex : TfLiteIntArrayView(tensors)) {
        if (absoluteIndex == kTfLiteOptionalTensor) {
            continue;
        }

        std::pair<int32_t, int32_t> indexPair = std::make_pair(absoluteIndex, relativeIndex);
        TF_LITE_ENSURE_STATUS(ConvertTensorTypeToNn(context, indexPair, nnQuantParam, nnTensor));

        TfLiteTensor* tensor = &context->tensors[absoluteIndex];
        TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);

        // Constant (memory-mapped read-only) tensors are part of the model, not execution inputs.
        if (tensor->allocation_type != kTfLiteMmapRo) {
            RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(m_nnrt->OH_NNExecutor_SetInput(pNnExecution, relativeIndex,
                &nnTensor, tensor->data.raw, tensor->bytes),
                "associating NNRT execution input with a memory object", tensor);
            ++relativeIndex;
        }
    }

    return kTfLiteOk;
}

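// Binds the buffer of every mapped output tensor of the node to the executor, using the
// relative output indices expected by the NN API.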
TfLiteStatus NnrtDelegateKernel::SetOutputTensors(TfLiteContext* context, TfLiteNode* node,
    OH_NNExecutor* pNnExecution)
{
    TF_LITE_ENSURE_EQ(context, node != nullptr, true);
    TF_LITE_ENSURE_EQ(context, pNnExecution != nullptr, true);

    // Note: we access TFLite tensors by absolute index, but the NN API
    // addresses outputs by relative index.
    int32_t relativeIndex = 0;
    TfLiteIntArray* tensors = node->outputs;
    TF_LITE_ENSURE_EQ(context, tensors != nullptr, true);
    for (auto absoluteIndex : TfLiteIntArrayView(tensors)) {
        if (m_tensorMapping.LiteIndexToNn(absoluteIndex) == INVALID_INDEX) {
            continue;
        }

        TfLiteTensor* tensor = &context->tensors[absoluteIndex];
        TF_LITE_ENSURE_EQ(context, tensor != nullptr, true);
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
            m_nnrt->OH_NNExecutor_SetOutput(pNnExecution, relativeIndex, tensor->data.raw, tensor->bytes),
            "associating NNRT execution output with a memory object", tensor);
        ++relativeIndex;
    }

    return kTfLiteOk;
}

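// Applies the delegate options to the compilation: target device, optional performance
// mode, and the compilation cache when a cache directory is provided.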
TfLiteStatus NnrtDelegateKernel::SetNnOptions(TfLiteContext* context, const NnrtDelegate::Options& delegateOptions)
{
    if (context == nullptr) {
        TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
            "[NNRT-DELEGATE_KERNEL] The context is nullptr when setting nnrt options.");
        return kTfLiteError;
    }

    RETURN_TFLITE_ERROR_IF_NN_ERROR(m_nnrt->OH_NNCompilation_SetDevice(m_pNnCompilation, m_nnrtDevice),
        "setting the device for NNRT compilation");

    auto performance = delegateOptions.executionPerformance;
    if (performance != OH_NN_PERFORMANCE_NONE) {
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(
            m_nnrt->OH_NNCompilation_SetPerformanceMode(m_pNnCompilation, performance),
            "setting compilation performance");
    }

    // Set the cache if cacheDir, modelToken and device are valid.
    std::string cacheDir = delegateOptions.cacheDir;
    std::string modelToken = delegateOptions.modelToken;
    uint32_t version = delegateOptions.version;
    if (!cacheDir.empty() && (!IsUseTargetDevice(delegateOptions) ||
        (delegateOptions.acceleratorName == NNRT_REFERENCE_DEVICE))) {
        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_COMPILE(
            m_nnrt->OH_NNCompilation_SetCache(m_pNnCompilation, cacheDir.c_str(), version),
            "setting compilation cache");
    } else if (cacheDir.empty()) {
        TFLITE_LOG_PROD(TFLITE_LOG_WARNING, "The cacheDir is empty, will not load or save cache.");
    }
    return kTfLiteOk;
}
} // namespace nnrt
} // namespace delegate
} // namespace tflite