1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 /** 17 * @addtogroup feature_processor 18 * @{ 19 * 20 * @brief Defines the basic functions for FeatureProcessor, including the supported data types 21 * and other related configuration parameters. 22 * 23 * @since 2.2 24 * @version 1.0 25 */ 26 27 /** 28 * @file mfcc_processor.h 29 * 30 * @brief Defines MFCCProcessor that calculates the Mel-frequency cepstral coefficient (MFCC) based 31 * on the 16-bit PCM audio data. 32 * 33 * @since 2.2 34 * @version 1.0 35 */ 36 37 #ifndef AUDIO_PREPROCESS_MFCC_PROCESSOR_H 38 #define AUDIO_PREPROCESS_MFCC_PROCESSOR_H 39 40 #include <cstdint> 41 #include <memory> 42 43 #include "feature_processor.h" 44 45 namespace OHOS { 46 namespace AI { 47 namespace Feature { 48 /** 49 * @brief Specifies the structure for the MFCCProcessor configuration. 50 * 51 * @since 2.2 52 * @version 1.0 53 */ 54 struct MFCCConfig : FeatureProcessorConfig { 55 /** Indicates whether to execute the energy gain */ 56 bool enablePcanGain; 57 /** Indicates whether to execute the weighted logarithm */ 58 bool enableLogScale; 59 /** Number of smooth bits for noise reduction. 60 * The value is multiplied by 2 to the power of <b>noiseSmoothingBits</b> for signal smoothing. */ 61 int16_t noiseSmoothingBits; 62 /** Number of gain bits. 63 * The value is multiplied by 2 to the power of <b>pcanGainBits</b> to achieve the gain effect. */ 64 int16_t pcanGainBits; 65 /** Logarithmic weight. The output logarithm is multiplied by 2 to the power of <b>logScaleShift</b>. */ 66 int16_t logScaleShift; 67 /** Size of the audio sampling window. The maximum value is <b>16000</b>. */ 68 uint32_t windowSize; 69 /** Interval between audio sliding windows. The maximum value is <b>16000</b>. */ 70 uint32_t slideSize; 71 /** Sampling rate of audio data. */ 72 uint32_t sampleRate; 73 /** Number of MFCC features. The maximum value is {@link MAX_SAMPLE_SIZE}. */ 74 uint32_t featureSize; 75 /** Number of frequency-domain channels. The maximum value is {@link MAX_NUM_CHANNELS}. */ 76 uint32_t numChannels; 77 /** Maximum frequency threshold of the filter. The value must be greater than <b>0</b> and the actual value depends 78 * on the specific frequency requirements. */ 79 float filterbankLowerBandLimit; 80 /** Minimum frequency threshold of the filter. 81 * The value must be greater than <b>0</b> and less than <b>filterbankLowerBandLimit</b>. */ 82 float filterbankUpperBandLimit; 83 /** Smoothing coefficient for even channels of noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>. */ 84 float noiseEvenSmoothing; 85 /** Smoothing coefficient for odd channels of noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>. */ 86 float noiseOddSmoothing; 87 /** Signal reservation ratio for noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>. 88 * Value <b>1.0</b> indicates the signals are not filtered, and value <b>0.0</b> indicates signals are 89 * all filtered. */ 90 float noiseMinSignalRemaining; 91 /** Gain normalization index. The value ranges from <b>0.0</b> to <b>1.0</b>. 92 * Value <b>0.0</b> indicates no gain, and value <b>1.0</b> indicates full gain. */ 93 float pcanGainStrength; 94 /** Denominator offset for gain normalization to prevent the base number from being <b>0</b>. 95 * The value must be greater than <b>0</b>. */ 96 float pcanGainOffset; 97 }; 98 99 /** 100 * @brief Defines the sliding window configuration for MFCCProcessor. 101 * 102 * @since 2.2 103 * @version 1.0 104 */ 105 struct MFCCLocalConfig { 106 /** Number of samples for each sliding. The value is equal to that defined by {@link MFCCConfig.slideSize}. */ 107 uint32_t slideSampleNum; 108 /** Number of samples for each sampling. The value is equal to that defined by {@link MFCCConfig.windowSize}. */ 109 uint32_t windowSampleNum; 110 /** Number of frequency-domain channels. The value is equal to that defined by {@link MFCCConfig.numChannels}. */ 111 uint32_t numChannels; 112 /** Number of MFCC features corresponding to the samples. 113 * The value is equal to that defined by {@link MFCCConfig.featureSize}. */ 114 uint32_t featureSize; 115 }; 116 117 /** 118 * @brief Defines the functions for MFCCProcessor. 119 * 120 * @since 2.2 121 * @version 1.0 122 */ 123 class MFCCProcessor : public FeatureProcessor { 124 public: 125 /** 126 * @brief Defines the constructor for MFCCProcessor. 127 * 128 * @since 2.2 129 * @version 1.0 130 */ 131 MFCCProcessor(); 132 133 /** 134 * @brief Defines the destructor for MFCCProcessor. 135 * 136 * @since 2.2 137 * @version 1.0 138 */ 139 virtual ~MFCCProcessor(); 140 141 /** 142 * @brief Initializes MFCCProcessor. 143 * 144 * @param config Indicates the pointer to the basic configuration of FeatureProcessor. 145 * The caller needs to pass in a pointer address defined by {@link MFCCConfig} and 146 * release the pointer after using it. 147 * @return Returns {@link RETCODE_SUCCESS} if the operation is successful; 148 * returns {@link RETCODE_FAILURE} otherwise. 149 * 150 * @since 2.2 151 * @version 1.0 152 */ 153 int32_t Init(const FeatureProcessorConfig *config) override; 154 155 /** 156 * @brief Performs feature processing. 157 * 158 * @param input Indicates the input data for FeatureProcessor. 159 * The caller must pass in FeatureData of the INT16 type defined by {@link DataType}, 160 * besides the address and data length must meet the configuration requirements. 161 * @param output Indicates the output data for FeatureProcessor. 162 * The caller must pass in FeatureData of the UINT16 type defined by {@link DataType}. 163 * If and only if its address is empty and the data length is <b>0</b>, 164 * data will be filled by the FeatureProcessor. 165 * @return Returns {@link RETCODE_SUCCESS} if the operation is successful; 166 * returns {@link RETCODE_FAILURE} otherwise. 167 * 168 * @since 2.2 169 * @version 1.0 170 */ 171 int32_t Process(const FeatureData &input, FeatureData &output) override; 172 173 /** 174 * @brief Releases resources. 175 * 176 * @since 2.2 177 * @version 1.0 178 */ 179 void Release() override; 180 181 private: 182 class MFCCImpl; 183 std::unique_ptr<MFCCImpl> impl_; 184 }; 185 } // namespace Feature 186 } // namespace AI 187 } // namespace OHOS 188 #endif // AUDIO_PREPROCESS_MFCC_PROCESSOR_H 189 /** @} */