/*
 * Copyright (c) 2021 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 
/**
 * @addtogroup feature_processor
 * @{
 *
 * @brief Defines the basic functions for FeatureProcessor, including the supported data types
 * and other related configuration parameters.
 *
 * @since 2.2
 * @version 1.0
 */
26 
/**
 * @file mfcc_processor.h
 *
 * @brief Defines MFCCProcessor that calculates the Mel-frequency cepstral coefficient (MFCC) based
 * on the 16-bit PCM audio data.
 *
 * @since 2.2
 * @version 1.0
 */
36 
37 #ifndef AUDIO_PREPROCESS_MFCC_PROCESSOR_H
38 #define AUDIO_PREPROCESS_MFCC_PROCESSOR_H
39 
40 #include <cstdint>
41 #include <memory>
42 
43 #include "feature_processor.h"
44 
45 namespace OHOS {
46 namespace AI {
47 namespace Feature {
48 /**
49  * @brief Specifies the structure for the MFCCProcessor configuration.
50  *
51  * @since 2.2
52  * @version 1.0
53  */
54 struct MFCCConfig : FeatureProcessorConfig {
55     /** Indicates whether to execute the energy gain */
56     bool enablePcanGain;
57     /** Indicates whether to execute the weighted logarithm */
58     bool enableLogScale;
59     /** Number of smooth bits for noise reduction.
60      * The value is multiplied by 2 to the power of <b>noiseSmoothingBits</b> for signal smoothing. */
61     int16_t noiseSmoothingBits;
62     /** Number of gain bits.
63      * The value is multiplied by 2 to the power of <b>pcanGainBits</b> to achieve the gain effect. */
64     int16_t pcanGainBits;
65     /** Logarithmic weight. The output logarithm is multiplied by 2 to the power of <b>logScaleShift</b>. */
66     int16_t logScaleShift;
67     /** Size of the audio sampling window. The maximum value is <b>16000</b>. */
68     uint32_t windowSize;
69     /** Interval between audio sliding windows. The maximum value is <b>16000</b>. */
70     uint32_t slideSize;
71     /** Sampling rate of audio data. */
72     uint32_t sampleRate;
73     /** Number of MFCC features. The maximum value is {@link MAX_SAMPLE_SIZE}. */
74     uint32_t featureSize;
75     /** Number of frequency-domain channels. The maximum value is {@link MAX_NUM_CHANNELS}. */
76     uint32_t numChannels;
77     /** Maximum frequency threshold of the filter. The value must be greater than <b>0</b> and the actual value depends
78     * on the specific frequency requirements. */
79     float filterbankLowerBandLimit;
80     /** Minimum frequency threshold of the filter.
81      * The value must be greater than <b>0</b> and less than <b>filterbankLowerBandLimit</b>. */
82     float filterbankUpperBandLimit;
83     /** Smoothing coefficient for even channels of noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>. */
84     float noiseEvenSmoothing;
85     /** Smoothing coefficient for odd channels of noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>. */
86     float noiseOddSmoothing;
87     /** Signal reservation ratio for noise estimation. The value ranges from <b>0.0</b> to <b>1.0</b>.
88      * Value <b>1.0</b> indicates the signals are not filtered, and value <b>0.0</b> indicates signals are
89      * all filtered. */
90     float noiseMinSignalRemaining;
91     /** Gain normalization index. The value ranges from <b>0.0</b> to <b>1.0</b>.
92      * Value <b>0.0</b> indicates no gain, and value <b>1.0</b> indicates full gain. */
93     float pcanGainStrength;
94     /** Denominator offset for gain normalization to prevent the base number from being <b>0</b>.
95      * The value must be greater than <b>0</b>. */
96     float pcanGainOffset;
97 };
98 
/**
 * @brief Defines the sliding window configuration for MFCCProcessor.
 *
 * Each field mirrors a field of {@link MFCCConfig}, as noted per member. This struct declares no
 * default member initializers, so every field must be set explicitly before use.
 *
 * @since 2.2
 * @version 1.0
 */
struct MFCCLocalConfig {
    /** Number of samples for each sliding. The value is equal to that defined by {@link MFCCConfig.slideSize}. */
    uint32_t slideSampleNum;
    /** Number of samples for each sampling. The value is equal to that defined by {@link MFCCConfig.windowSize}. */
    uint32_t windowSampleNum;
    /** Number of frequency-domain channels. The value is equal to that defined by {@link MFCCConfig.numChannels}. */
    uint32_t numChannels;
    /** Number of MFCC features corresponding to the samples.
     * The value is equal to that defined by {@link MFCCConfig.featureSize}. */
    uint32_t featureSize;
};
116 
117 /**
118  * @brief Defines the functions for MFCCProcessor.
119  *
120  * @since 2.2
121  * @version 1.0
122  */
123 class MFCCProcessor : public FeatureProcessor {
124 public:
125     /**
126      * @brief Defines the constructor for MFCCProcessor.
127      *
128      * @since 2.2
129      * @version 1.0
130      */
131     MFCCProcessor();
132 
133     /**
134      * @brief Defines the destructor for MFCCProcessor.
135      *
136      * @since 2.2
137      * @version 1.0
138      */
139     virtual ~MFCCProcessor();
140 
141     /**
142      * @brief Initializes MFCCProcessor.
143      *
144      * @param config Indicates the pointer to the basic configuration of FeatureProcessor.
145      * The caller needs to pass in a pointer address defined by {@link MFCCConfig} and
146      * release the pointer after using it.
147      * @return Returns {@link RETCODE_SUCCESS} if the operation is successful;
148      * returns {@link RETCODE_FAILURE} otherwise.
149      *
150      * @since 2.2
151      * @version 1.0
152      */
153     int32_t Init(const FeatureProcessorConfig *config) override;
154 
155     /**
156      * @brief Performs feature processing.
157      *
158      * @param input Indicates the input data for FeatureProcessor.
159      * The caller must pass in FeatureData of the INT16 type defined by {@link DataType},
160      * besides the address and data length must meet the configuration requirements.
161      * @param output Indicates the output data for FeatureProcessor.
162      * The caller must pass in FeatureData of the UINT16 type defined by {@link DataType}.
163      * If and only if its address is empty and the data length is <b>0</b>,
164      * data will be filled by the FeatureProcessor.
165      * @return Returns {@link RETCODE_SUCCESS} if the operation is successful;
166      * returns {@link RETCODE_FAILURE} otherwise.
167      *
168      * @since 2.2
169      * @version 1.0
170      */
171     int32_t Process(const FeatureData &input, FeatureData &output) override;
172 
173     /**
174      * @brief Releases resources.
175      *
176      * @since 2.2
177      * @version 1.0
178      */
179     void Release() override;
180 
181 private:
182     class MFCCImpl;
183     std::unique_ptr<MFCCImpl> impl_;
184 };
185 } // namespace Feature
186 } // namespace AI
187 } // namespace OHOS
188 #endif // AUDIO_PREPROCESS_MFCC_PROCESSOR_H
/** @} */