1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "csv_parser.h"
16
17 #include <algorithm>
18 #include <cctype>
19
META_BEGIN_NAMESPACE()20 META_BEGIN_NAMESPACE()
21
22 CsvParser::CsvParser(BASE_NS::string_view csv, const char delimiter) : delimiter_(delimiter), csv_(csv) {}
23
GetRow(CsvRow & row)24 bool CsvParser::GetRow(CsvRow& row)
25 {
26 auto nextRow = ParseRow();
27 row.swap(nextRow);
28 return !row.empty();
29 }
30
Reset()31 void CsvParser::Reset()
32 {
33 pos_ = 0;
34 }
35
36 /**
37 * @brief Returns a trimmed string based on state.
38 * @param sv The string to trim.
39 * @param state State of the parser.
40 * @return If state is QUOTED, returns the string itself. Otherwise returns the string
41 * trimmed from trailing and leading whitespace.
42 */
Trimmed(BASE_NS::string_view sv,State state)43 BASE_NS::string_view CsvParser::Trimmed(BASE_NS::string_view sv, State state)
44 {
45 if (state == QUOTED) {
46 return sv;
47 }
48 constexpr auto nspace = [](unsigned char ch) { return !std::isspace(static_cast<int>(ch)); };
49 sv.remove_suffix(std::distance(std::find_if(sv.rbegin(), sv.rend(), nspace).base(), sv.end()));
50 sv.remove_prefix(std::find_if(sv.begin(), sv.end(), nspace) - sv.begin());
51 return sv;
52 }
53
/**
 * @brief Translates the character following a backslash into the character it denotes.
 * @param next The character that follows the backslash.
 * @return A pair whose first member tells whether next forms a supported escape sequence and
 *         whose second member is the resulting character. Supported sequences are \n (newline),
 *         \t (tab), \\ (backslash) and \" (double quote). For anything else the pair is
 *         { false, next }.
 */
std::pair<bool, char> HandleEscaped(char next)
{
    switch (next) {
        case 'n':
            return { true, '\n' };
        case 't':
            return { true, '\t' };
        case '\\':
        case '"':
            // These escapes stand for the character itself.
            return { true, next };
        default:
            return { false, next };
    }
}
76
ParseRow()77 CsvParser::CsvRow CsvParser::ParseRow()
78 {
79 BASE_NS::vector<BASE_NS::string> items;
80 BASE_NS::string item;
81 State state { NO_QUOTE };
82
83 while (pos_ < csv_.size()) {
84 auto c = csv_[pos_++];
85 if (c == '\r') { // Ignore carriage returns
86 continue;
87 }
88 if (c == '"') {
89 if (state == IN_QUOTE && pos_ < csv_.size() - 1 && csv_[pos_] == '"') {
90 // Double quotes interpreted as a single quote
91 item += c;
92 pos_++;
93 } else { // Begin/end quote
94 state = (state == NO_QUOTE) ? IN_QUOTE : QUOTED;
95 if (state == IN_QUOTE) {
96 // Quoted part starts, ignore anything before it
97 item.clear();
98 }
99 }
100 } else if (c == delimiter_ && state != IN_QUOTE) {
101 // Delimiter found while not within quotes, move to next item
102 items.emplace_back(Trimmed(item, state));
103 item.clear();
104 state = NO_QUOTE;
105 } else if (c == '\n' && state != IN_QUOTE) {
106 // End of line while not within quotes, the row is complete
107 break;
108 } else if (state != QUOTED) {
109 // By default include character in result, unless we already had
110 // quoted content, then anything outside of quotes is ignored until
111 // next delimiter
112 if (c == '\\' && pos_ < csv_.size() - 1) {
113 if (auto esc = HandleEscaped(csv_[pos_]); esc.first) {
114 item += esc.second;
115 pos_++;
116 continue;
117 }
118 }
119 item += c;
120 }
121 }
122
123 // Any leftover since the last delimiter is the last item on the row
124 if (auto trimmed = Trimmed(item, state); !trimmed.empty()) {
125 items.emplace_back(trimmed);
126 }
127 return items;
128 }
129
130 META_END_NAMESPACE()
131