1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# Copyright 2014 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Helper functions useful when writing scripts that integrate with GN.
8
The main functions are to_gn_string and from_gn_string, which convert between
serialized GN variables and Python variables.
11
12To use in a random python file in the build:
13
14  import os
15  import sys
16
17  sys.path.append(os.path.join(os.path.dirname(__file__),
18                               os.pardir, os.pardir, "build"))
19  import gn_helpers
20
21Where the sequence of parameters to join is the relative path from your source
22file to the build directory.
23"""
24
25
class GNException(Exception):
    """Raised when a value cannot be converted to or from GN syntax."""
28
29
def to_gn_string(value: "str | bool | int | list | dict",
                 allow_dicts: bool = True) -> str:
    """Returns a stringified GN equivalent of the Python value.

    Supported inputs are str, bool, int, list (of supported values) and, at
    the top level only, dict with string keys (printed as one
    "key = value" line per entry, in sorted key order).

    allow_dicts indicates if this function will allow converting dictionaries
    to GN scopes. This is only possible at the top level, you can't nest a
    GN scope in a list, so it is set to False for all recursive calls.

    Raises:
        GNException: if a string contains a newline, a dictionary is nested
            inside a list or another dictionary, a dictionary key is not a
            string, or the value has an unsupported type.
    """
    if isinstance(value, str):
        if '\n' in value:
            raise GNException("Trying to print a string with a newline in it.")
        # Backslashes must be escaped first so that the backslashes added for
        # '"' and '$' are not themselves doubled.
        escaped = (value.replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('$', '\\$'))
        return '"' + escaped + '"'

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"

    if isinstance(value, list):
        # A GN scope cannot appear inside a list, so dicts are rejected here.
        return '[ %s ]' % ', '.join(to_gn_string(v, False) for v in value)

    if isinstance(value, dict):
        if not allow_dicts:
            raise GNException("Attempting to recursively print a dictionary.")
        # Validate keys before sorting: sorting mixed key types would raise
        # TypeError instead of the documented GNException.
        for key in value:
            if not isinstance(key, str):
                raise GNException("Dictionary key is not a string.")
        return ''.join(
            "%s = %s\n" % (key, to_gn_string(value[key], False))
            for key in sorted(value))

    if isinstance(value, int):
        return str(value)

    raise GNException("Unsupported type when printing to GN.")
69
70
def from_gn_string(input_string: str) -> "bool | int | str | list":
    r"""Converts the input string from a GN serialized value to Python values.

    The result is a bool, int, str or list, depending on the input. For
    details on supported types see GNValueParser.parse().

    If your GN script did:
      something = [ "file1", "file2" ]
      args = [ "--values=$something" ]
    The command line would look something like:
      --values="[ \"file1\", \"file2\" ]"
    Which when interpreted as a command line gives the value:
      [ "file1", "file2" ]

    You can parse this into a Python list using GN rules with:
      input_values = from_gn_string(options.values)
    Although the Python 'ast' module will parse many forms of such input, it
    will not handle GN escaping properly, nor GN booleans. You should use this
    function instead.


    A NOTE ON STRING HANDLING:

    If you just pass a string on the command line to your Python script, or use
    string interpolation on a string variable, the strings will not be quoted:
      str = "asdf"
      args = [ str, "--value=$str" ]
    Will yield the command line:
      asdf --value=asdf
    The unquoted asdf string will not be valid input to this function, which
    accepts only quoted strings like GN scripts. In such cases, you can just
    use the Python string literal directly.

    The main use case for this function is other types, in particular lists.
    When using string interpolation on a list (as in the top example) the
    embedded strings will be quoted and escaped according to GN rules so the
    list can be re-parsed to get the same result.

    Raises:
        GNException: if the input is not a valid serialized GN value or has
            trailing content after the value.
    """
    return GNValueParser(input_string).parse()
110
111
def from_gn_args(input_string: str) -> dict:
    """Parses a string of GN arg assignments into a Python dict.

    The input is a whitespace-separated sequence of assignments of the form

      <ident> = (integer | string | boolean | <list of the former>)

    and the result maps each identifier to its parsed Python value, e.g.:

      from_gn_args("foo=true\\nbar=1\\n") -> { 'foo': True, 'bar': 1 }.

    Only simple types and lists are supported; variables, structs, calls
    and other, more complicated things are not.

    This routine is meant to handle only the simple sorts of values that
    arise in parsing --args.
    """
    return GNValueParser(input_string).parse_args()
131
132
def unescape_gn_special_char(char_after_backslash: str) -> str:
    """Resolves the character that follows a backslash in a GN string.

    Returns the character itself when it is one of the escapable characters
    ('$', '"' or a backslash); otherwise returns a backslash.
    """
    is_escapable = char_after_backslash in ('$', '"', '\\')
    return char_after_backslash if is_escapable else '\\'
139
140
def unescape_gn_string(value: str) -> str:
    """Given a string with GN escaping, returns the unescaped string.

    A backslash followed by '$', '"' or another backslash is replaced by that
    following character. Any other backslash, including one that is the last
    character of the input, is kept as a literal backslash.

    Be careful not to feed with input from a Python parsing function like
    'ast' because it will do Python unescaping, which will be incorrect when
    fed into the GN unescaper.
    """
    escapable = ('$', '"', '\\')
    result = []
    length = len(value)
    i = 0
    while i < length:
        char = value[i]
        if char == '\\' and i + 1 < length and value[i + 1] in escapable:
            # Escape sequence: emit the escaped character and skip both.
            result.append(value[i + 1])
            i += 2
        else:
            # Ordinary character, or a backslash that does not start an
            # escape sequence (which GN treats as a literal backslash).
            result.append(char)
            i += 1
    return ''.join(result)
163
164
def _is_digit_or_minus(char: str):
    """Returns True if char can begin a number: a decimal digit or '-'."""
    number_start_chars = "-" + "0123456789"
    return char in number_start_chars
167
168
class GNValueParser(object):
    """Duplicates GN parsing of values and converts to Python types.

    Normally you would use the module-level wrapper functions
    from_gn_string() and from_gn_args() instead of this class.

    If you expect input as a specific type, you can also call one of the
    parse_* methods directly. All methods raise GNException on invalid input.
    """

    def __init__(self, string: str):
        """Sets up a parser positioned at the start of `string`."""
        self.input = string
        # Index of the next unconsumed character in self.input.
        self.cur = 0

    def is_done(self) -> bool:
        """Returns True when the whole input has been consumed."""
        return self.cur == len(self.input)

    def consume_whitespace(self):
        """Advances past any spaces, tabs and newlines at the cursor."""
        while not self.is_done() and self.input[self.cur] in ' \t\n':
            self.cur += 1

    def parse(self):
        """Converts a string representing a printed GN value to the Python type.

        See additional usage notes on from_gn_string.

        - GN booleans ('true', 'false') will be converted to Python booleans.

        - GN numbers ('123') will be converted to Python numbers.

        - GN strings (double-quoted as in '"asdf"') will be converted to Python
          strings with GN escaping rules. GN string interpolation (embedded
          variables preceded by $) are not supported and will be returned as
          literals.

        - GN lists ('[1, "asdf", 3]') will be converted to Python lists.

        - GN scopes ('{ ... }') are not supported.

        Raises:
            GNException: if the input is invalid or anything other than
                whitespace follows the parsed value.
        """
        result = self._parse_allow_trailing()
        self.consume_whitespace()
        if not self.is_done():
            raise GNException("Trailing input after parsing:\n  " +
                              self.input[self.cur:])
        return result

    def parse_args(self) -> dict:
        """Converts a whitespace-separated list of ident=literals to a dict.

        See additional usage notes on from_gn_args.

        Raises:
            GNException: if an identifier, '=' or value is missing or
                malformed.
        """
        d = {}

        self.consume_whitespace()
        while not self.is_done():
            ident = self._parse_ident()
            self.consume_whitespace()
            # Guard against the input ending right after the identifier so
            # that the error is a GNException rather than an IndexError.
            if self.is_done():
                raise GNException("Expected '=' after identifier: " + ident)
            if self.input[self.cur] != '=':
                raise GNException("Unexpected token: " + self.input[self.cur:])
            self.cur += 1
            self.consume_whitespace()
            val = self._parse_allow_trailing()
            self.consume_whitespace()
            d[ident] = val

        return d

    def parse_number(self) -> int:
        """Parses an optionally negative integer at the cursor.

        Raises:
            GNException: if there is no input left or no valid number starts
                at the cursor.
        """
        self.consume_whitespace()
        if self.is_done():
            raise GNException('Expected number but got nothing.')

        begin = self.cur

        # The first character can include a negative sign.
        if _is_digit_or_minus(self.input[self.cur]):
            self.cur += 1
        while not self.is_done() and self.input[self.cur].isdigit():
            self.cur += 1

        number_string = self.input[begin:self.cur]
        if not number_string or number_string == '-':
            raise GNException("Not a valid number.")
        try:
            return int(number_string)
        except ValueError:
            # str.isdigit() accepts some characters that int() rejects.
            raise GNException("Not a valid number.")

    def parse_string(self) -> str:
        """Parses a double-quoted GN string and returns it unescaped.

        Raises:
            GNException: if there is no input left, the cursor is not at a
                '"', or the string is not terminated.
        """
        self.consume_whitespace()
        if self.is_done():
            raise GNException('Expected string but got nothing.')

        if self.input[self.cur] != '"':
            raise GNException('Expected string beginning in a " but got:\n  ' +
                              self.input[self.cur:])
        self.cur += 1  # Skip over quote.

        begin = self.cur
        while not self.is_done() and self.input[self.cur] != '"':
            if self.input[self.cur] == '\\':
                # Skip over the backslash so that the character after it
                # (possibly a quote) does not terminate the string.
                self.cur += 1
                if self.is_done():
                    raise GNException("String ends in a backslash in:\n  " +
                                      self.input)
            self.cur += 1

        if self.is_done():
            raise GNException('Unterminated string:\n  ' + self.input[begin:])

        end = self.cur
        self.cur += 1  # Consume trailing ".

        return unescape_gn_string(self.input[begin:end])

    def parse_list(self) -> list:
        """Parses a bracketed, comma-separated GN list into a Python list.

        A trailing comma after the last item is accepted.

        Raises:
            GNException: if there is no input left, the cursor is not at a
                '[', items are not separated by commas, or the list is not
                terminated.
        """
        self.consume_whitespace()
        if self.is_done():
            raise GNException('Expected list but got nothing.')

        if self.input[self.cur] != '[':
            raise GNException("Expected [ for list but got:\n  " +
                              self.input[self.cur:])
        self.cur += 1  # Skip over opening '['.
        self.consume_whitespace()
        if self.is_done():
            raise GNException("Unterminated list:\n  " + self.input)

        list_result = []
        # Starts as True so that the first item needs no preceding comma.
        previous_had_trailing_comma = True
        while not self.is_done():
            if self.input[self.cur] == ']':
                self.cur += 1  # Skip over ']'.
                return list_result

            if not previous_had_trailing_comma:
                raise GNException("List items not separated by comma.")

            list_result.append(self._parse_allow_trailing())
            self.consume_whitespace()
            if self.is_done():
                break

            # Consume comma if there is one.
            previous_had_trailing_comma = self.input[self.cur] == ','
            if previous_had_trailing_comma:
                self.cur += 1
                self.consume_whitespace()

        raise GNException("Unterminated list:\n  " + self.input)

    def _constant_follows(self, constant) -> bool:
        """Returns true if the given constant follows immediately at the
        current location in the input. If it does, the text is consumed and
        the function returns true. Otherwise, returns false and the current
        position is unchanged."""
        if self.input.startswith(constant, self.cur):
            self.cur += len(constant)
            return True
        return False

    def _parse_allow_trailing(self):
        """Internal version of parse() that doesn't check for trailing stuff.

        Dispatches on the first non-whitespace character to parse a list,
        number, string or boolean.

        Raises:
            GNException: if there is no input left or the next token is not
                the start of a supported value.
        """
        self.consume_whitespace()
        if self.is_done():
            raise GNException("Expected input to parse.")

        next_char = self.input[self.cur]
        if next_char == '[':
            return self.parse_list()
        if _is_digit_or_minus(next_char):
            return self.parse_number()
        if next_char == '"':
            return self.parse_string()
        if self._constant_follows('true'):
            return True
        if self._constant_follows('false'):
            return False
        raise GNException("Unexpected token: " + self.input[self.cur:])

    def _parse_ident(self) -> str:
        """Parses an identifier: a letter or '_' followed by letters, digits
        or '_'. The identifier may run up to the end of the input.

        Raises:
            GNException: if no identifier starts at the cursor.
        """
        if self.is_done():
            raise GNException("Expected an identifier but got nothing.")

        first_char = self.input[self.cur]
        if not first_char.isalpha() and first_char != '_':
            raise GNException("Expected an identifier: " + self.input[self.cur:])

        begin = self.cur
        self.cur += 1
        # Stop at the end of the input instead of indexing past it.
        while not self.is_done():
            char = self.input[self.cur]
            if not (char.isalpha() or char.isdigit() or char == '_'):
                break
            self.cur += 1

        return self.input[begin:self.cur]
368