src/util/xmlpool/gen_xmlpool.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230


#
# Usage:
#     gen_xmlpool.py /path/to/t_option.h localedir lang lang lang ...
#
# For each given language, this script expects to find a compiled gettext
# catalog at `{localedir}/{language}.gmo`. Languages whose catalog cannot
# be opened are skipped with a warning on stderr.
#

from __future__ import print_function
import argparse
import gettext
import io
import os
import re
import sys

# Command line: the options template, the directory holding the compiled
# catalogs, and zero or more language codes.
parser = argparse.ArgumentParser()
parser.add_argument('template')
parser.add_argument('localedir')
parser.add_argument('languages', nargs='*')
args = parser.parse_args()

# Name of the translation-object method used for lookups: 'ugettext' on
# Python 2, 'gettext' on Python 3.
gettext_method = 'ugettext' if sys.version_info < (3, 0) else 'gettext'

# Escape special characters in C strings
def escapeCString(s):
    """Escape a string so it can be placed inside a C string literal.

    Control characters and backslashes are replaced by their C escape
    sequences. Plain double quotes (") aren't possible in XML attributes,
    so each one is replaced by a typographic quote: a quote at the end of
    the string or directly followed by whitespace is treated as a closing
    quote (U+201D, RIGHT DOUBLE QUOTATION MARK), any other quote as an
    opening quote (U+201C, LEFT DOUBLE QUOTATION MARK).

    Returns the escaped string.
    """
    escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
                  '\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
    parts = []
    last = len(s) - 1
    for i, c in enumerate(s):
        if c == '"':
            if i == last or s[i + 1].isspace():
                # close quote
                parts.append(u'\u201d')
            else:
                # open quote
                parts.append(u'\u201c')
        else:
            # Characters without a C escape sequence pass through unchanged.
            parts.append(escapeSeqs.get(c, c))
    return ''.join(parts)

# Expand escape sequences in C strings (needed for gettext lookup)
def expandCString(s):
    """Expand the escape sequences of a C string literal body.

    Handles the single-character escapes (\\a \\b \\f \\n \\r \\t \\v \\" \\\\),
    octal escapes (at most three digits when the first digit is 0-3, at
    most two otherwise) and hexadecimal escapes (at most two digits).
    A backslash followed by any other character yields that character,
    and a lone backslash at the end of the string is dropped.

    The character that ends a short numeric escape is not part of the
    escape and is processed normally, and a numeric escape at the very
    end of the string is still emitted.

    Returns the expanded string as unicode text.
    """
    escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
                  'r' : '\r', 't' : '\t', 'v' : '\v',
                  '"' : '"', '\\' : '\\'}
    octDigits = '01234567'
    hexDigits = '0123456789abcdefABCDEF'
    # On Python 2, chr() returns a byte string, which cannot be joined with
    # unicode text for values >= 0x80; unichr() is the unicode equivalent.
    try:
        toChar = unichr
    except NameError:
        toChar = chr

    parts = []
    n = len(s)
    i = 0
    while i < n:
        c = s[i]
        i += 1
        if c != '\\':
            parts.append(c)
            continue
        if i == n:
            # Lone backslash at the end of the string: nothing to expand.
            break
        c = s[i]
        i += 1
        if c in escapeSeqs:
            parts.append(escapeSeqs[c])
        elif c in octDigits:
            num = int(c, 8)
            # Limit the digit count so the value stays within one byte.
            remaining = 2 if num <= 3 else 1
            while remaining and i < n and s[i] in octDigits:
                num = num * 8 + int(s[i], 8)
                i += 1
                remaining -= 1
            parts.append(toChar(num))
        elif c in 'xX' and i < n and s[i] in hexDigits:
            num = 0
            remaining = 2
            while remaining and i < n and s[i] in hexDigits:
                num = num * 16 + int(s[i], 16)
                i += 1
                remaining -= 1
            parts.append(toChar(num))
        else:
            # Unknown escape (including \x without hex digits): keep the
            # character itself.
            parts.append(c)
    return u''.join(parts)

# Expand matches. The first match is always a DESC or DESC_BEGIN match.
# Subsequent matches are ENUM matches.
#
# DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
# ENUM format:             \1 \2=gettext(" \3=<text> \4=") \5
def expandMatches(matches, translations, end=None):
    """Print one description (plus its enum lines) for every translation.

    matches      -- non-empty list of regex match objects. The first one is
                    a DESC or DESC_BEGIN match (groups: 1 prefix, 2 language,
                    3 separator, 4 gettext(", 5 text, 6 "), 7 suffix); the
                    others are ENUM matches (groups: 1 prefix, 2 gettext(",
                    3 text, 4 "), 5 suffix).
    translations -- list of (language code, translation object) pairs.
    end          -- optional text printed verbatim (no newline added) after
                    each translation's lines, i.e. the description end line.

    The output lines are assembled from the match groups by plain
    concatenation rather than Match.expand(): expand() treats its argument
    as a template and would re-interpret the backslash escapes that
    escapeCString() has just produced in the translated text.
    """
    assert len(matches) > 0
    first = matches[0]
    lastIndex = len(translations) - 1
    # Make sure that all but the last line of a simple description
    # are extended with a backslash.
    needsContinuation = (len(matches) == 1 and
                         not first.group(7).endswith('\\'))

    # Expand the description+enums for all translations
    for i, (lang, trans) in enumerate(translations):
        translate = getattr(trans, gettext_method)
        suffix = ' \\' if needsContinuation and i < lastIndex else ''
        text = escapeCString(translate(expandCString(first.group(5))))
        _printEncoded(first.group(1) + lang + first.group(3) +
                      '"' + text + '"' + first.group(7) + suffix)

        # Expand any subsequent enum lines
        for match in matches[1:]:
            text = escapeCString(translate(expandCString(match.group(3))))
            _printEncoded(match.group(1) + '"' + text + '"' + match.group(5))

        # Expand description end
        if end:
            _printEncoded(end, end='')


def _printEncoded(text, end='\n'):
    """Print text to stdout, encoding unicode text as UTF-8 on Python 2."""
    # In Python 2, stdout expects encoded byte strings, or else it will
    # encode them with the ascii 'codec'
    if sys.version_info.major == 2 and not isinstance(text, str):
        text = text.encode('utf-8')
    print(text, end=end)

# Build the list of (language code, translation object) pairs for all
# supported languages. The first entry is always English, backed by a
# NullTranslations object.
translations = [("en", gettext.NullTranslations())]
for lang in args.languages:
    filename = os.path.join(args.localedir, lang + '.gmo')
    try:
        with io.open(filename, 'rb') as f:
            trans = gettext.GNUTranslations(f)
    except (IOError, OSError):
        # A catalog that cannot be opened only drops that language.
        print("Warning: language '%s' not found." % lang, file=sys.stderr)
    else:
        translations.append((lang, trans))

# Regular expressions:
#
# reLibintl_h matches an #include of <libintl.h>; the dot is escaped so
# that only the literal file name matches.
reLibintl_h = re.compile(r'#\s*include\s*<libintl\.h>')
# DESC / DESC_BEGIN groups:
#   \1 macro name and "("   \2 language   \3 comma   \4 gettext("
#   \5 text   \6 ")   \7 closing ")" and optional trailing backslash
reDESC = re.compile(r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_BEGIN = re.compile(r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
# ENUM groups:
#   \1 macro name, "(", first argument and comma   \2 gettext("
#   \3 text   \4 ")   \5 closing ")" and optional trailing backslash
reENUM = re.compile(r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_END = re.compile(r'\s*DRI_CONF_DESC_END')

# Print a header
print("/***********************************************************************\n" \
" ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***\n" \
" ***********************************************************************/")

# Process the options template and generate options.h with all
# translations.
#
# descMatches is empty while outside a multi-line description. Inside one
# it holds the DESC_BEGIN match followed by the ENUM matches seen so far.
# The template is opened in a with-statement so that it is closed even if
# processing raises.
descMatches = []
with io.open(args.template, mode="rt", encoding='utf-8') as template:
    for line in template:
        if descMatches:
            # Inside a description: collect enum lines until the end marker.
            matchENUM = reENUM.match(line)
            matchDESC_END = reDESC_END.match(line)
            if matchENUM:
                descMatches.append(matchENUM)
            elif matchDESC_END:
                expandMatches(descMatches, translations, line)
                descMatches = []
            else:
                print("Warning: unexpected line inside description dropped:\n", line,
                      file=sys.stderr)
            continue
        if reLibintl_h.search(line):
            # Ignore (comment out) #include <libintl.h>. The line keeps its
            # trailing newline, so the comment spans two output lines.
            print("/* %s * commented out by gen_xmlpool.py */" % line)
            continue
        matchDESC = reDESC.match(line)
        matchDESC_BEGIN = reDESC_BEGIN.match(line)
        if matchDESC:
            assert not descMatches
            expandMatches([matchDESC], translations)
        elif matchDESC_BEGIN:
            assert not descMatches
            descMatches = [matchDESC_BEGIN]
        else:
            # In Python 2, stdout expects encoded byte strings, or else it will
            # encode them with the ascii 'codec'
            if sys.version_info.major == 2:
                line = line.encode('utf-8')

            # Any other line is copied through unchanged.
            print(line, end='')

if descMatches:
    print("Warning: unterminated description at end of file.", file=sys.stderr)
    expandMatches(descMatches, translations)