import re
import sys
import weakref

from .commontypes import COMMON_TYPES, resolve_common_type
from .error import FFIError, CDefError
from . import _pycparser as pycparser
# Module-wide lock used to serialize calls into pycparser, which is not
# thread-safe.  The low-level thread module was renamed in Python 3.
if sys.version_info < (3,):
    import thread as _thread
else:
    import _thread
lock = _thread.allocate_lock()
CDEF_SOURCE_STRING = "<cdef source string>"
_r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
re.DOTALL | re.MULTILINE)
_r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
r"\b((?:[^\n\\]|\\.)*?)$",
re.DOTALL | re.MULTILINE)
_r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
_r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
_r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
_r_words = re.compile(r"\w+|\S")
_r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE)
_r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b")
_r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b")
_r_cdecl = re.compile(r"\b__cdecl\b")
_r_extern_python = re.compile(r'\bextern\s*"'
r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.')
_r_star_const_space = re.compile( # matches "* const "
r"[*]\s*((const|volatile|restrict)\b\s*)+")
_r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+"
_r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.")
# Lazily created pycparser.CParser instance, reused by every parse.
_parser_cache = None


def _get_parser():
    """Return the module-wide ``pycparser.CParser``, creating it on first use."""
    global _parser_cache
    if _parser_cache is None:
        _parser_cache = pycparser.CParser()
    return _parser_cache
def _workaround_for_old_pycparser(csource):
    """Return *csource* with parentheses added around qualified pointers.

    Every "* const"-like sequence gets an opening parenthesis in front of
    it (and a second one if another star follows directly); the matching
    closing parentheses are inserted at the end of the declarator.
    """
    # Workaround for a pycparser issue (fixed between pycparser 2.10 and
    # 2.14): "char*const***" gives us a wrong syntax tree, the same as
    # for "char***(*const)".  This means we can't tell the difference
    # afterwards.  But "char(*const(***))" gives us the right syntax
    # tree.  The issue only occurs if there are several stars in
    # sequence with no parenthesis inbetween, just possibly qualifiers.
    # Attempt to fix it by adding some parentheses in the source: each
    # time we see "* const" or "* const *", we add an opening
    # parenthesis before each star---the hard part is figuring out where
    # to close them.
    parts = []
    while True:
        match = _r_star_const_space.search(csource)
        if not match:
            break
        parts.append(csource[:match.start()])
        parts.append('(')
        closing = ')'
        parts.append(match.group())   # e.g. "* const "
        endpos = match.end()
        if csource.startswith('*', endpos):
            parts.append('(')
            closing += ')'
        # Find where the declarator ends: the first unbalanced ')' or the
        # first ',', ';' or '=' that is not nested inside parentheses.
        level = 0
        i = endpos
        while i < len(csource):
            c = csource[i]
            if c == '(':
                level += 1
            elif c == ')':
                if level == 0:
                    break
                level -= 1
            elif c in ',;=':
                if level == 0:
                    break
            i += 1
        # Keep scanning the remainder, which now carries the closing
        # parentheses at the right place.
        csource = csource[endpos:i] + closing + csource[i:]
    parts.append(csource)
    return ''.join(parts)
def _preprocess_extern_python(csource):
    """Replace every `extern "Python"` construct with start/stop markers.

    Raises CDefError if a block has no closing '}' or a single declaration
    has no ';', and NotImplementedError for nested braces inside a block.
    """
    # input: `extern "Python" int foo(int);` or
    #        `extern "Python" { int foo(int); }`
    # output:
    #     void __cffi_extern_python_start;
    #     int foo(int);
    #     void __cffi_extern_python_stop;
    #
    # input: `extern "Python+C" int foo(int);`
    # output:
    #     void __cffi_extern_python_plus_c_start;
    #     int foo(int);
    #     void __cffi_extern_python_stop;
    parts = []
    while True:
        match = _r_extern_python.search(csource)
        if not match:
            break
        # The regex consumes one character past the optional whitespace;
        # endpos is the index of that character.
        endpos = match.end() - 1
        parts.append(csource[:match.start()])
        if 'C' in match.group(1):
            parts.append('void __cffi_extern_python_plus_c_start; ')
        else:
            parts.append('void __cffi_extern_python_start; ')
        if csource[endpos] == '{':
            # grouping variant: copy the content between the braces
            closing = csource.find('}', endpos)
            if closing < 0:
                raise CDefError("'extern \"Python\" {': no '}' found")
            if csource.find('{', endpos + 1, closing) >= 0:
                raise NotImplementedError("cannot use { } inside a block "
                                          "'extern \"Python\" { ... }'")
            parts.append(csource[endpos+1:closing])
            csource = csource[closing+1:]
        else:
            # single-declaration variant: copy up to the next ';'
            semicolon = csource.find(';', endpos)
            if semicolon < 0:
                raise CDefError("'extern \"Python\": no ';' found")
            parts.append(csource[endpos:semicolon+1])
            csource = csource[semicolon+1:]
        parts.append(' void __cffi_extern_python_stop;')
    parts.append(csource)
    return ''.join(parts)
def _preprocess(csource):
    """Rewrite cdef() source into text that pycparser can parse.

    Returns a pair ``(csource, macros)`` where *macros* maps each
    ``#define`` name to its stripped replacement text.
    """
    # Remove comments.  NOTE: this only work because the cdef() section
    # should not contain any string literal!
    csource = _r_comment.sub(' ', csource)
    # Remove the "#define FOO x" lines
    macros = {}
    for match in _r_define.finditer(csource):
        macroname, macrovalue = match.groups()
        macrovalue = macrovalue.replace('\\\n', '').strip()
        macros[macroname] = macrovalue
    csource = _r_define.sub('', csource)
    #
    if pycparser.__version__ < '2.14':
        csource = _workaround_for_old_pycparser(csource)
    #
    # BIG HACK: replace WINAPI or __stdcall with "volatile const".
    # It doesn't make sense for the return type of a function to be
    # "volatile volatile const", so we abuse it to detect __stdcall...
    # Hack number 2 is that "int(volatile *fptr)();" is not valid C
    # syntax, so we place the "volatile" before the opening parenthesis.
    csource = _r_stdcall2.sub(' volatile volatile const(', csource)
    csource = _r_stdcall1.sub(' volatile volatile const ', csource)
    csource = _r_cdecl.sub(' ', csource)
    #
    # Replace `extern "Python"` with start/end markers
    csource = _preprocess_extern_python(csource)
    #
    # Replace "[...]" with "[__dotdotdotarray__]"
    csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)
    #
    # Replace "...}" with "__dotdotdotNUM__}".  This construction should
    # occur only at the end of enums; at the end of structs we have "...;}"
    # and at the end of vararg functions "...);".  Also replace "=...[,}]"
    # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when
    # giving an unknown value.
    matches = list(_r_partial_enum.finditer(csource))
    # Walk the matches from last to first so that the offsets of the
    # not-yet-processed matches stay valid while csource is rewritten.
    for number, match in enumerate(reversed(matches)):
        p = match.start()
        if csource[p] == '=':
            p2 = csource.find('...', p, match.end())
            assert p2 > p
            csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number,
                                                 csource[p2+3:])
        else:
            assert csource[p:p+3] == '...'
            csource = '%s __dotdotdot%d__ %s' % (csource[:p], number,
                                                 csource[p+3:])
    # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__"
    csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource)
    # Replace "float ..." or "double..." with "__dotdotdotfloat__"
    csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
    # Replace all remaining "..." with the same name, "__dotdotdot__",
    # which is declared with a typedef for the purpose of C parsing.
    return csource.replace('...', ' __dotdotdot__ '), macros
def _common_type_names(csource):
    """Return the set of COMMON_TYPES names that *csource* appears to use."""
    # Look in the source for what looks like usages of types from the
    # list of common types.  A "usage" is approximated here as the
    # appearance of the word, minus a "definition" of the type, which
    # is the last word in a "typedef" statement.  Approximative only
    # but should be fine for all the common types.
    look_for_words = set(COMMON_TYPES)
    look_for_words.add(';')
    look_for_words.add(',')
    look_for_words.add('(')
    look_for_words.add(')')
    look_for_words.add('typedef')
    words_used = set()
    is_typedef = False      # inside a "typedef ...;" statement?
    paren = 0               # parenthesis depth inside that statement
    previous_word = ''
    for word in _r_words.findall(csource):
        if word in look_for_words:
            if word == ';':
                if is_typedef:
                    # the word before ';' is the name being defined
                    words_used.discard(previous_word)
                    look_for_words.discard(previous_word)
                    is_typedef = False
            elif word == 'typedef':
                is_typedef = True
                paren = 0
            elif word == '(':
                paren += 1
            elif word == ')':
                paren -= 1
            elif word == ',':
                # "typedef int a, b;" defines the word before each
                # top-level comma as well
                if is_typedef and paren == 0:
                    words_used.discard(previous_word)
                    look_for_words.discard(previous_word)
            else:   # word in COMMON_TYPES
                words_used.add(word)
        previous_word = word
    return words_used
# NOTE(review): these look like instance-attribute initializations from a
# constructor whose ``def`` line is not visible in this excerpt -- confirm.
# Set that _declare() adds declared objects to.
self._included_declarations = set()
# Starts at zero; presumably numbers anonymous declarations -- verify.
self._anonymous_counter = 0
# Weak-keyed mapping; presumably from pycparser struct nodes to the
# corresponding model types -- TODO confirm against its users.
self._structnode2type = weakref.WeakKeyDictionary()
# Nothing recorded initially.
self._uses_new_feature = None
def _parse(self, csource):
    """Preprocess *csource* and run it through pycparser.

    Returns ``(ast, macros, csource)`` where *csource* is the preprocessed
    text.  Parse errors are re-raised through convert_pycparser_error().
    """
    csource, macros = _preprocess(csource)
    # XXX: for more efficiency we would need to poke into the
    # internals of CParser...  the following registers the
    # typedefs, because their presence or absence influences the
    # parsing itself (but what they are typedef'ed to plays no role)
    ctn = _common_type_names(csource)
    typenames = []
    for name in sorted(self._declarations):
        if name.startswith('typedef '):
            name = name[len('typedef '):]
            typenames.append(name)
            ctn.discard(name)
    typenames += sorted(ctn)
    #
    csourcelines = ['# 1 "<cdef automatic initialization code>"']
    for typename in typenames:
        csourcelines.append('typedef int %s;' % typename)
    # "__dotdotdot__" must come last: _internal_parse() uses it as the
    # separator between these automatic typedefs and the real source.
    csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,'
                        ' __dotdotdot__;')
    # this forces pycparser to consider the following in the file
    # called <cdef source string> from line 1
    csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,))
    csourcelines.append(csource)
    fullcsource = '\n'.join(csourcelines)
    if lock is not None:
        lock.acquire()     # pycparser is not thread-safe...
    try:
        ast = _get_parser().parse(fullcsource)
    except pycparser.c_parser.ParseError as e:
        self.convert_pycparser_error(e, csource)
    finally:
        # always release, otherwise the next parse would block forever
        if lock is not None:
            lock.release()
    # csource will be used to find buggy source text
    return ast, macros, csource
def _convert_pycparser_error(self, e, csource):
    """Return the line of *csource* that error *e* refers to, or None."""
    # xxx look for "<cdef source string>:NUM:" at the start of str(e)
    # and interpret that as a line number.  This will not work if
    # the user gives explicit ``# NUM "FILE"`` directives.
    line = None
    msg = str(e)
    match = re.match(r"%s:(\d+):" % (re.escape(CDEF_SOURCE_STRING),), msg)
    if match:
        linenum = int(match.group(1), 10)
        csourcelines = csource.splitlines()
        if 1 <= linenum <= len(csourcelines):
            line = csourcelines[linenum-1]
    return line
def convert_pycparser_error(self, e, csource):
    """Raise a CDefError describing the pycparser error *e*.

    The message quotes the offending source line when it can be located
    in *csource*.  This method never returns normally.
    """
    line = self._convert_pycparser_error(e, csource)

    msg = str(e)
    if line:
        msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
    else:
        msg = 'parse error\n%s' % (msg,)
    raise CDefError(msg)
def parse(self, csource, override=False, packed=False, dllexport=False):
    """Parse the declarations in *csource* and record them on this parser.

    The keyword flags are stored in ``self._options`` for the duration of
    the call; the previous options are restored afterwards, even if
    parsing raises.
    """
    prev_options = self._options
    try:
        self._options = {'override': override,
                         'packed': packed,
                         'dllexport': dllexport}
        self._internal_parse(csource)
    finally:
        self._options = prev_options
def _internal_parse(self, csource):
    """Parse *csource* and declare every top-level construct it contains.

    Raises CDefError for constructs that are not supported in cdef().
    A CDefError raised while handling a declaration gets that declaration
    appended to its arguments; an FFIError gets the offending source line
    appended to its message when it can be found.
    """
    ast, macros, csource = self._parse(csource)
    # add the macros
    self._process_macros(macros)
    # find the first "__dotdotdot__" and use that as a separator
    # between the repeated typedefs and the real csource
    iterator = iter(ast.ext)
    for decl in iterator:
        if decl.name == '__dotdotdot__':
            break
    else:
        raise AssertionError("'__dotdotdot__' separator typedef not found")
    current_decl = None
    #
    try:
        self._inside_extern_python = '__cffi_extern_python_stop'
        # continue with the declarations that follow the separator
        for decl in iterator:
            current_decl = decl
            if isinstance(decl, pycparser.c_ast.Decl):
                self._parse_decl(decl)
            elif isinstance(decl, pycparser.c_ast.Typedef):
                if not decl.name:
                    raise CDefError("typedef does not declare any name",
                                    decl)
                quals = 0
                if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and
                        decl.type.type.names[-1].startswith('__dotdotdot')):
                    realtype = self._get_unknown_type(decl)
                elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
                      isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
                      isinstance(decl.type.type.type,
                                 pycparser.c_ast.IdentifierType) and
                      decl.type.type.type.names[-1].startswith('__dotdotdot')):
                    realtype = self._get_unknown_ptr_type(decl)
                else:
                    realtype, quals = self._get_type_and_quals(
                        decl.type, name=decl.name, partial_length_ok=True)
                self._declare('typedef ' + decl.name, realtype, quals=quals)
            elif decl.__class__.__name__ == 'Pragma':
                pass    # skip pragma, only in pycparser 2.15
            else:
                raise CDefError("unexpected <%s>: this construct is valid "
                                "C but not valid in cdef()" %
                                decl.__class__.__name__, decl)
    except CDefError as e:
        # attach the declaration being processed, unless one is already
        # attached
        if len(e.args) == 1:
            e.args = e.args + (current_decl,)
        raise
    except FFIError as e:
        msg = self._convert_pycparser_error(e, csource)
        if msg:
            e.args = (e.args[0] + "\n *** Err: %s" % msg,)
        raise
def _add_constants(self, key, val):
if key in self._int_constants:
if self._int_constants[key] == val:
return # ignore identical double declarations
"multiple declarations of constant: %s" % (key,))
self._int_constants[key] = val
def _add_integer_constant(self, name, int_str):
    """Convert the C integer literal *int_str* and declare it as *name*.

    Handles an optional leading '-', 'u'/'l' suffixes, and decimal, hex
    ("0x...") and C-style octal ("010") notation.
    """
    int_str = int_str.lower().rstrip("ul")
    neg = int_str.startswith('-')
    if neg:
        # strip the sign so that the prefix checks below see the digits
        int_str = int_str[1:]
    # "010" is not valid oct in py3
    if (int_str.startswith("0") and int_str != '0'
            and not int_str.startswith("0x")):
        int_str = "0o" + int_str[1:]
    pyvalue = int(int_str, 0)
    if neg:
        pyvalue = -pyvalue
    self._add_constants(name, pyvalue)
    self._declare('macro ' + name, pyvalue)
def _process_macros(self, macros):
for key, value in macros.items():
if _r_int_literal.match(value):
self._add_integer_constant(key, value)
self._declare('macro ' + key, value)
'only supports one of the following syntax:\n'
' #define %s ... (literally dot-dot-dot)\n'
' #define %s NUMBER (with NUMBER an integer'
' constant, decimal/hex/octal)\n'
% (key, key, key, value))
def _declare_function(self, tp, quals, decl):
    """Declare the function *decl* under the tag matching the current mode.

    The tag depends on the 'dllexport' option and on whether the
    declaration sits between `extern "Python"` start/stop markers.
    """
    tp = self._get_type_pointer(tp, quals)
    if self._options.get('dllexport'):
        tag = 'dllexport_python '
    elif self._inside_extern_python == '__cffi_extern_python_start':
        tag = 'extern_python '
    elif self._inside_extern_python == '__cffi_extern_python_plus_c_start':
        tag = 'extern_python_plus_c '
    else:
        tag = 'function '
    self._declare(tag + decl.name, tp)
def _parse_decl(self, decl):
    """Process one top-level pycparser ``Decl`` node.

    Declares functions, struct/union/enum types, integer constants given
    with a literal initializer, and global constants or variables.
    Raises CDefError if the construct declares nothing, or if a constant
    or variable appears inside `extern "Python"`.
    """
    node = decl.type
    if isinstance(node, pycparser.c_ast.FuncDecl):
        tp, quals = self._get_type_and_quals(node, name=decl.name)
        assert isinstance(tp, model.RawFunctionType)
        self._declare_function(tp, quals, decl)
    else:
        if isinstance(node, pycparser.c_ast.Struct):
            self._get_struct_union_enum_type('struct', node)
        elif isinstance(node, pycparser.c_ast.Union):
            self._get_struct_union_enum_type('union', node)
        elif isinstance(node, pycparser.c_ast.Enum):
            self._get_struct_union_enum_type('enum', node)
        elif not decl.name:
            raise CDefError("construct does not declare any variable",
                            decl)
        #
        if decl.name:
            tp, quals = self._get_type_and_quals(node,
                                                 partial_length_ok=True)
            if isinstance(tp, model.RawFunctionType):
                self._declare_function(tp, quals, decl)
            elif (tp.is_integer_type() and
                    hasattr(decl, 'init') and
                    hasattr(decl.init, 'value') and
                    _r_int_literal.match(decl.init.value)):
                # e.g. "static const int FOO = 42;"
                self._add_integer_constant(decl.name, decl.init.value)
            elif (tp.is_integer_type() and
                    isinstance(decl.init, pycparser.c_ast.UnaryOp) and
                    decl.init.op == '-' and
                    hasattr(decl.init.expr, 'value') and
                    _r_int_literal.match(decl.init.expr.value)):
                # e.g. "static const int FOO = -42;"
                self._add_integer_constant(decl.name,
                                           '-' + decl.init.expr.value)
            elif (tp is model.void_type and
                  decl.name.startswith('__cffi_extern_python_')):
                # hack: `extern "Python"` in the C source is replaced
                # with "void __cffi_extern_python_start;" and
                # "void __cffi_extern_python_stop;"
                self._inside_extern_python = decl.name
            else:
                if self._inside_extern_python !='__cffi_extern_python_stop':
                    raise CDefError(
                        "cannot declare constants or "
                        "variables with 'extern \"Python\"'")
                if (quals & model.Q_CONST) and not tp.is_array_type:
                    self._declare('constant ' + decl.name, tp, quals=quals)
                else:
                    self._declare('variable ' + decl.name, tp, quals=quals)
def parse_type(self, cdecl):
    """Return the first item of ``parse_type_and_quals(cdecl)``."""
    type_and_quals = self.parse_type_and_quals(cdecl)
    return type_and_quals[0]
def parse_type_and_quals(self, cdecl):
    """Parse the C type *cdecl*; return what _get_type_and_quals() gives.

    Raises CDefError if *cdecl* is parsed as a plain identifier rather
    than as a type.
    """
    # Wrap the declaration as the single parameter of a dummy function so
    # that it goes through the regular parsing path.
    wrapped = 'void __dummy(\n%s\n);' % cdecl
    ast, macros = self._parse(wrapped)[:2]
    param = ast.ext[-1].type.args.params[0]
    if isinstance(param, pycparser.c_ast.ID):
        raise CDefError("unknown identifier '%s'" % (param.name,))
    return self._get_type_and_quals(param.type)
def _declare(self, name, obj, included=False, quals=0):
if name in self._declarations:
prevobj, prevquals = self._declarations[name]
if prevobj is obj and prevquals == quals:
if not self._options.get('override'):
"multiple declarations of %s (for interactive usage, "
"try cdef(xx, override=True))" % (name,))
assert '__dotdotdot__' not in name.split()
self._declarations[name] = (obj, quals)
self._included_declarations.add(obj)
def _extract_quals(self, type):