#
# ElementTree
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xpath support for element trees
#
# history:
# 2003-05-23 fl created
# 2003-05-28 fl added support for // etc
# 2003-08-27 fl fixed parsing of periods in element names
# 2007-09-10 fl new selection engine
# 2007-09-12 fl fixed parent selector
# 2007-09-13 fl added iterfind; changed findall to return a list
# 2007-11-30 fl added namespaces support
# 2009-10-30 fl added child element value filter
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.

# Implementation module for XPath support. There's usually no reason
# to import this module directly; the <b>ElementTree</b> does this for
# you, if needed.


import re


# Tokenizer for the supported XPath subset.  Each match is a 2-tuple:
# group 1 holds an operator or quoted string, group 2 holds a (possibly
# "{uri}"-qualified) name.  Whitespace matches neither group and therefore
# shows up as ("", "").
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
)


def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(operator, tag)`` token pairs for the path *pattern*.

    A tag of the form ``prefix:name`` is rewritten to ``{uri}name`` using
    the *namespaces* prefix map; tags that already start with ``{`` are
    passed through untouched.

    Raises:
        SyntaxError: if a prefix is used but *namespaces* is empty/None or
            does not contain that prefix.
    """
    for token in xpath_tokenizer_re.findall(pattern):
        tag = token[1]
        if tag and tag[0] != "{" and ":" in tag:
            prefix, local = tag.split(":", 1)
            # Only the prefix lookup can legitimately fail; keep the try
            # body that small so unrelated KeyErrors are not masked.
            try:
                if not namespaces:
                    raise KeyError(prefix)
                uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError(
                    "prefix %r not found in prefix map" % prefix
                ) from None
            yield token[0], "{%s}%s" % (uri, local)
        else:
            yield token

def get_parent_map(context):
    """Return the child -> parent mapping for the tree under ``context.root``.

    The mapping is built on first use by walking ``context.root.iter()`` and
    is stored on ``context.parent_map`` so later calls reuse it.
    """
    parent_map = context.parent_map
    if parent_map is None:
        context.parent_map = parent_map = {
            child: parent
            for parent in context.root.iter()
            for child in parent
        }
    return parent_map

def prepare_child(next, token):
    """Build the selector for a plain ``tag`` step.

    *token* is the ``(operator, tag)`` pair for this step; *next* is unused.
    The returned ``select(context, result)`` generator yields, for every
    element in *result*, each direct child whose ``tag`` equals the step's tag.
    """
    tag = token[1]

    def select(context, result):
        for elem in result:
            for child in elem:
                if child.tag == tag:
                    yield child

    return select

def prepare_star(next, token):
    """Build the selector for a ``*`` step.

    Neither argument is used.  The returned ``select(context, result)``
    generator yields everything produced by iterating each element in
    *result* (for ElementTree elements, their direct children).
    """

    def select(context, result):
        for elem in result:
            yield from elem

    return select

def prepare_self(next, token):
    """Build the selector for a ``.`` step.

    Neither argument is used.  The returned ``select(context, result)``
    generator passes every item of *result* through unchanged.
    """

    def select(context, result):
        yield from result

    return select

def prepare_descendant(next, token):
    """Build the selector for a ``//`` step.

    The token following ``//`` is pulled from *next* and must be either
    ``*`` or a tag name.  The returned ``select(context, result)`` generator
    yields, for each element in *result*, every element produced by
    ``elem.iter(tag)`` except the element itself.

    Raises:
        SyntaxError: if no token follows ``//`` or the following token is
            neither ``*`` nor a tag name.
    """
    try:
        token = next()
    except StopIteration:
        # Returning None here would hand the caller a non-callable selector;
        # report the malformed path instead.
        raise SyntaxError("invalid descendant") from None
    if token[0] == "*":
        tag = "*"
    elif not token[0]:
        tag = token[1]
    else:
        raise SyntaxError("invalid descendant")

    def select(context, result):
        for elem in result:
            for descendant in elem.iter(tag):
                if descendant is not elem:
                    yield descendant

    return select

def prepare_parent(next, token):
    """Build the selector for a ``..`` step.

    Neither argument is used.  The returned ``select(context, result)``
    generator yields the parent of each element in *result*, as recorded by
    ``get_parent_map(context)``.  Each parent is yielded only once, and
    elements with no entry in the parent map are skipped.
    """

    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parent_map = get_parent_map(context)
        seen = set()
        for elem in result:
            if elem in parent_map:
                parent = parent_map[elem]
                if parent not in seen:
                    seen.add(parent)
                    yield parent

    return select

def prepare_predicate(next, token):
    """Compile a ``[...]`` predicate into a filtering selector.

    Tokens are pulled from *next* up to the closing ``]`` and reduced to a
    "signature" string: operators contribute themselves, quoted strings
    contribute ``'`` and names contribute ``-``.  The signature picks one of
    the supported forms:

    * ``[@attribute]`` and ``[@attribute='value']``
    * ``[tag]`` and ``[tag='value']``
    * ``[index]``, ``[last()]`` and ``[last()-index]``

    Returns a ``select(context, result)`` generator function that yields the
    elements of *result* satisfying the predicate.

    Raises:
        SyntaxError: if the predicate is unterminated, has an unsupported
            form, uses a function other than ``last``, or has an invalid
            position/offset.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while True:
        try:
            token = next()
        except StopIteration:
            # Ran out of tokens before "]": returning None here would leave
            # a non-callable selector for the caller to trip over later.
            raise SyntaxError("invalid predicate") from None
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # quoted string: strip the quotes, normalise the marker to "'"
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]

        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem

        return select

    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]

        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem

        return select

    if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]

        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem

        return select

    if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]

        def select(context, result):
            for elem in result:
                # keep elem if any matching child has exactly this text
                if any(
                    "".join(child.itertext()) == value
                    for child in elem.findall(tag)
                ):
                    yield elem

        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]: XPath positions are 1-based
            index = int(predicate[0]) - 1
            if index < 0:
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # the offset arrives as a single "-N" name token
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression") from None
                if index > -2:
                    raise SyntaxError(
                        "XPath offset from last() must be negative"
                    )
            else:
                index = -1

        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no parent recorded, or fewer siblings than the index
                    pass

        return select

    raise SyntaxError("invalid predicate")

# Dispatch table: the operator part of a step's first token selects the
# factory that compiles that step into a selector ("" means a plain tag).
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}

# Compiled selector lists, filled in and looked up by iterfind().
_cache = {}

class _SelectorContext:
    """State shared by the selectors while one path is evaluated.

    Holds the element the search started from (``root``) and a cached
    child -> parent mapping.
    """

    # Stays None until get_parent_map() builds the mapping on first use.
    parent_map = None

    def __init__(self, root):
        self.root = root


# --------------------------------------------------------------------

# Generate all matching objects.


def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching *path* below *elem*.

    The path is compiled into a list of selectors (one per step) which is
    cached per ``(path, namespaces)`` pair; the selectors are then chained,
    starting from ``[elem]``.  A path ending in ``/`` is treated as if it
    ended in ``/*``.  An empty path matches nothing.

    Raises:
        SyntaxError: if *path* is absolute (starts with ``/``) or cannot be
            compiled.
    """
    # compile selector pattern
    cache_key = (
        path,
        None if namespaces is None else tuple(sorted(namespaces.items())),
    )
    if path[-1:] == "/":
        path = path + "*"  # implicit all (FIXME: keep this?)
    # Cached values are always non-empty lists, so None means "not cached".
    selector = _cache.get(cache_key)
    if selector is None:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        next_token = iter(xpath_tokenizer(path, namespaces)).__next__
        try:
            token = next_token()
        except StopIteration:
            # No tokens at all: hand back an empty iterator so that find(),
            # findall() and findtext() see "no match" rather than None.
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                raise SyntaxError("invalid path") from None
            try:
                token = next_token()
                if token[0] == "/":
                    # step separator: the next token starts the next step
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result


# Find first matching object.


def find(elem, path, namespaces=None):
    """Return the first element matching *path* below *elem*, or None.

    Arguments are passed straight through to ``iterfind``.
    """
    return next(iterfind(elem, path, namespaces), None)


# Find all matching objects.


def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path* below *elem*.

    Arguments are passed straight through to ``iterfind``.
    """
    return list(iterfind(elem, path, namespaces))


# Find text for first matching object.


def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path* below *elem*.

    Returns *default* when nothing matches, and ``""`` when the first match
    has no (or empty) ``text``.
    """
    match = next(iterfind(elem, path, namespaces), None)
    if match is None:
        return default
    return match.text or ""