Edit File by line

# Use of this source code is governed by a BSD-style license that can be

[0] Fix | Delete

# found in the LICENSE file.

[1] Fix | Delete

__license__ = "MIT"

[2] Fix | Delete

[3] Fix | Delete

try:

[4] Fix | Delete

from collections.abc import Callable # Python 3.6

[5] Fix | Delete

except ImportError as e:

[6] Fix | Delete

from collections import Callable

[7] Fix | Delete

import re

[8] Fix | Delete

import shlex

[9] Fix | Delete

import sys

[10] Fix | Delete

import warnings

[11] Fix | Delete

from bs4.dammit import EntitySubstitution

[12] Fix | Delete

[13] Fix | Delete

# Encoding used when a caller does not ask for a specific one.
DEFAULT_OUTPUT_ENCODING = "utf-8"

# True when running on Python 3 or later.
PY3K = sys.version_info >= (3,)

# Matches one or more consecutive whitespace characters.
whitespace_re = re.compile(r"\s+")

[17] Fix | Delete

[18] Fix | Delete

def _alias(attr):
    """Alias one attribute name to another for backward compatibility.

    Returns a property whose getter reads ``attr`` from the instance and
    whose setter writes the assigned value to ``attr``.
    """
    @property
    def alias(self):
        return getattr(self, attr)

    @alias.setter
    def alias(self, value):
        # A property setter receives the assigned value; forward it to the
        # real attribute so that writes through the alias work.
        setattr(self, attr, value)
    return alias

[28] Fix | Delete

[29] Fix | Delete

[30] Fix | Delete

class NamespacedAttribute(str):
    """An attribute name that remembers its prefix, local name and namespace.

    The string value is ``prefix`` alone when ``name`` is None, ``name``
    alone when ``prefix`` is None, and ``prefix:name`` otherwise.
    """

    def __new__(cls, prefix, name, namespace=None):
        if name is None:
            text = prefix
        elif prefix is None:
            # Not really namespaced.
            text = name
        else:
            text = prefix + ":" + name

        instance = str.__new__(cls, text)
        instance.prefix = prefix
        instance.name = name
        instance.namespace = namespace
        return instance

[44] Fix | Delete

[45] Fix | Delete

class AttributeValueWithCharsetSubstitution(str):
    """Stand-in for a character encoding that was specified in HTML.

    This is a marker base class; it adds nothing to ``str`` itself.
    """

[47] Fix | Delete

[48] Fix | Delete

class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """A generic stand-in for the value of a meta tag's 'charset' attribute.

    When Beautiful Soup parses the markup '<meta charset="utf8">', the
    value of the 'charset' attribute will be one of these objects.
    """

    def __new__(cls, original_value):
        instance = str.__new__(cls, original_value)
        # Keep the value as it appeared in the markup.
        instance.original_value = original_value
        return instance

    def encode(self, encoding):
        """Return ``encoding`` itself, not an encoded form of this string."""
        return encoding

[62] Fix | Delete

[63] Fix | Delete

[64] Fix | Delete

class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """A generic stand-in for the value of a meta tag's 'content' attribute.

    When Beautiful Soup parses the markup:
     <meta http-equiv="content-type" content="text/html; charset=utf8">

    The value of the 'content' attribute will be one of these objects.
    """

    # Group 1 is the "charset=" prefix (with any leading ';' and spaces);
    # group 3 is the charset name that encode() replaces.
    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)

    def __new__(cls, original_value):
        if cls.CHARSET_RE.search(original_value) is None:
            # No substitution necessary: hand back a plain str.
            return str.__new__(str, original_value)

        instance = str.__new__(cls, original_value)
        instance.original_value = original_value
        return instance

    def encode(self, encoding):
        """Return the original value with each charset name replaced by
        ``encoding``."""
        return self.CHARSET_RE.sub(
            lambda found: found.group(1) + encoding, self.original_value)

[89] Fix | Delete

[90] Fix | Delete

class HTMLAwareEntitySubstitution(EntitySubstitution):
    """Entity substitution rules that are aware of some HTML quirks.

    Specifically, the contents of <script> and <style> tags should not
    undergo entity substitution.

    Incoming NavigableString objects are checked to see if they're the
    direct children of a <script> or <style> tag.
    """

    cdata_containing_tags = {"script", "style"}

    preformatted_tags = {"pre"}

    preserve_whitespace_tags = {'pre', 'textarea'}

    @classmethod
    def _substitute_if_appropriate(cls, ns, f):
        """Return ``f(ns)``, or ``ns`` untouched when it is a NavigableString
        whose parent's name is in ``cdata_containing_tags``."""
        is_cdata_child = (
            isinstance(ns, NavigableString)
            and ns.parent is not None
            and ns.parent.name in cls.cdata_containing_tags
        )
        return ns if is_cdata_child else f(ns)

    @classmethod
    def substitute_html(cls, ns):
        """Apply EntitySubstitution.substitute_html unless ``ns`` is exempt."""
        html_rules = EntitySubstitution.substitute_html
        return cls._substitute_if_appropriate(ns, html_rules)

    @classmethod
    def substitute_xml(cls, ns):
        """Apply EntitySubstitution.substitute_xml unless ``ns`` is exempt."""
        xml_rules = EntitySubstitution.substitute_xml
        return cls._substitute_if_appropriate(ns, xml_rules)

[126] Fix | Delete

[127] Fix | Delete

class Formatter(object):
    """Contains information about how to format a parse tree."""

    # By default, represent void elements as <tag/> rather than <tag>
    void_element_close_prefix = '/'

    def substitute(self, *args, **kwargs):
        """Transform certain characters into named entities.

        This is the hook the concrete formatters in this file override.
        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def substitute_entities(self, *args, **kwargs):
        """Older name for substitute(), kept for backward compatibility.

        Delegates to substitute(), so on the base class it still raises
        NotImplementedError, and on subclasses it does the real work.
        """
        return self.substitute(*args, **kwargs)

[136] Fix | Delete

[137] Fix | Delete

class HTMLFormatter(Formatter):
    """The default HTML formatter."""

    def substitute(self, *args, **kwargs):
        """Delegate to HTMLAwareEntitySubstitution.substitute_html."""
        html_rules = HTMLAwareEntitySubstitution.substitute_html
        return html_rules(*args, **kwargs)

[141] Fix | Delete

[142] Fix | Delete

class MinimalHTMLFormatter(Formatter):
    """A minimal HTML formatter."""

    def substitute(self, *args, **kwargs):
        """Delegate to HTMLAwareEntitySubstitution.substitute_xml."""
        xml_rules = HTMLAwareEntitySubstitution.substitute_xml
        return xml_rules(*args, **kwargs)

[146] Fix | Delete

[147] Fix | Delete

class HTML5Formatter(HTMLFormatter):
    """An HTML formatter that omits the slash in a void tag."""

    # No close prefix: void elements are written as <tag>, not <tag/>.
    void_element_close_prefix = None

[150] Fix | Delete

[151] Fix | Delete

class XMLFormatter(Formatter):
    """Substitute only the essential XML entities."""

    def substitute(self, *args, **kwargs):
        """Delegate to EntitySubstitution.substitute_xml."""
        xml_rules = EntitySubstitution.substitute_xml
        return xml_rules(*args, **kwargs)

[155] Fix | Delete

[156] Fix | Delete

class HTMLXMLFormatter(Formatter):
    """Format XML using HTML rules."""

    def substitute(self, *args, **kwargs):
        """Delegate to HTMLAwareEntitySubstitution.substitute_html."""
        html_rules = HTMLAwareEntitySubstitution.substitute_html
        return html_rules(*args, **kwargs)

[160] Fix | Delete

[161] Fix | Delete

[162] Fix | Delete

class PageElement(object):

[163] Fix | Delete

"""Contains the navigational information for some part of the page

[164] Fix | Delete

(either a tag or a piece of text)"""

[165] Fix | Delete

[166] Fix | Delete

# There are six possible values for the "formatter" argument passed in

[167] Fix | Delete

# to methods like encode() and prettify():

[168] Fix | Delete

[169] Fix | Delete

# "html" - All Unicode characters with corresponding HTML entities

[170] Fix | Delete

# are converted to those entities on output.

[171] Fix | Delete

# "html5" - The same as "html", but empty void tags are represented as

[172] Fix | Delete

# <tag> rather than <tag/>

[173] Fix | Delete

# "minimal" - Bare ampersands and angle brackets are converted to

[174] Fix | Delete

# XML entities: &amp; &lt; &gt;

[175] Fix | Delete

# None - The null formatter. Unicode characters are never

[176] Fix | Delete

# converted to entities. This is not recommended, but it's

[177] Fix | Delete

# faster than "minimal".

[178] Fix | Delete

# A callable function - it will be called on every string that needs to undergo entity substitution.

[179] Fix | Delete

# A Formatter instance - Formatter.substitute(string) will be called on every string that

[180] Fix | Delete

# needs to undergo entity substitution.

[181] Fix | Delete

[182] Fix | Delete

[183] Fix | Delete

# In an HTML document, the default "html", "html5", and "minimal"

[184] Fix | Delete

# functions will leave the contents of <script> and <style> tags

[185] Fix | Delete

# alone. For an XML document, all tags will be given the same

[186] Fix | Delete

# treatment.

[187] Fix | Delete

[188] Fix | Delete

# Formatter lookup for HTML trees; the None key maps to the null formatter.
HTML_FORMATTERS = {
    "html": HTMLFormatter(),
    "html5": HTML5Formatter(),
    "minimal": MinimalHTMLFormatter(),
    None: None,
}

# Formatter lookup for XML trees; the None key maps to the null formatter.
XML_FORMATTERS = {
    "html": HTMLXMLFormatter(),
    "minimal": XMLFormatter(),
    None: None,
}

[200] Fix | Delete

[201] Fix | Delete

def format_string(self, s, formatter='minimal'):
    """Format the given string using the given formatter.

    ``formatter`` may be a formatter name (resolved through
    ``_formatter_for_name``), None (``s`` is returned unchanged), a
    callable (called with ``s``), or an object with a ``substitute``
    method.
    """
    if isinstance(formatter, str):
        formatter = self._formatter_for_name(formatter)

    if formatter is None:
        return s
    if callable(formatter):
        # Backwards compatibility -- you used to pass in a formatting method.
        return formatter(s)
    return formatter.substitute(s)

[214] Fix | Delete

[215] Fix | Delete

@property
def _is_xml(self):
    """Is this element part of an XML tree or an HTML tree?

    This is used when mapping a formatter name ("minimal") to an
    appropriate function (one that performs entity-substitution on
    the contents of <script> and <style> tags, or not). It can be
    inefficient, but it should be called very rarely.
    """
    known = self.known_xml
    if known is not None:
        # Most of the time this was determined when the document was parsed.
        return known

    # Otherwise this element was probably created by calling the
    # constructor directly, so ask the parent.
    parent = self.parent
    if parent is not None:
        return parent._is_xml

    # Top-level object without .known_xml: fall back to 'is_xml', or
    # guess HTML, since BS is usually used on HTML markup.
    return getattr(self, 'is_xml', False)

[238] Fix | Delete

[239] Fix | Delete

def _formatter_for_name(self, name):
    """Look up a formatter based on its name and the tree.

    XML trees use XML_FORMATTERS and fall back to a new XMLFormatter;
    other trees use HTML_FORMATTERS and fall back to a new HTMLFormatter.
    """
    if self._is_xml:
        registry, fallback = self.XML_FORMATTERS, XMLFormatter()
    else:
        registry, fallback = self.HTML_FORMATTERS, HTMLFormatter()
    return registry.get(name, fallback)

[245] Fix | Delete

[246] Fix | Delete

def setup(self, parent=None, previous_element=None, next_element=None,

[247] Fix | Delete

previous_sibling=None, next_sibling=None):

[248] Fix | Delete

"""Sets up the initial relations between this element and

[249] Fix | Delete

other elements."""

[250] Fix | Delete

self.parent = parent

[251] Fix | Delete

[252] Fix | Delete

self.previous_element = previous_element

[253] Fix | Delete

if previous_element is not None:

[254] Fix | Delete

self.previous_element.next_element = self

[255] Fix | Delete

[256] Fix | Delete

self.next_element = next_element

[257] Fix | Delete

if self.next_element:

[258] Fix | Delete

self.next_element.previous_element = self

[259] Fix | Delete

[260] Fix | Delete

self.next_sibling = next_sibling

[261] Fix | Delete

if self.next_sibling:

[262] Fix | Delete

self.next_sibling.previous_sibling = self

[263] Fix | Delete

[264] Fix | Delete

if (not previous_sibling

[265] Fix | Delete

and self.parent is not None and self.parent.contents):

[266] Fix | Delete

previous_sibling = self.parent.contents[-1]

[267] Fix | Delete

[268] Fix | Delete

self.previous_sibling = previous_sibling

[269] Fix | Delete

if previous_sibling:

[270] Fix | Delete

self.previous_sibling.next_sibling = self

[271] Fix | Delete

[272] Fix | Delete

nextSibling = _alias("next_sibling") # BS3

[273] Fix | Delete

previousSibling = _alias("previous_sibling") # BS3

[274] Fix | Delete

[275] Fix | Delete

def replace_with(self, replace_with):
    """Replace this element in its parent with ``replace_with``.

    Returns this element after it has been extracted, or None when asked
    to replace the element with itself (a no-op).

    Raises ValueError when this element has no parent, or when
    ``replace_with`` is this element's parent.
    """
    if not self.parent:
        # The trailing space keeps the two literals from fusing into
        # "theelement" when Python concatenates them.
        raise ValueError(
            "Cannot replace one element with another when the "
            "element to be replaced is not part of a tree.")
    if replace_with is self:
        return
    if replace_with is self.parent:
        raise ValueError("Cannot replace a Tag with its parent.")
    old_parent = self.parent
    # Record the position before extract() removes this element.
    my_index = self.parent.index(self)
    self.extract()
    old_parent.insert(my_index, replace_with)
    return self

[289] Fix | Delete

replaceWith = replace_with # BS3

[290] Fix | Delete

[291] Fix | Delete

def unwrap(self):
    """Replace this element with its own contents.

    The children are inserted into the parent at the position this
    element occupied, in their original order. Returns this element.

    Raises ValueError when this element has no parent.
    """
    my_parent = self.parent
    if not self.parent:
        # The trailing space keeps the two literals from fusing into
        # "thatelement" when Python concatenates them.
        raise ValueError(
            "Cannot replace an element with its contents when that "
            "element is not part of a tree.")
    my_index = self.parent.index(self)
    self.extract()
    # Iterate over a copy in reverse: inserting each child at the same
    # index rebuilds the original order. The copy protects the loop in
    # case insert() modifies self.contents.
    for child in reversed(self.contents[:]):
        my_parent.insert(my_index, child)
    return self

[302] Fix | Delete

replace_with_children = unwrap

[303] Fix | Delete

replaceWithChildren = unwrap # BS3

[304] Fix | Delete

[305] Fix | Delete

def wrap(self, wrap_inside):
    """Put ``wrap_inside`` where this element was, then append the value
    returned by ``replace_with`` to it. Returns ``wrap_inside``."""
    wrap_inside.append(self.replace_with(wrap_inside))
    return wrap_inside

[309] Fix | Delete

[310] Fix | Delete

def extract(self):
    """Destructively rips this element out of the tree.

    Removes the element from its parent's contents, reconnects the
    elements and siblings on either side of it, clears this element's
    parent, element and sibling links, and returns the element.
    """
    if self.parent is not None:
        del self.parent.contents[self.parent.index(self)]

    # Find the two elements that would be next to each other if this
    # element (and any children) hadn't been parsed. Connect the two.
    tail = self._last_descendant()
    after = tail.next_element
    before = self.previous_element

    if before is not after:
        if before is not None:
            before.next_element = after
        if after is not None:
            after.previous_element = before
    self.previous_element = None
    tail.next_element = None

    self.parent = None

    # Now do the same for the sibling chain.
    left = self.previous_sibling
    right = self.next_sibling
    if left is not right:
        if left is not None:
            left.next_sibling = right
        if right is not None:
            right.previous_sibling = left
    self.previous_sibling = self.next_sibling = None
    return self

[338] Fix | Delete

[339] Fix | Delete

def _last_descendant(self, is_initialized=True, accept_self=True):
    """Finds the last element beneath this object to be parsed.

    With ``is_initialized`` true and a truthy next sibling, the answer is
    read from that sibling's ``previous_element``. Otherwise the last
    entry of ``contents`` is followed down from this element. Returns
    None when the result is this element and ``accept_self`` is false.
    """
    if is_initialized and self.next_sibling:
        candidate = self.next_sibling.previous_element
    else:
        candidate = self
        while isinstance(candidate, Tag) and candidate.contents:
            candidate = candidate.contents[-1]

    if candidate is self and not accept_self:
        candidate = None
    return candidate

[350] Fix | Delete

# BS3: Not part of the API!

[351] Fix | Delete

_lastRecursiveChild = _last_descendant

[352] Fix | Delete

[353] Fix | Delete

def insert(self, position, new_child):
    """Insert ``new_child`` into this object's contents at ``position``.

    Plain strings are wrapped in NavigableString. A BeautifulSoup object
    is not inserted itself; its children are inserted one at a time. A
    child that already has a parent is extracted first. The sibling and
    element links around the insertion point are then rewired.

    Raises ValueError when ``new_child`` is None or is this object.
    """
    if new_child is None:
        raise ValueError("Cannot insert None into a tag.")
    if new_child is self:
        raise ValueError("Cannot insert a tag into itself.")
    if (isinstance(new_child, str)
            and not isinstance(new_child, NavigableString)):
        new_child = NavigableString(new_child)

    from bs4 import BeautifulSoup
    if isinstance(new_child, BeautifulSoup):
        # We don't want to end up with a situation where one BeautifulSoup
        # object contains another. Insert the children one at a time.
        for offset, subchild in enumerate(list(new_child.contents)):
            self.insert(position + offset, subchild)
        return

    position = min(position, len(self.contents))
    if getattr(new_child, 'parent', None) is not None:
        # If the element is already one of this object's children and is
        # being moved further down the list, extracting it shifts the
        # target index down by one.
        if new_child.parent is self and self.index(new_child) < position:
            position -= 1
        new_child.extract()

    new_child.parent = self
    if position == 0:
        new_child.previous_sibling = None
        new_child.previous_element = self
    else:
        left = self.contents[position - 1]
        new_child.previous_sibling = left
        left.next_sibling = new_child
        new_child.previous_element = left._last_descendant(False)
    if new_child.previous_element is not None:
        new_child.previous_element.next_element = new_child

    tail = new_child._last_descendant(False)

    if position >= len(self.contents):
        new_child.next_sibling = None

        # Walk up the ancestors until one has a next sibling: that is the
        # element that comes next in the document. It stays None when
        # this tag's last element is the last element in the document.
        ancestor = self
        following = None
        while following is None and ancestor is not None:
            following = ancestor.next_sibling
            ancestor = ancestor.parent
        tail.next_element = following
    else:
        right = self.contents[position]
        new_child.next_sibling = right
        if right is not None:
            right.previous_sibling = new_child
        tail.next_element = right

    if tail.next_element is not None:
        tail.next_element.previous_element = tail
    self.contents.insert(position, new_child)

[426] Fix | Delete

[427] Fix | Delete

def append(self, tag):
    """Appends the given tag to the contents of this tag."""
    end = len(self.contents)
    self.insert(end, tag)

[430] Fix | Delete

[431] Fix | Delete

def insert_before(self, predecessor):
    """Makes the given element the immediate predecessor of this one.

    The two elements will have the same parent, and the given element
    will be immediately before this one.

    Raises ValueError when ``predecessor`` is this element, or when this
    element has no parent.
    """
    if predecessor is self:
        raise ValueError("Can't insert an element before itself.")
    container = self.parent
    if container is None:
        raise ValueError(
            "Element has no parent, so 'before' has no meaning.")
    # Extract first so that the index won't be screwed up if they
    # are siblings.
    if isinstance(predecessor, PageElement):
        predecessor.extract()
    container.insert(container.index(self), predecessor)

[449] Fix | Delete

[450] Fix | Delete

def insert_after(self, successor):
    """Makes the given element the immediate successor of this one.

    The two elements will have the same parent, and the given element
    will be immediately after this one.

    Raises ValueError when ``successor`` is this element, or when this
    element has no parent.
    """
    if successor is self:
        raise ValueError("Can't insert an element after itself.")
    container = self.parent
    if container is None:
        raise ValueError(
            "Element has no parent, so 'after' has no meaning.")
    # Extract first so that the index won't be screwed up if they
    # are siblings.
    if isinstance(successor, PageElement):
        successor.extract()
    container.insert(container.index(self) + 1, successor)

[468] Fix | Delete

[469] Fix | Delete

def find_next(self, name=None, attrs={}, text=None, **kwargs):
    """Returns the first item that matches the given criteria and
    appears after this Tag in the document."""
    # The shared default dict is only passed through here, never mutated.
    search_all = self.find_all_next
    return self._find_one(search_all, name, attrs, text, **kwargs)

[473] Fix | Delete

findNext = find_next # BS3

[474] Fix | Delete

[475] Fix | Delete

def find_all_next(self, name=None, attrs={}, text=None, limit=None,
                  **kwargs):
    """Returns all items that match the given criteria and appear
    after this Tag in the document."""
    # The shared default dict is only passed through here, never mutated.
    candidates = self.next_elements
    return self._find_all(name, attrs, text, limit, candidates, **kwargs)

[481] Fix | Delete

findAllNext = find_all_next # BS3

[482] Fix | Delete

[483] Fix | Delete

def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
    """Returns the closest sibling to this Tag that matches the
    given criteria and appears after this Tag in the document."""
    # The shared default dict is only passed through here, never mutated.
    search_all = self.find_next_siblings
    return self._find_one(search_all, name, attrs, text, **kwargs)

[488] Fix | Delete

findNextSibling = find_next_sibling # BS3

[489] Fix | Delete

[490] Fix | Delete

def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
                       **kwargs):
    """Returns the siblings of this Tag that match the given
    criteria and appear after this Tag in the document."""
    # The shared default dict is only passed through here, never mutated.
    candidates = self.next_siblings
    return self._find_all(name, attrs, text, limit, candidates, **kwargs)

[496] Fix | Delete

findNextSiblings = find_next_siblings # BS3

[497] Fix | Delete

fetchNextSiblings = find_next_siblings # BS2

[498] Fix | Delete

[499] Fix | Delete

12 3 4