530 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			530 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""Beautiful Soup
 | 
						|
Elixir and Tonic
 | 
						|
"The Screen-Scraper's Friend"
 | 
						|
http://www.crummy.com/software/BeautifulSoup/
 | 
						|
 | 
						|
Beautiful Soup uses a pluggable XML or HTML parser to parse a
 | 
						|
(possibly invalid) document into a tree representation. Beautiful Soup
 | 
						|
provides methods and Pythonic idioms that make it easy to navigate,
 | 
						|
search, and modify the parse tree.
 | 
						|
 | 
						|
Beautiful Soup works with Python 2.7 and up. It works better if lxml
 | 
						|
and/or html5lib is installed.
 | 
						|
 | 
						|
For more than you ever wanted to know about Beautiful Soup, see the
 | 
						|
documentation:
 | 
						|
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 | 
						|
 | 
						|
"""
 | 
						|
 | 
						|
# Use of this source code is governed by a BSD-style license that can be
 | 
						|
# found in the LICENSE file.
 | 
						|
 | 
						|
__author__ = "Leonard Richardson (leonardr@segfault.org)"
 | 
						|
__version__ = "4.6.0"
 | 
						|
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
 | 
						|
__license__ = "MIT"
 | 
						|
 | 
						|
__all__ = ['BeautifulSoup']
 | 
						|
 | 
						|
import os
 | 
						|
import re
 | 
						|
import traceback
 | 
						|
import warnings
 | 
						|
 | 
						|
from .builder import builder_registry, ParserRejectedMarkup
 | 
						|
from .dammit import UnicodeDammit
 | 
						|
from .element import (
 | 
						|
    CData,
 | 
						|
    Comment,
 | 
						|
    DEFAULT_OUTPUT_ENCODING,
 | 
						|
    Declaration,
 | 
						|
    Doctype,
 | 
						|
    NavigableString,
 | 
						|
    PageElement,
 | 
						|
    ProcessingInstruction,
 | 
						|
    ResultSet,
 | 
						|
    SoupStrainer,
 | 
						|
    Tag,
 | 
						|
    )
 | 
						|
 | 
						|
# The very first thing we do is give a useful error if someone is
 | 
						|
# running this code under Python 3 without converting it.
 | 
						|
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
 | 
						|
 | 
						|
class BeautifulSoup(Tag):
    """The root object of a parse tree, and the sink for tree-builder events.

    A BeautifulSoup object drives its tree builder by calling the
    builder's own methods:
      builder.reset()
      builder.feed(markup)

    The tree builder may call these methods of the soup from its
    feed() implementation:
      handle_starttag(name, namespace, nsprefix, attrs) # See note about return value
      handle_endtag(name, nsprefix=None)
      handle_data(data) # Appends to the current data node
      endData(containerClass=NavigableString) # Ends the current data node

    No matter how complicated the underlying parser is, you should be
    able to build a tree using 'start tag' events, 'end tag' events,
    'data' events, and "done with data" events.

    If you encounter an empty-element tag (aka a self-closing tag,
    like HTML's <br> tag), call handle_starttag and then
    handle_endtag.
    """
    # Name given to the soup object itself when it acts as the root tag.
    ROOT_TAG_NAME = u'[document]'

    # If the end-user gives no indication which tree builder they
    # want, look for one with these features.
    DEFAULT_BUILDER_FEATURES = ['html', 'fast']

    # Characters treated as collapsible whitespace by endData():
    # space, line feed, tab, form feed, carriage return.
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

    # Template for the warning issued when the caller did not name a
    # parser. Filled in with markup_type, parser, line_number, filename.
    NO_PARSER_SPECIFIED_WARNING = (
        "No parser was explicitly specified, so I'm using the best "
        "available %(markup_type)s parser for this system "
        "(\"%(parser)s\"). This usually isn't a problem, but if you run "
        "this code on another system, or in a different virtual "
        "environment, it may use a different parser and behave "
        "differently.\n\n"
        "The code that caused this warning is on line %(line_number)s of "
        "the file %(filename)s. To get rid of this warning, change code "
        "that looks like this:\n\n"
        " BeautifulSoup(YOUR_MARKUP)\n\n"
        "to this:\n\n"
        " BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
    )
 | 
						|
 | 
						|
    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 **kwargs):
        """Parse `markup` and make this object the root of the parse tree.

        :param markup: The document to parse: a byte string, a Unicode
            string, or any object with a read() method (in which case
            the result of read() is parsed).
        :param features: A feature/parser name, or a list of them, used
            to look up a tree builder in the builder registry. When
            empty or None, DEFAULT_BUILDER_FEATURES is used. Only
            consulted when `builder` is None.
        :param builder: A tree builder object to use instead of looking
            one up through `features`.
        :param parse_only: Stored as self.parse_only and consulted by
            handle_starttag() and endData() to decide which top-level
            tags and strings enter the tree (e.g. a SoupStrainer).
        :param from_encoding: Passed to the builder's prepare_markup().
            Dropped, with a warning, when `markup` is already Unicode.
        :param exclude_encodings: Passed to the builder's
            prepare_markup().
        :param kwargs: Accepted only so that a few Beautiful Soup 3
            argument names can be recognized: some are ignored with a
            warning, and parseOnlyThese / fromEncoding are mapped to
            their new names.
        :raises TypeError: if any unrecognized keyword argument remains.
        :raises FeatureNotFound: if no registered tree builder offers
            the requested features.
        """

        # Beautiful Soup 3 arguments that no longer do anything. Each one
        # is removed from kwargs (so it does not trip the "unexpected
        # keyword" check below) and reported with a warning.
        # convertEntities used to be left in kwargs, which turned the
        # promised warning into a TypeError.
        ignored_bs3_arguments = (
            ('convertEntities',
             "BS4 does not respect the convertEntities argument to the "
             "BeautifulSoup constructor. Entities are always converted "
             "to Unicode characters."),
            ('markupMassage',
             "BS4 does not respect the markupMassage argument to the "
             "BeautifulSoup constructor. The tree builder is responsible "
             "for any necessary markup massage."),
            ('smartQuotesTo',
             "BS4 does not respect the smartQuotesTo argument to the "
             "BeautifulSoup constructor. Smart quotes are always converted "
             "to Unicode characters."),
            ('selfClosingTags',
             "BS4 does not respect the selfClosingTags argument to the "
             "BeautifulSoup constructor. The tree builder is responsible "
             "for understanding self-closing tags."),
            ('isHTML',
             "BS4 does not respect the isHTML argument to the "
             "BeautifulSoup constructor. Suggest you use "
             "features='lxml' for HTML and features='lxml-xml' for "
             "XML."),
        )
        for old_name, message in ignored_bs3_arguments:
            if old_name in kwargs:
                del kwargs[old_name]
                warnings.warn(message)

        def deprecated_argument(old_name, new_name):
            """Pop a renamed BS3 argument out of kwargs, warning about it."""
            if old_name in kwargs:
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
                    'has been renamed to "%s."' % (old_name, new_name))
                value = kwargs[old_name]
                del kwargs[old_name]
                return value
            return None

        # The new-style argument wins; the old name is only looked at
        # (and removed from kwargs) when the new one is not set.
        parse_only = parse_only or deprecated_argument(
            "parseOnlyThese", "parse_only")

        from_encoding = from_encoding or deprecated_argument(
            "fromEncoding", "from_encoding")

        if from_encoding and isinstance(markup, unicode):
            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
            from_encoding = None

        if len(kwargs) > 0:
            # Mimic the error Python itself raises for an unknown keyword.
            arg = list(kwargs.keys()).pop()
            raise TypeError(
                "__init__() got an unexpected keyword argument '%s'" % arg)

        if builder is None:
            original_features = features
            if isinstance(features, basestring):
                features = [features]
            if not features:
                features = self.DEFAULT_BUILDER_FEATURES
            builder_class = builder_registry.lookup(*features)
            if builder_class is None:
                raise FeatureNotFound(
                    "Couldn't find a tree builder with the features you "
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))
            builder = builder_class()
            # Warn unless the caller named this exact builder, since a
            # feature-based lookup may pick a different parser elsewhere.
            if not (original_features == builder.NAME or
                    original_features in builder.ALTERNATE_NAMES):
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"

                # limit=2 yields [caller frame, this frame], oldest
                # first, so index 0 is the code that invoked the
                # constructor rather than the outermost frame of the
                # whole program.
                caller = traceback.extract_stack(limit=2)[0]
                filename = caller[0]
                line_number = caller[1]
                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
                    filename=filename,
                    line_number=line_number,
                    parser=builder.NAME,
                    markup_type=markup_type))

        self.builder = builder
        self.is_xml = builder.is_xml
        self.known_xml = self.is_xml
        # Circular reference; cleared again at the end of this method.
        self.builder.soup = self

        self.parse_only = parse_only

        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256 and (
                (isinstance(markup, bytes) and b'<' not in markup)
                or (isinstance(markup, unicode) and u'<' not in markup)
        ):
            # Print out warnings for a couple beginner problems
            # involving passing non-markup to Beautiful Soup.
            # Beautiful Soup will still parse the input as markup,
            # just in case that's what the user really wants.
            if (isinstance(markup, unicode)
                    and not os.path.supports_unicode_filenames):
                possible_filename = markup.encode("utf8")
            else:
                possible_filename = markup
            is_file = False
            try:
                is_file = os.path.exists(possible_filename)
            except Exception:
                # This is almost certainly a problem involving
                # characters not valid in filenames on this
                # system. Deliberately best-effort: just let it go.
                pass
            if is_file:
                if isinstance(markup, unicode):
                    markup = markup.encode("utf8")
                warnings.warn(
                    '"%s" looks like a filename, not markup. You should'
                    ' probably open this file and pass the filehandle into'
                    ' Beautiful Soup.' % markup)
            self._check_markup_is_url(markup)

        # The builder may offer several candidate decodings of the
        # markup; use the first one the parser does not reject.
        for (self.markup, self.original_encoding, self.declared_html_encoding,
             self.contains_replacement_characters) in (
                 self.builder.prepare_markup(
                     markup, from_encoding,
                     exclude_encodings=exclude_encodings)):
            self.reset()
            try:
                self._feed()
                break
            except ParserRejectedMarkup:
                pass

        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None
 | 
						|
 | 
						|
    def __copy__(self):
        """Return a new soup obtained by re-parsing this one's UTF-8 output.

        The new object is of the same class as this one and is handed
        this object's tree builder.
        """
        serialized = self.encode('utf-8')
        clone = type(self)(
            serialized, builder=self.builder, from_encoding='utf-8')

        # The tree was serialized as UTF-8 only for the purpose of
        # copying; report the encoding of the original markup instead.
        clone.original_encoding = self.original_encoding
        return clone
 | 
						|
 | 
						|
    def __getstate__(self):
        """Return the instance state to pickle.

        The state is a shallow copy of the instance dictionary. A tree
        builder that reports itself as not picklable is replaced by
        None, because frequently a tree builder can't be pickled.

        :return: a dict suitable for pickling.
        """
        d = dict(self.__dict__)
        # The builder may already be None (for instance on an object
        # restored from a pickle); in that case there is nothing to
        # strip and no `picklable` attribute to consult.
        if ('builder' in d and d['builder'] is not None
                and not self.builder.picklable):
            d['builder'] = None
        return d
 | 
						|
 | 
						|
    @staticmethod
    def _check_markup_is_url(markup):
        """Warn if `markup` looks like a URL instead of a document.

        A value counts as URL-like when it starts with "http:" or
        "https:" and contains no space character. Markup can be
        unicode or str (py2) / bytes (py3); any other type is ignored.
        """
        if isinstance(markup, bytes):
            blank, url_schemes = b' ', (b"http:", b"https:")
        elif isinstance(markup, unicode):
            blank, url_schemes = u' ', (u"http:", u"https:")
        else:
            return

        # startswith() accepts a tuple and matches any of its members.
        if not markup.startswith(url_schemes):
            return
        if blank in markup:
            return

        # Byte strings are decoded leniently, for display only.
        if isinstance(markup, bytes):
            decoded_markup = markup.decode('utf-8', 'replace')
        else:
            decoded_markup = markup
        warnings.warn(
            '"%s" looks like a URL. Beautiful Soup is not an'
            ' HTTP client. You should probably use an HTTP client like'
            ' requests to get the document behind the URL, and feed'
            ' that document to Beautiful Soup.' % decoded_markup
        )
 | 
						|
 | 
						|
    def _feed(self):
        """Run the tree builder over self.markup and close what is left open."""
        tree_builder = self.builder
        tree_builder.reset()
        tree_builder.feed(self.markup)

        # Flush any pending string, then unwind the tag stack until only
        # the root (this object) is current.
        self.endData()
        root_name = self.ROOT_TAG_NAME
        while self.currentTag.name != root_name:
            self.popTag()
 | 
						|
 | 
						|
    def reset(self):
        """Reinitialize this object as an empty root tag and reset the builder."""
        # Re-run the Tag constructor on this very object, passing the
        # soup itself as the first argument and giving it the root name.
        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
        self.hidden = 1
        self.builder.reset()

        # Fresh parsing state: no open tags, no buffered text.
        self.tagStack = []
        self.preserve_whitespace_tag_stack = []
        self.currentTag = None
        self.current_data = []

        # The soup object is always the bottom entry of the tag stack.
        self.pushTag(self)
 | 
						|
 | 
						|
    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
        """Create a new tag that uses this soup's tree builder.

        Extra keyword arguments become the tag's attributes. This
        method does not insert the tag anywhere.
        """
        fresh_tag = Tag(None, self.builder, name, namespace, nsprefix, attrs)
        return fresh_tag
 | 
						|
 | 
						|
    def new_string(self, s, subclass=NavigableString):
        """Wrap `s` in `subclass` (NavigableString by default) and return it."""
        wrapped = subclass(s)
        return wrapped
 | 
						|
 | 
						|
    def insert_before(self, successor):
        """Always raise NotImplementedError; not supported on the soup object."""
        message = "BeautifulSoup objects don't support insert_before()."
        raise NotImplementedError(message)
 | 
						|
 | 
						|
    def insert_after(self, successor):
        """Always raise NotImplementedError; not supported on the soup object."""
        message = "BeautifulSoup objects don't support insert_after()."
        raise NotImplementedError(message)
 | 
						|
 | 
						|
    def popTag(self):
        """Pop the top of the tag stack and return the tag that is now current."""
        popped = self.tagStack.pop()

        # Keep the whitespace-preserving stack in step with the tag stack.
        whitespace_stack = self.preserve_whitespace_tag_stack
        if whitespace_stack and popped == whitespace_stack[-1]:
            whitespace_stack.pop()

        # currentTag is left untouched if the stack has become empty.
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag
 | 
						|
 | 
						|
    def pushTag(self, tag):
        """Make `tag` the current tag, adding it to the previous current tag."""
        enclosing = self.currentTag
        if enclosing:
            enclosing.contents.append(tag)

        self.tagStack.append(tag)
        self.currentTag = tag

        # Remember tags inside which whitespace must not be collapsed.
        if tag.name in self.builder.preserve_whitespace_tags:
            self.preserve_whitespace_tag_stack.append(tag)
 | 
						|
 | 
						|
    def endData(self, containerClass=NavigableString):
        """Turn the buffered character data into one string node.

        Does nothing when no data is buffered. Otherwise the buffer is
        emptied and its joined contents are wrapped in `containerClass`
        and passed to object_was_parsed(), unless self.parse_only
        rejects a top-level string.
        """
        if not self.current_data:
            return

        text = u''.join(self.current_data)

        # Outside whitespace-preserving tags, a string made only of
        # ASCII spaces collapses to a single newline (if it contained
        # one) or a single space.
        if (not self.preserve_whitespace_tag_stack
                and all(ch in self.ASCII_SPACES for ch in text)):
            text = '\n' if '\n' in text else ' '

        # Reset the data collector.
        self.current_data = []

        # Should we add this string to the tree at all?
        strainer = self.parse_only
        if strainer and len(self.tagStack) <= 1:
            if not strainer.text or not strainer.search(text):
                return

        self.object_was_parsed(containerClass(text))
 | 
						|
 | 
						|
    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree.

        `o` is appended to `parent` (the current tag when not given)
        and linked after `most_recent_element` (this object's most
        recently parsed element when not given).
        """
        parent = parent or self.currentTag
        previous_element = most_recent_element or self._most_recent_element

        if isinstance(o, Tag):
            # A tag may already carry links of its own; start from them.
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
            if not previous_element:
                previous_element = o.previous_element
        else:
            next_element = previous_sibling = next_sibling = None

        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

        self._most_recent_element = o
        parent.contents.append(o)

        if not parent.next_sibling:
            return

        # This node is being inserted into an element that has
        # already been parsed. Deal with any dangling references.
        siblings = parent.contents
        last_index = len(siblings) - 1
        # Locate `o` by identity, searching from the end.
        for index in range(last_index, -1, -1):
            if siblings[index] is o:
                break
        else:
            raise ValueError(
                "Error building tree: supposedly %r was inserted "
                "into %r after the fact, but I don't see it!" % (
                    o, parent
                )
            )

        if index == 0:
            previous_element = parent
            previous_sibling = None
        else:
            previous_element = previous_sibling = siblings[index-1]

        if index == last_index:
            next_element = parent.next_sibling
            next_sibling = None
        else:
            next_element = next_sibling = siblings[index+1]

        # Re-link in both directions.
        o.previous_element = previous_element
        if previous_element:
            previous_element.next_element = o

        o.next_element = next_element
        if next_element:
            next_element.previous_element = o

        o.next_sibling = next_sibling
        if next_sibling:
            next_sibling.previous_sibling = o

        o.previous_sibling = previous_sibling
        if previous_sibling:
            previous_sibling.next_sibling = o
 | 
						|
 | 
						|
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
        stack up to but *not* including the most recent instance of
        the given tag.

        Returns the value of the last popTag() call made, or None if
        nothing was popped.
        """
        if name == self.ROOT_TAG_NAME:
            # The BeautifulSoup object itself can never be popped.
            return

        most_recently_popped = None

        # Walk from the top of the stack down to, but excluding, the
        # root entry at index 0. Each pass removes at most the top tag,
        # so `depth` always addresses the current top of the stack.
        for depth in range(len(self.tagStack) - 1, 0, -1):
            candidate = self.tagStack[depth]
            is_target = (name == candidate.name
                         and nsprefix == candidate.prefix)
            if is_target and not inclusivePop:
                break
            most_recently_popped = self.popTag()
            if is_target:
                break

        return most_recently_popped
 | 
						|
 | 
						|
    def handle_starttag(self, name, namespace, nsprefix, attrs):
        """Push a start tag on to the stack.

        If this method returns None, the tag was rejected by the
        SoupStrainer. You should proceed as if the tag had not occurred
        in the document. For instance, if this was a self-closing tag,
        don't call handle_endtag.
        """
        # Any text buffered so far belongs before the new tag.
        self.endData()

        # The strainer is only consulted for top-level tags.
        strainer = self.parse_only
        if strainer and len(self.tagStack) <= 1:
            if strainer.text or not strainer.search_tag(name, attrs):
                return None

        previous = self._most_recent_element
        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
                  self.currentTag, previous)
        if tag is None:
            return tag

        # Chain the new tag after the most recently parsed element.
        if self._most_recent_element:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
        return tag
 | 
						|
 | 
						|
    def handle_endtag(self, name, nsprefix=None):
        """Flush buffered text, then pop the stack through the named tag."""
        self.endData()
        self._popToTag(name, nsprefix)
 | 
						|
 | 
						|
    def handle_data(self, data):
        """Buffer a chunk of character data until endData() is called."""
        pending = self.current_data
        pending.append(data)
 | 
						|
 | 
						|
    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a string or Unicode representation of this document.

        :param pretty_print: When true, the tree is rendered starting at
            indent level 0; otherwise no indent level is passed on.
        :param eventual_encoding: For XML documents, the encoding named
            in the generated XML declaration; pass None to omit it. Also
            passed through to Tag.decode().
        :param formatter: Passed through to Tag.decode().
        """
        if self.is_xml:
            # Print the XML declaration
            encoding_part = ''
            if eventual_encoding is not None:
                encoding_part = ' encoding="%s"' % eventual_encoding
            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
        else:
            prefix = u''

        indent_level = 0 if pretty_print else None
        return prefix + super(BeautifulSoup, self).decode(
            indent_level, eventual_encoding, formatter)
 | 
						|
 | 
						|
# Short aliases for the main class, to make the import easier to
# type: 'from bs4 import _soup'
_s = _soup = BeautifulSoup
 | 
						|
 | 
						|
class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser."""

    def __init__(self, *args, **kwargs):
        """Warn about the deprecation, then build a soup with features='xml'.

        Any `features` keyword supplied by the caller is overwritten.
        """
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.')
        kwargs['features'] = 'xml'
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
 | 
						|
 | 
						|
 | 
						|
class StopParsing(Exception):
    """Exception type defined by this module; not raised in this file.

    NOTE(review): presumably a signal for aborting a parse early --
    confirm against the tree builders that use it.
    """
 | 
						|
 | 
						|
class FeatureNotFound(ValueError):
    """Raised by the BeautifulSoup constructor when no tree builder
    offers the requested features."""
 | 
						|
 | 
						|
 | 
						|
# When run as a script, act as an HTML pretty-printer: parse standard
# input and write the prettified document to standard output.
if __name__ == '__main__':
    import sys
    soup = BeautifulSoup(sys.stdin)
    print(soup.prettify())
 |