MSV FM

Path : /usr/lib64/python2.7/site-packages/lxml/html/
Current < : //usr/lib64/python2.7/site-packages/lxml/html/soupparser.py
"""External interface to the BeautifulSoup HTML parser.
"""

__all__ = ["fromstring", "parse", "convert_tree"]

import re
from lxml import etree, html

try:
    from bs4 import (
        BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
        Declaration, Doctype)
    _DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
except ImportError:
    from BeautifulSoup import (
        BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
        Declaration)
    _DECLARATION_OR_DOCTYPE = Declaration


def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Build an Element tree from a string of HTML using BeautifulSoup.

    The return value is the root ``<html>`` Element of the tree.

    The `beautifulsoup` keyword selects a different BeautifulSoup parser
    and the `makeelement` keyword a different Element factory function.
    When they are left out, the standard ``BeautifulSoup`` class and the
    default factory of `lxml.html` are used.  Any remaining keyword
    arguments are handed on to the BeautifulSoup parser.
    """
    html_root = _parse(data, beautifulsoup, makeelement, **bsargs)
    return html_root


def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    `file` is either an object with a ``read`` attribute, which is handed
    to the parser as is and left open for the caller to manage, or
    anything accepted by the builtin ``open()``, in which case the file
    is opened here and closed again once parsing is done.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Any further keyword arguments are passed on to the
    BeautifulSoup parser.

    Returns an ``etree.ElementTree`` wrapping the parsed root element.
    """
    if hasattr(file, 'read'):
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    else:
        # We opened the file ourselves, so we are responsible for closing
        # it.  _parse() has finished building the Element tree by the time
        # it returns, so nothing reads from the file after this block.
        with open(file) as source:
            root = _parse(source, beautifulsoup, makeelement, **bsargs)
    return etree.ElementTree(root)


def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    root = _convert_tree(beautiful_soup_tree, makeelement)
    # list(root) replaces the deprecated getchildren(); it also gives us a
    # snapshot, so removing children below does not disturb the iteration.
    children = list(root)
    for child in children:
        # Detach each child so it is returned as a tree of its own.
        root.remove(child)
    return children


# helpers

def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Run the BeautifulSoup parser on `source` and return an html Element.

    `beautifulsoup` falls back to the imported ``BeautifulSoup`` class when
    it is None.  Parser defaults are filled into `bsargs` only for keys the
    caller did not supply.
    """
    soup_factory = BeautifulSoup if beautifulsoup is None else beautifulsoup

    # bs3 is recognised by its HTML_ENTITIES attribute.
    if hasattr(soup_factory, "HTML_ENTITIES"):
        bsargs.setdefault('convertEntities', 'html')
    # bs4 is recognised by DEFAULT_BUILDER_FEATURES; default to the
    # Python html parser.
    if hasattr(soup_factory, "DEFAULT_BUILDER_FEATURES"):
        bsargs.setdefault('features', 'html.parser')

    soup = soup_factory(source, **bsargs)
    converted = _convert_tree(soup, makeelement)

    # from ET: wrap the document in a html root element, if necessary
    has_single_html_child = len(converted) == 1 and converted[0].tag == "html"
    if has_single_html_child:
        return converted[0]
    converted.tag = "html"
    return converted


# Bound ``match`` of a case-insensitive pattern for an HTML DOCTYPE
# declaration.  Leading whitespace, '<' and '!' characters are skipped, then
# ``DOCTYPE HTML`` is required, optionally followed by ``PUBLIC``.
# Group 1 is the first quoted string after that and group 2 the second one;
# both are captured *including* their surrounding quotes and are None when
# absent.  _convert_tree() uses them as public id and system URL.
_parse_doctype_declaration = re.compile(
    r'(?:\s|[<!])*DOCTYPE\s*HTML'
    r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
    r'(?:\s+(\'[^\']*\'|"[^"]*"))?',
    re.IGNORECASE).match


class _PseudoTag:
    """Minimal imitation of BeautifulSoup.Tag.

    Stands in for a missing ``<html>`` tag: it is named ``html``, has an
    empty attribute list and iterates over the given `contents`.
    """

    def __init__(self, contents):
        self.contents = contents
        self.attrs = []
        self.name = 'html'

    def __iter__(self):
        return iter(self.contents)


def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a BeautifulSoup tree into an Element tree with one root.

    `makeelement` is the Element factory used for the root; when it is
    None, ``html.html_parser.makeelement`` is used.

    The top-level nodes of the soup are merged under a single ``html``
    root: an existing top-level ``<html>`` tag if there is one (its
    ``contents`` are rewritten in place to absorb the other top-level
    roots), otherwise a ``_PseudoTag``.  Convertible nodes before and
    after the root elements are attached as siblings of the converted
    root, and a recognised DOCTYPE declaration is copied into the
    tree's docinfo.

    Returns the converted root Element.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    # declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    # instructions, whitespace
    first_element_idx = last_element_idx = None
    html_root = declaration = None
    for i, e in enumerate(beautiful_soup_tree):
        if isinstance(e, Tag):
            if first_element_idx is None:
                first_element_idx = i
            last_element_idx = i
            # remember only the first top-level tag named 'html' (any case)
            if html_root is None and e.name and e.name.lower() == 'html':
                html_root = e
        elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE):
            # only the first declaration/doctype node is kept
            declaration = e

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    if first_element_idx is None:
        # no Tag at top level: everything becomes content of the root
        pre_root = post_root = []
        roots = beautiful_soup_tree.contents
    else:
        pre_root = beautiful_soup_tree.contents[:first_element_idx]
        roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1]
        post_root = beautiful_soup_tree.contents[last_element_idx+1:]

    # Reorganize so that there is one <html> root...
    if html_root is not None:
        # ... use existing one if possible, ...
        i = roots.index(html_root)
        # siblings before/after the <html> tag are moved inside it
        html_root.contents = roots[:i] + html_root.contents + roots[i+1:]
    else:
        # ... otherwise create a new one.
        html_root = _PseudoTag(roots)

    convert_node = _init_node_converters(makeelement)

    # Process pre_root
    res_root = convert_node(html_root)
    prev = res_root
    # Walk backwards and insert each node before the previously inserted
    # one, so the siblings end up in their original order.
    for e in reversed(pre_root):
        converted = convert_node(e)
        # convert_node() yields None for nodes without an Element
        # counterpart (e.g. text), which are skipped here
        if converted is not None:
            prev.addprevious(converted)
            prev = converted

    # ditto for post_root
    prev = res_root
    for e in post_root:
        converted = convert_node(e)
        if converted is not None:
            prev.addnext(converted)
            prev = converted

    if declaration is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = declaration.output_ready()
        except AttributeError:
            # no output_ready() on this node: fall back to its .string
            doctype_string = declaration.string

        match = _parse_doctype_declaration(doctype_string)
        if not match:
            # Something is wrong if we end up in here. Since soupparser should
            # tolerate errors, do not raise Exception, just let it pass.
            pass
        else:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root


def _init_node_converters(makeelement):
    """Build and return the ``convert_node(bs_node, parent=None)`` function.

    `makeelement` is the Element factory used for tags converted without a
    parent.  The returned function dispatches on the type of the
    BeautifulSoup node to one of the handlers registered below and returns
    the handler's result, or None if no handler applies.
    """
    # node type -> handler; also caches the result of isinstance lookups
    converters = {}
    # registered types in registration order, for the isinstance fallback
    ordered_node_types = []

    def converter(*types):
        # decorator factory: register the decorated handler for all `types`
        def add(handler):
            for t in types:
                converters[t] = handler
                ordered_node_types.append(t)
            return handler
        return add

    def find_best_converter(node):
        # first registered type that `node` is an instance of wins
        for t in ordered_node_types:
            if isinstance(node, t):
                return converters[t]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        try:
            handler = converters[type(bs_node)]
        except KeyError:
            # unknown exact type: resolve via isinstance and cache the
            # result (possibly None) for this type
            handler = converters[type(bs_node)] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        # returns a dict of attribute name -> unescaped value
        if isinstance(bs_attrs, dict):  # bs4
            attribs = {}
            for k, v in bs_attrs.items():
                if isinstance(v, list):
                    # list values are joined into one space separated string
                    v = " ".join(v)
                attribs[k] = unescape(v)
        else:
            # otherwise expect an iterable of (name, value) pairs
            attribs = dict((k, unescape(v)) for k, v in bs_attrs)
        return attribs

    def append_text(parent, text):
        # text goes into parent.text while parent has no children,
        # afterwards into the tail of the last child
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            parent[-1].tail = (parent[-1].tail or '') + text

    # converters are tried in order of their definition

    @converter(Tag, _PseudoTag)
    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is not None:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)
        else:
            # no parent: create the element through the factory
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            try:
                handler = converters[type(child)]
            except KeyError:
                pass
            else:
                if handler is not None:
                    handler(child, res)
                continue
            # type not seen yet: let convert_node() resolve and cache it
            convert_node(child, res)
        return res

    @converter(Comment)
    def convert_comment(bs_node, parent):
        res = html.HtmlComment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    @converter(ProcessingInstruction)
    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>). Fix.
            bs_node = bs_node[:-1]
        # split at the first space into at most two arguments
        res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
        if parent is not None:
            parent.append(res)
        return res

    @converter(NavigableString)
    def convert_text(bs_node, parent):
        # text never becomes a node of its own: it is appended to the
        # parent (if any) and None is returned
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    return convert_node


# copied from ET's ElementSoup

try:
    from html.entities import name2codepoint  # Python 3
except ImportError:
    from htmlentitydefs import name2codepoint


# Bound ``sub`` of the pattern for named entity references such as
# ``&amp;``.  Numeric references (``&#38;``) do not match it.
handle_entities = re.compile(r"&(\w+);").sub


# Python 3 has no ``unichr`` builtin; ``chr`` takes its place there.
try:
    unichr
except NameError:
    unichr = chr


def unescape(string):
    """Replace known named entity references in `string` by their character.

    Entity names that are not in ``name2codepoint`` are left untouched.
    A false `string` (empty or None) yields the empty string.
    """
    if not string:
        return ''

    def replace_entity(match):
        # work around oddities in BeautifulSoup's entity handling
        entity_name = match.group(1)
        if entity_name not in name2codepoint:
            return match.group(0)  # use as is
        return unichr(name2codepoint[entity_name])

    return handle_entities(replace_entity, string)
MSV FM

THE FINEST HOTEL NEAR LAKE KIVU

The Perfect Base For You

About Us

Delicious Interior With The Pinch Of Everything

Accommodation

Get a Comfortable Room
Feel The Comfort

Restaurant

CHECK DETAILS

Standard Single

CHECK DETAILS

Kivu Side

CHECK DETAILS

Accommodation

CHECK DETAILS

Services

We Provide Top Class Facility
Especially For You

Beach BBQ Party

Breakfast

Conference Hall

Enjoy with your partner

Honeymoon Package

80%

Get In Touch

Menu

Meet Kinsley

Instagram

MSV FM

THE FINEST HOTEL NEAR LAKE KIVU

The Perfect Base For You

About Us

Delicious Interior With The Pinch Of Everything

Accommodation

Get a Comfortable Room Feel The Comfort

Restaurant

CHECK DETAILS

Standard Single

CHECK DETAILS

Kivu Side

CHECK DETAILS

Accommodation

CHECK DETAILS

Services

We Provide Top Class Facility Especially For You

Beach BBQ Party

Breakfast

Conference Hall

Enjoy with your partner

Honeymoon Package

80%

Get In Touch

Menu

Meet Kinsley

Instagram

Search your Room

Get a Comfortable Room
Feel The Comfort

We Provide Top Class Facility
Especially For You