ó
ÄqPc           @   s¹  d  Z  d d l m Z d d l m Z d d l m Z d d l m	 Z	 m
 Z
 m Z y
 e Z Wn e k
 r{ e e f Z n Xy d d l m Z Wn! e k
 r³ d d l m Z n Xy d d l m Z Wn! e k
 rë d d l m Z n Xd e f d	 „  ƒ  YZ y d d
 l m Z Wn e k
 r)n  Xd e f d „  ƒ  YZ e ƒ  Z d „  Z e d d „ Z e e d d „ Z e e d d „ Z  e d d „ Z! e d d „ Z" d „  Z# e ƒ  Z$ d S(   s?   
An interface to html5lib that mimics the lxml.html interface.
iÿÿÿÿ(   t
   HTMLParser(   t   TreeBuilder(   t   etree(   t   _contains_block_level_tagt   XHTML_NAMESPACEt   Element(   t   urlopen(   t   urlparseR    c           B   s   e  Z d  Z e d „ Z RS(   s*   An html5lib HTML parser with lxml as tree.c         K   s    t  j |  d | d t | d  S(   Nt   strictt   tree(   t   _HTMLParsert   __init__R   (   t   selfR   t   kwargs(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR      s    (   t   __name__t
   __module__t   __doc__t   FalseR   (    (    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR       s   (   t   XHTMLParserR   c           B   s   e  Z d  Z e d „ Z RS(   s+   An html5lib XHTML Parser with lxml as tree.c         K   s    t  j |  d | d t | d  S(   NR   R	   (   t   _XHTMLParserR   R   (   R   R   R   (    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR   (   s    (   R   R   R   R   R   (    (    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR   %   s   c         C   s6   |  j  | ƒ } | d  k	 r | S|  j  d t | f ƒ S(   Ns   {%s}%s(   t   findt   NoneR   (   R	   t   tagt   elem(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt	   _find_tag.   s    c         C   sL   t  |  t ƒ s t d ƒ ‚ n  | d k r3 t } n  | j |  d | ƒj ƒ  S(   s%   Parse a whole document into a string.s   string requiredt
   useChardetN(   t
   isinstancet   _stringst	   TypeErrorR   t   html_parsert   parset   getroot(   t   htmlt   guess_charsett   parser(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt   document_fromstring5   s
    	c         C   s¥   t  |  t ƒ s t d ƒ ‚ n  | d k r3 t } n  | j |  d d | ƒ} | r¡ t  | d t ƒ r¡ | r¡ | d j ƒ  r” t j d | d ƒ ‚ n  | d =q¡ n  | S(   s”  Parses several HTML elements, returning a list of elements.

    The first item in the list may be a string.  If no_leading_text is true,
    then it will be an error if there is leading text, and it will always be
    a list of only elements.

    If `guess_charset` is `True` and the text was not unicode but a
    bytestring, the `chardet` library will perform charset guessing on the
    string.
    s   string requiredt   divR   i    s   There is leading text: %rN(	   R   R   R   R   R   t   parseFragmentt   stripR   t   ParserError(   R    t   no_leading_textR!   R"   t   children(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt   fragments_fromstring@   s    		c         C   s;  t  |  t ƒ s t d ƒ ‚ n  t | ƒ } t |  d | d | d | ƒ} | r· t  | t ƒ sg d } n  t | ƒ } | r³ t  | d t ƒ r£ | d | _ | d =n  | j | ƒ n  | S| sÏ t j	 d ƒ ‚ n  t
 | ƒ d k ró t j	 d	 ƒ ‚ n  | d } | j r.| j j ƒ  r.t j	 d
 | j ƒ ‚ n  d | _ | S(   sX  Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If create_parent is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.  In
    this case, leading or trailing text is allowed.
    s   string requiredR!   R"   R(   R$   i    s   No elements foundi   s   Multiple elements founds   Element followed by text: %rN(   R   R   R   t   boolR*   R   t   textt   extendR   R'   t   lent   tailR&   R   (   R    t   create_parentR!   R"   t   accept_leading_textt   elementst   new_roott   result(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt   fragment_fromstring\   s2    

	

	c         C   s  t  |  t ƒ s t d ƒ ‚ n  t |  d | d | ƒ} |  d  j ƒ  j ƒ  } | j d ƒ sj | j d ƒ rn | St | d ƒ } t | ƒ r | St | d ƒ } t | ƒ d	 k rò | j	 sÈ | j	 j
 ƒ  rò | d
 j sê | d
 j j
 ƒ  rò | d St | ƒ r
d | _ n	 d | _ | S(   sü   Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    base_url will set the document's base_url attribute (and the tree's docinfo.URL)
    s   string requiredR"   R!   i2   s   <htmls	   <!doctypet   headt   bodyi   iÿÿÿÿi    R$   t   span(   R   R   R   R#   t   lstript   lowert
   startswithR   R.   R,   R&   R/   R   R   (   R    R!   R"   t   doct   startR6   R7   (    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt
   fromstring…   s$    	,"	c         C   sj   | d k r t } n  t |  t ƒ s- |  } n* t |  ƒ rH t |  ƒ } n t |  d ƒ } | j | d | ƒS(   s·   Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.
    t   rbR   N(   R   R   R   R   t   _looks_like_urlR   t   openR   (   t   filename_url_or_fileR!   R"   t   fp(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR   ¯   s    		c         C   s   t  |  ƒ d } | d k S(   Ni    t    (   R   (   t   strt   scheme(    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyR@   ¾   s    N(%   R   t   html5libR    R
   t    html5lib.treebuilders.etree_lxmlR   t   lxmlR   t	   lxml.htmlR   R   R   t
   basestringR   t	   NameErrort   bytesRE   t   urllib2R   t   ImportErrort   urllib.requestR   t   urllib.parseR   R   t   xhtml_parserR   t   TrueR   R#   R   R*   R5   R>   R   R@   R   (    (    (    s;   /usr/lib64/python2.7/site-packages/lxml/html/html5parser.pyt   <module>   sB   
		(*	