
    3jm                    <   d Z ddlmZ ddlZddlmZmZ ddlmZ dZ	 ej                  dej                        Z ej                  dej                        Z ej                  d	ej                        Z ej                  d
ej                        ZddZddZddZy)z<Stage 1b: charset declaration extraction (HTML/XML/PEP 263).    )annotationsN)DETERMINISTIC_CONFIDENCEDetectionResult)lookup_encodingi   s*   <\?xml[^>]+encoding\s*=\s*['"]([^'"]+)['"]s,   <meta[^>]+charset\s*=\s*['"]?\s*([^\s'">;]+)s6   <meta[^>]+content\s*=\s*['"][^'"]*charset=([^\s'">;]+)s&   ^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)c                j   d| dd vrydj                  | j                  dd      dd       }t        j                  |      }|r[	 |j	                  d      j                  d      j                         }t        |      }|t        | |      rt        |t        dd	      S y# t        t        f$ r Y yw xY w)
aB  Check the first two lines of *data* for a PEP 263 encoding declaration.

    PEP 263 declarations (e.g. ``# -*- coding: utf-8 -*-``) are only valid
    on line 1 or line 2 of a Python source file.

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
       #N      
      asciiztext/x-pythonencoding
confidencelanguage	mime_type)joinsplit
_PEP263_REsearchgroupdecodestripUnicodeDecodeError
ValueErrorr   _validate_bytesr   r   )datafirst_two_linesmatchraw_namer   s        D/DATA/.local/lib/python3.12/site-packages/chardet/pipeline/markup.py_detect_pep263r"      s     4:jjE1!5bq!9:Oo.E	{{1~,,W5;;=H #8,OD($C"!3)	   #J/ 		s   .B   B21B2c                   | sy| dt          }t        t        t        fD ]  }|j	                  |      }|s	 |j                  d      j                  d      j                         }t        |      }|Tt        | |      sa|t        u rdnd}t        |t        d|      c S  t        |       S # t        t        f$ r Y w xY w)a  Scan the first bytes of *data* for a charset declaration.

    Checks for:

    1. ``<?xml ... encoding="..."?>``
    2. ``<meta charset="...">``
    3. ``<meta http-equiv="Content-Type" content="...; charset=...">``
    4. PEP 263 ``# -*- coding: ... -*-`` (first two lines only)

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
    Nr   r   ztext/xmlz	text/htmlr   )_SCAN_LIMIT_XML_ENCODING_RE_HTML5_CHARSET_RE_HTML4_CONTENT_TYPE_REr   r   r   r   r   r   r   r   r   r   r"   )r   headpatternr   r    r   r   s          r!   detect_markup_charsetr*   :   s     D$&79OPt$ ;;q>009??A 'x0H#h(G*15E*EJ;	&%7!'	  Q" $ '
3 s   .B,,B>=B>c                j    	 | dt          j                  |       y# t        t        t        f$ r Y yw xY w)zCheck that *data* can be decoded under *encoding* without errors.

    Only validates the first ``_SCAN_LIMIT`` bytes to avoid decoding a
    full 200 kB input just to verify a charset declaration found in the
    header.
    NFT)r$   r   r   LookupErrorr   )r   r   s     r!   r   r   `   s<    \k!!(+  Z8 s    22)r   bytesreturnzDetectionResult | None)r   r-   r   strr.   bool)__doc__
__future__r   rechardet.pipeliner   r   chardet.registryr   r$   compile
IGNORECASEr%   r&   r'   	MULTILINEr   r"   r*   r        r!   <module>r;      s    B " 	 F ,2::6  BJJ8"--  $BBMM  RZZBBLLQ
># Lr:   