
    3ja*                         U d Z ddlmZmZ dedefdZdZeed<    e	e      Z
e	e   ed<    ee      D  ci c]  \  } }|| 
 c}} Zeeef   ed	<   d
edefdZdededefdZdedefdZdededz  fdZyc c}} w )a  Early detection of escape-sequence-based encodings (ISO-2022, HZ-GB-2312, UTF-7).

These encodings use ESC (0x1B), tilde (~), or plus (+) sequences to switch
character sets.  They must be detected before binary detection (ESC is a control
byte) and before ASCII detection (HZ-GB-2312 and UTF-7 use only printable ASCII
bytes plus their respective shift markers).

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )DETERMINISTIC_CONFIDENCEDetectionResultdatareturnc                     d}	 | j                  d|      }|dk(  ry| j                  d|dz         }|dk(  ry| |dz   | }t        |      dk\  r$t        |      dz  dk(  rt        d |D              ry|dz   }s)	a  Check that at least one ~{...~} region contains valid GB2312 byte pairs.

    In HZ-GB-2312 GB mode, characters are encoded as pairs of bytes in the
    0x21-0x7E range.  We require at least one region with a non-empty, even-
    length run of such bytes.
    r   T   ~{F   ~}   c              3   <   K   | ]  }d |cxk  xr dk  nc   yw)!   ~   N ).0bs     D/DATA/.local/lib/python3.12/site-packages/chardet/pipeline/escape.py	<genexpr>z(_has_valid_hz_regions.<locals>.<genexpr>$   s     6v!DA%%%vs   )findlenall)r   startbeginendregions        r   _has_valid_hz_regionsr      s     E
		%'B;iiuqy)"9eai#& K1Fa1$6v66a     s@   ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
_B64_CHARS_UTF7_BASE64_B64_DECODE	b64_bytesc                    t        |       }|dz  }|dz  }|dkD  rt        | d      }d|z  dz
  }||z  ry|dz  }t        |      }d}d}	d}
| D ]1  }|dz  t        |   z  }|	dz  }	|	dk\  s|	dz  }	||	z	  dz  ||
<   |
dz  }
3 d}t        d|dz
  d	      D ]C  }||   dz  ||dz      z  }d
|cxk  rdk  r
n n|r yd})d|cxk  rdk  r
n n|s yd}>|r yd}E | S )u  Check if base64 bytes decode to valid UTF-16BE with correct padding.

    A valid UTF-7 shifted sequence must:
    1. Contain at least 3 Base64 characters (18 bits, enough for one 16-bit
       UTF-16 code unit).
    2. Have zero-valued trailing padding bits (the unused low bits of the last
       Base64 sextet after the last complete 16-bit code unit).
    3. Decode to valid UTF-16BE — no lone surrogates.

    This rejects accidental ``+<alphanum>-`` patterns found in URLs, MIME
    boundaries, hex-encoded hashes (e.g. SHA-1 git refs), and other ASCII data.

    The caller (``_has_valid_utf7_sequences``) already checks ``b64_len >= 3``
    before calling this function, so *b64_bytes* is always at least 3 bytes.
          r   r	      F      r   i   i  Ti   i  )r   r   	bytearrayrange)r    n
total_bitspadding_bitslast_valmask	num_bytesrawbit_buf	bit_countout_idxc	prev_highi	code_units                  r   _is_valid_utf7_b64r7   2   sB     	IAQJ ?Lay}-\!Q&d?
 aI
I
CGIGa<;q>1Q	>NI#y0D8CLqLG  I1i!mQ'Vq[CAJ.	Y(&(Iy*F*II ( =r   posc                     t         t        d      z  }d}|dz
  }|dk\  r*| |   }|dv r|dz  }||v r|dz  }|dz  }n	 |dk\  S |dk\  r*|dk\  S )ae  Return True if the ``+`` at *pos* is embedded in a base64 stream.

    Walks backward from *pos*, skipping CR/LF, and counts consecutive base64
    characters (including ``=`` for padding).  If 4 or more are found, the
    ``+`` is likely part of a PEM certificate, email attachment, or similar
    base64 blob rather than a real UTF-7 shift character.
       =r   r$   >   
         )r   	frozenset)r   r8   b64_with_padcountr5   r   s         r   _is_embedded_in_base64rA   o   s     $0)D/#ALEaA
q&GFAQJEFAA: q& A:r   c                    d}	 | j                  t        d      |      }|dk(  ry|dz   }|t        |       k  r| |   t        d      k(  r|dz   }L|t        |       k  rX| |   t        d      k(  rG|t        |       k  r6| |   t        d      k(  r%|dz  }|t        |       k  r| |   t        d      k(  r%|}t        | |      r|}|}|t        |       k  r*| |   t        v r|dz  }|t        |       k  r| |   t        v r||z
  }| || }|dk\  r|j                         r|}|dk\  rt        |      ryt        ||      }<)	as  Check that *data* contains at least one valid UTF-7 shifted sequence.

    A valid shifted sequence is ``+<base64 chars>`` terminated by either an
    explicit ``-`` or any non-Base64 character (per RFC 2152).  The base64
    portion must decode to valid UTF-16BE with correct zero-padding bits.
    The sequence ``+-`` is a literal plus sign and is **not** counted.
    r   T+r	   Fr$   -   )r   ordr   rA   r   islowerr7   max)r   r   	shift_posr8   r5   b64_lenb64_datas          r   _has_valid_utf7_sequencesrL      sa    E
IIc#h.	?!mT?tCyCH4!GE T?tCyCH4D	/d3i3s8&;q D	/d3i3s8&;E "$	2E#d)mQ< 7FA #d)mQ< 7c'A; a<H,,.E a<.x8Ca r   Nc                    d| v }d| v }d| v }|s|s|sy|rd| v sd| v sd| v rt        dt        d	
      S d| v rt        dt        d	
      S d| v sd| v sd| v sd| v r,d| v rd| v rt        dt        d	
      S t        dt        d	
      S d| v rt        dt        d
      S |r%d| v r!d| v rt        |       rt        dt        d
      S |r+t        |       dk  rt	        |       rt        dt        d
      S y)zDetect ISO-2022, HZ-GB-2312, and UTF-7 from escape/tilde/plus sequences.

    :param data: The raw byte data to examine.
    :returns: A :class:`DetectionResult` if an escape encoding is found, or ``None``.
          ~   +Ns   $(Os   $(Ps   $(Qiso2022_jp_2004ja)encoding
confidencelanguages   (Iiso2022_jp_exts   $Bs   $@s   (Js   $(D      iso2022_jp_2s   $)C
iso2022_krkor   r
   hzzh   zutf-7)r   r   r   rH   rL   )r   has_esc	has_tildehas_pluss       r   detect_escape_encodingrb      sY    oGIt|H9Xt!3zT7I"*3  ")3  D D T! $7d?&-7!  #'3  "%3  Ud]u}9Nt9T/
 	
 CI$)B4)H/
 	
 r   )__doc__chardet.pipeliner   r   bytesboolr   r   __annotations__r>   r   int	enumerater   dictr7   rA   rL   rb   )r5   r3   s   00r   <module>rk      s   
 G $ 6 X
E W(4in 4 1:*0EF0E1q!t0EFT#s(^ F:% :D :z S T 09E 9d 9xO O?T+A Oi Gs   A7