
    3j8                       U d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 i d e
dd	h      d
 e
ddh      d e
dh      d e
dh      d e
dh      d e
ddh      d e
ddh      d e
dh      d e
dh      d e
dh      d e
dh      d e
dh      d e
h d       d! e
d"d#h      d$ e
d%h      d& e
d	h      d' e
d(h       e
d)h       e
d*h       e
d+h       e
d,h       e
d-h       e
d.h       e
d/d0h       e
d1h       e
d1h       e
d2d3h       e
d4h       e
d4h      d5Zd6ed7<   d	dd	d(d)d*d+d,d-dd.dd8Zd9ed:<   dd;Z	 	 	 	 dd<ZeZi ddd=d>d?d@ddAdddBdddCdDdEd"ddFdGdHd&dIdJdKdLdMdNdOdPdQdRdSdTi dUdVdWdXddYdZd
d1d[d4d\d]d^d)d_d	d`d+dad-dbd,dcdddedfdgdhdidjdkdldmZd9edn<   	 	 	 	 ddoZdpZdqedr<   dsZdqedt<   du f	 	 	 	 	 ddvZ ee      Zd6edw<   ddxZi Zd6edy<   ej3                         D ]=  \  ZZ e	e      xs eZ e
dz eD              Zej=                  e e
             ez  ee<   ?  eed{ |      Zd6ed}<   dd~Z ddZ! e
ddh      Z"ded<   ddZ#	 	 	 	 	 	 	 	 ddZ$y)a$  Encoding equivalences and name remapping.

This module defines:

1. **Directional supersets** for accuracy evaluation: detecting a superset
   encoding when the expected encoding is a subset is correct (e.g., detecting
   UTF-8 when expected is ASCII), but not the reverse.

2. **Bidirectional equivalents**: groups of encodings where detecting any
   member when another member was expected is considered correct.  This
   includes UTF-16/UTF-32 endian variants (which encode the same text with
   different byte order) and ISO-2022-JP branch variants (which are
   compatible extensions of the same base encoding).

3. **Preferred superset mapping** for the ``prefer_superset`` API option:
   replaces detected ISO/subset encoding names with their Windows/CP superset
   equivalents that modern software actually uses.

4. **Compatibility names** for the default ``compat_names=True`` mode: maps
   internal Python codec names to the names chardet 5.x/6.x returned,
   preserving backward compatibility for callers that compare encoding
   strings directly.
    )annotationsN)Callable)DetectionDictlookup_encodingASCIIzutf-8cp1252zTIS-620
iso8859-11cp874zISO-8859-11GB2312gb18030GBKBig5	big5hkscscp950	Shift_JIScp932shift_jis_2004zShift-JISX0213zEUC-JPeuc_jis_2004zEUC-JISX0213zEUC-KRcp949CP037cp1140zISO-2022-JP>   iso2022_jp_2iso2022_jp_extiso2022_jp_2004zISO2022-JP-1r   r   zISO2022-JP-3r   z
ISO-8859-1z
ISO-8859-2cp1250cp1251cp1256cp1253cp1255cp1254cp1257z	utf-16-lez	utf-16-bezutf-16z	utf-32-lez	utf-32-bezutf-32)
ISO-8859-5z
ISO-8859-6
ISO-8859-7
ISO-8859-8
ISO-8859-9zISO-8859-13UTF-16z	UTF-16-LEz	UTF-16-BEUTF-32z	UTF-32-LEz	UTF-32-BEdict[str, frozenset[str]]	SUPERSETS)asciieuc_kr	iso8859-1z	iso8859-2	iso8859-5z	iso8859-6	iso8859-7	iso8859-8	iso8859-9r
   z
iso8859-13tis-620dict[str, str]PREFERRED_SUPERSETc                r    | j                  d      }t        |t              r|j                  ||      | d<   | S )zGReplace the encoding name using *mapping*, modifying *result* in-place.encoding)get
isinstancestr)resultmappingencs      A/DATA/.local/lib/python3.12/site-packages/chardet/equivalences.py_remap_encodingr>   l   s4    
**Z
 C#s$[[c2zM    c                "    t        | t              S )a7  Replace the encoding name with its preferred Windows/CP superset.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r>   r4   r:   s    r=   apply_preferred_supersetrB   t   s     6#566r?   cp855IBM855cp866IBM866CP949r,   GB18030hzz
HZ-GB-2312
iso2022_krzISO-2022-KRr-   r.   r#   r/   r$   r0   r%   r1   r&   johabJohabzkoi8-rzKOI8-Rzmac-cyrillicMacCyrillicz	mac-romanMacRoman	SHIFT_JISr2   r'   r(   z	utf-8-sigz	UTF-8-SIGzWindows-1251zWindows-1252zWindows-1253zWindows-1254zWindows-1255kz1048KZ1048z	mac-greekMacGreekzmac-iceland
MacIcelandz
mac-latin2	MacLatin2zmac-turkish
MacTurkish_COMPAT_NAMESc                "    t        | t              S )a7  Convert internal codec names to chardet 5.x/6.x compatible names.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r>   rV   rA   s    r=   apply_compat_namesrX      s     6=11r?   ))r   r   r   tuple[tuple[str, ...], ...]BIDIRECTIONAL_GROUPS))skcs)ukrubgbe)msid)nodasvLANGUAGE_EQUIVALENCESc                    | S N )xs    r=   <lambda>rk      s    r?   c                f    i }| D ](  }t        fd|D              }|D ]  }|| |      <    * |S )zJBuild a lookup: key -> frozenset of all equivalent keys in the same group.c              3  .   K   | ]  } |        y wrh   ri   ).0n	normalizes     r=   	<genexpr>z%_build_group_index.<locals>.<genexpr>   s     7A9Q<s   )	frozenset)groupsrp   r:   groupnormednames    `    r=   _build_group_indexrw      sC    
 )+F777D&,F9T?#   Mr?   _LANGUAGE_EQUIVc                L    | |k(  ryt         j                  |       }|duxr ||v S )a  Check whether *detected* is an acceptable language for *expected*.

    Returns ``True`` when *expected* and *detected* are the same ISO 639-1
    code, or belong to the same equivalence group in
    :data:`LANGUAGE_EQUIVALENCES`.

    :param expected: Expected ISO 639-1 language code.
    :param detected: Detected ISO 639-1 language code.
    :returns: ``True`` if the languages are equivalent.
    TN)rx   r7   )expecteddetectedrt   s      r=   is_language_equivalentr|      s4     8)E2U!22r?   _NORMALIZED_SUPERSETSc              #  :   K   | ]  }t        |      xs |  y wrh   r   )rn   ss     r=   rq   rq   	  s     DA*/a/   c                     t        |       xs | S rh   r   )ro   s    r=   rk   rk     s    oa.@.EA.Er?   )rp   _NORMALIZED_BIDIRc                    | |du S |yt        |       xs | j                         }t        |      xs |j                         }||k(  ry|t        v r|t        |   v ry|t        v xr |t        |   v S )a  Check whether *detected* is an acceptable answer for *expected*.

    Acceptable means:

    1. Exact match (after normalization), OR
    2. Both belong to the same bidirectional byte-order group, OR
    3. *detected* is a known superset of *expected*.

    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if the detection is acceptable.
    NFT)r   lowerr   r}   )rz   r{   norm_expnorm_dets       r=   
is_correctr     s     4x(<HNN,<Hx(<HNN,<H 8 $$5Fx5P)P 	)) 	8-h77r?   c                ^    t        j                  d|       }dj                  d |D              S )z4NFKD-normalize *text* and strip all combining marks.NFKD c              3  L   K   | ]  }t        j                  |      r|  y wrh   )unicodedata	combining)rn   cs     r=   rq   z#_strip_combining.<locals>.<genexpr>8  s     Cd+*?*?*B1ds   $$)r   rp   join)textnfkds     r=   _strip_combiningr   5  s)      .D77CdCCCr?   )   ¤   €)r   r   zfrozenset[tuple[str, str]]_EQUIVALENT_SYMBOL_PAIRSc                R    | |k(  ry| |ft         v ryt        |       t        |      k(  S )u   Return True if characters *a* and *b* are functionally equivalent.

    Equivalent means:
    - Same character, OR
    - Same base letter after stripping combining marks, OR
    - An explicitly listed symbol equivalence (e.g. ¤ ↔ €)
    T)r   r   )abs     r=   _chars_equivalentr   E  s4     	Av	1v))A"21"555r?   c                |   ||du S |yt        |      xs |j                         }t        |      xs |j                         }||k(  ry	 | j                  |      }| j                  |      }||k(  ryt        |      t        |      k7  ryt        d t        ||d      D              S # t        t        f$ r Y yw xY w)u  Check whether *detected* produces functionally identical text to *expected*.

    Returns ``True`` when:

    1. *detected* is not ``None`` and both encoding names normalize to the same
       codec, OR
    2. Decoding *data* with both encodings yields identical strings, OR
    3. Every differing character pair is functionally equivalent: same base
       letter after stripping combining marks, or an explicitly listed symbol
       equivalence (e.g. ¤ ↔ €).

    Returns ``False`` if *detected* is ``None``, either encoding is unknown,
    or either encoding cannot decode *data*.

    :param data: The raw byte data that was detected.
    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if decoding with *detected* yields functionally identical
        text to decoding with *expected*.
    NFTc              3  :   K   | ]  \  }}t        ||        y wrh   )r   )rn   r   r   s      r=   rq   z*is_equivalent_detection.<locals>.<genexpr>  s     X3W41a A&3Wr   )strict)r   r   decodeUnicodeDecodeErrorLookupErrorlenallzip)datarz   r{   r   r   text_exptext_dets          r=   is_equivalent_detectionr   U  s    . 4x(<HNN,<Hx(<HNN,<H8;;x(;;x( 8
8}H%X3xRV3WXXX , s   "B) )B;:B;)r:   r   r;   r3   returnr   )r:   r   r   r   )rs   rY   rp   zCallable[[str], str]r   r)   )rz   r9   r{   r9   r   bool)rz   
str | Noner{   r   r   r   )r   r9   r   r9   )r   r9   r   r9   r   r   )r   bytesrz   r   r{   r   r   r   )%__doc__
__future__r   r   collections.abcr   chardet.pipeliner   chardet.registryr   rr   r*   __annotations__r4   r>   rB   apply_legacy_renamerV   rX   rZ   rf   rw   rx   r|   r}   items_subset
_supersets_key_normedr7   r   r   r   r   r   r   ri   r?   r=   <module>r      s[  0 #  $ * ,+(Y*++(y,01+( 9gY'+( i$	+(
 
9i[!+( I{G,-+( G%567+( i!1 23+( i()+( I~./+( i	"+( Yz"+(. 9RS/+(0 I~/?@A1+(2 I0123+(6 )XJ'7+(8 )XJ'9+(: XJ'XJ'XJ'XJ'XJ'hZ( k23H:&H:&k23H:&H:&U+(	$ +f & N  777 / %!%! X%! X	%!
 W%! H%! h%! y%! 	,%! M%! -%! %! %! %! %!  !%!" W#%!$ h%%!& M'%!( )%!* k+%!, y-%!. h/%!0 h1%!2 3%!4 n5%!6 n7%!8 n9%!: n;%!< n=%!@ hA%!B C%!D <E%!F +G%!H <I%!~ %P222,5 1 6 2 & '2
'
#
 
 .@@U-V* V3* 46 0 5$??,GZ7#.wDDDDG"7";";D)+"NQX"X$ - 0B$E0 , 
 FD 8A8 4 6 .Y
.Y%.Y1;.Y	.Yr?   