
    Q3j                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z d dlmZmZmZ d dlmZmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZmZ d dl m!Z!m"Z" d d	l#m$Z$m%Z% d d
l&m'Z' d dl(m)Z)m*Z*m+Z+ g dZ, G d deed      Z- G d deed      Z. e"ddgdg e!eddd      g e!eddd      gdgdd      d dddd       Z/d"dZ0d"dZ1 G d  d!eed      Z2y)#    N)defaultdict)Integral)BaseEstimatorTransformerMixin_fit_context)_align_api_if_sparsecolumn_or_1d)	_find_matching_floating_dtype_is_numpy_namespace_isindeviceget_namespaceget_namespace_and_deviceindexing_dtypemove_toxpx)_encode_unique)Intervalvalidate_params)type_of_targetunique_labels)min_max_axis)_num_samplescheck_arraycheck_is_fitted)LabelBinarizerLabelEncoderMultiLabelBinarizerlabel_binarizec                   :     e Zd ZdZd Zd Zd Zd Z fdZ xZ	S )r   a  Encode target labels with value between 0 and n_classes-1.

    This transformer should be used to encode target values, *i.e.* `y`, and
    not the input `X`.

    Read more in the :ref:`User Guide <preprocessing_targets>`.

    .. versionadded:: 0.12

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Holds the label for each class.

    See Also
    --------
    OrdinalEncoder : Encode categorical features using an ordinal encoding
        scheme.
    OneHotEncoder : Encode categorical features as a one-hot numeric array.

    Examples
    --------
    `LabelEncoder` can be used to normalize labels.

    >>> from sklearn.preprocessing import LabelEncoder
    >>> le = LabelEncoder()
    >>> le.fit([1, 2, 2, 6])
    LabelEncoder()
    >>> le.classes_
    array([1, 2, 6])
    >>> le.transform([1, 1, 2, 6])
    array([0, 0, 1, 2]...)
    >>> le.inverse_transform([0, 0, 1, 2])
    array([1, 1, 2, 6])

    It can also be used to transform non-numerical labels (as long as they are
    hashable and comparable) to numerical labels.

    >>> le = LabelEncoder()
    >>> le.fit(["paris", "paris", "tokyo", "amsterdam"])
    LabelEncoder()
    >>> list(le.classes_)
    [np.str_('amsterdam'), np.str_('paris'), np.str_('tokyo')]
    >>> le.transform(["tokyo", "tokyo", "paris"])
    array([2, 2, 1]...)
    >>> list(le.inverse_transform([2, 2, 1]))
    [np.str_('tokyo'), np.str_('tokyo'), np.str_('paris')]
    c                 @    t        |d      }t        |      | _        | S )zFit label encoder.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
            Fitted label encoder.
        Twarnr	   r   classes_selfys     I/DATA/.local/lib/python3.12/site-packages/sklearn/preprocessing/_label.pyfitzLabelEncoder.fitZ   s      &
    c                 J    t        |d      }t        |d      \  | _        }|S )a  Fit label encoder and return encoded labels.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        y : array-like of shape (n_samples,)
            Encoded labels.
        Tr#   return_inverser%   r'   s     r*   fit_transformzLabelEncoder.fit_transformk   s(     &"1T:qr,   c                     t        |        t        |      \  }}t        || j                  j                  d      }t        |      dk(  r|j                  g       S t        || j                        S )a  Transform labels to normalized encoding.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        y : array-like of shape (n_samples,)
            Labels as normalized encodings.
        T)dtyper$   r   )uniques)r   r   r	   r&   r2   r   asarrayr   )r(   r)   xp_s       r*   	transformzLabelEncoder.transform|   s]     	a A$--"5"5DA?a::b>!q$--00r,   c           	         t        |        t        |      \  }}t        |d      }t        |      dk(  r|j	                  g       S t        j                  ||j                  | j                  j                  d   t        |            |      }|j                  d   rt        dt        |      z        |j	                  |      }|j                  | j                  |d      S )a  Transform labels back to original encoding.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        y_original : ndarray of shape (n_samples,)
            Original encoding.
        Tr#   r   r   r5   z'y contains previously unseen labels: %saxis)r   r   r	   r   r4   r   	setdiff1daranger&   shaper   
ValueErrorstrtake)r(   r)   r5   r6   diffs        r*   inverse_transformzLabelEncoder.inverse_transform   s     	a A&?a::b>!}}IIdmm))!,VAYI?

 ::a=FTRSSJJqMwwt}}aaw00r,   c                 v    t         |          }d|_        d|j                  _        d|j
                  _        |S )NTF)super__sklearn_tags__array_api_support
input_tagstwo_d_arraytarget_tagsone_d_labelsr(   tags	__class__s     r*   rG   zLabelEncoder.__sklearn_tags__   s7    w')!%&+#(,%r,   )
__name__
__module____qualname____doc__r+   r0   r7   rD   rG   __classcell__rO   s   @r*   r   r   (   s'    /b""1,1< r,   r   )auto_wrap_output_keysc                        e Zd ZU dZegegdgdZeed<   dddddZ e	d	
      d        Z
d Zd ZddZ fdZ xZS )r   a
  Binarize labels in a one-vs-all fashion.

    Several regression and binary classification algorithms are
    available in scikit-learn. A simple way to extend these algorithms
    to the multi-class classification case is to use the so-called
    one-vs-all scheme.

    At learning time, this simply consists in learning one regressor
    or binary classifier per class. In doing so, one needs to convert
    multi-class labels to binary labels (belong or does not belong
    to the class). `LabelBinarizer` makes this process easy with the
    transform method.

    At prediction time, one assigns the class for which the corresponding
    model gave the greatest confidence. `LabelBinarizer` makes this easy
    with the :meth:`inverse_transform` method.

    Read more in the :ref:`User Guide <preprocessing_targets>`.

    Parameters
    ----------
    neg_label : int, default=0
        Value with which negative labels must be encoded.

    pos_label : int, default=1
        Value with which positive labels must be encoded.

    sparse_output : bool, default=False
        True if the returned array from transform is desired to be in sparse
        CSR format.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Holds the label for each class.

    y_type_ : str
        Represents the type of the target data as evaluated by
        :func:`~sklearn.utils.multiclass.type_of_target`. Possible type are
        'continuous', 'continuous-multioutput', 'binary', 'multiclass',
        'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.

    sparse_input_ : bool
        `True` if the input data to transform is given as a sparse matrix,
         `False` otherwise.

    See Also
    --------
    label_binarize : Function to perform the transform operation of
        LabelBinarizer with fixed classes.
    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K
        scheme.

    Examples
    --------
    >>> from sklearn.preprocessing import LabelBinarizer
    >>> lb = LabelBinarizer()
    >>> lb.fit([1, 2, 6, 4, 2])
    LabelBinarizer()
    >>> lb.classes_
    array([1, 2, 4, 6])
    >>> lb.transform([1, 6])
    array([[1, 0, 0, 0],
           [0, 0, 0, 1]])

    Binary targets transform to a column vector

    >>> lb = LabelBinarizer()
    >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])
    array([[1],
           [0],
           [0],
           [1]])

    Passing a 2D matrix for multilabel classification

    >>> import numpy as np
    >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
    LabelBinarizer()
    >>> lb.classes_
    array([0, 1, 2])
    >>> lb.transform([0, 1, 2, 1])
    array([[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1],
           [0, 1, 0]])
    boolean	neg_label	pos_labelsparse_output_parameter_constraintsr      Fc                .    || _         || _        || _        y NrY   )r(   rZ   r[   r\   s       r*   __init__zLabelBinarizer.__init__  s    ""*r,   Tprefer_skip_nested_validationc                    | j                   | j                  k\  r&t        d| j                    d| j                   d      | j                  rC| j                  dk(  s| j                   dk7  r%t        d| j                   d| j                          t	        |      \  }}|r0| j                  r$t        |      st        d|j                   d      t        |d	
      | _        d| j                  v rt        d      t        |      dk(  rt        d|z        t        j                  |      | _        t        |      | _        | S )aa  Fit label binarizer.

        Parameters
        ----------
        y : ndarray of shape (n_samples,) or (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification.

        Returns
        -------
        self : object
            Returns the instance itself.
        z
neg_label=z& must be strictly less than pos_label=.r   z`Sparse binarization is only supported with non zero pos_label and zero neg_label, got pos_label=z and neg_label=>`sparse_output=True` is not supported for array API namespace <. Use `sparse_output=False` to return a dense array instead.r)   )
input_namemultioutput@Multioutput target data is not supported with label binarizationy has 0 samples: %r)rZ   r[   r@   r\   r   r   rP   r   y_type_r   spissparsesparse_input_r   r&   )r(   r)   r5   is_array_apis       r*   r+   zLabelBinarizer.fit  sH    >>T^^+T^^, -!^^,A/ 
 4>>Q#6$..A:M!^^,ODNN;KM  )+LD..7J27N[[M *MM  &aC8DLL(R  ?a2Q677[[^%a(r,   c                 B    | j                  |      j                  |      S )a  Fit label binarizer/transform multi-class labels to binary labels.

        The output of transform is sometimes referred to as
        the 1-of-K coding scheme.

        Parameters
        ----------
        y : {ndarray, sparse matrix} of shape (n_samples,) or                 (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification. Sparse matrix can be
            CSR, CSC, COO, DOK, or LIL.

        Returns
        -------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Shape will be (n_samples, 1) for binary problems. Sparse matrix
            will be of CSR format.
        )r+   r7   r'   s     r*   r0   zLabelBinarizer.fit_transformN  s    ( xx{$$Q''r,   c                    t        |        t        |      \  }}|r0| j                  r$t        |      st	        d|j
                   d      t        |      j                  d      }|r&| j                  j                  d      st	        d      t        || j                  | j                  | j                  | j                        S )a  Transform multi-class labels to binary labels.

        The output of transform is sometimes referred to by some authors as
        the 1-of-K coding scheme.

        Parameters
        ----------
        y : {array, sparse matrix} of shape (n_samples,) or                 (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification. Sparse matrix can be
            CSR, CSC, COO, DOK, or LIL.

        Returns
        -------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Shape will be (n_samples, 1) for binary problems. Sparse matrix
            will be of CSR format.
        rf   rg   
multilabelz0The object was not fitted with multilabel input.)classesr[   rZ   r\   )r   r   r\   r   r@   rP   r   
startswithrl   r    r&   r[   rZ   )r(   r)   r5   rp   y_is_multilabels        r*   r7   zLabelBinarizer.transformd  s    ( 	(+LD..7J27N[[M *MM  )+66|D4<<#:#:<#HOPPMMnnnn,,
 	
r,   c                    t        |        t        |      \  }}|r0| j                  r$t        |      st	        d|j
                   d      || j                  | j                  z   dz  }| j                  dk(  rt        || j                  |      }n$t        || j                  | j                  ||      }| j                  r t        t        j                  |            }|S t        j                  |      r|j!                         }|S )a  Transform binary labels back to multi-class labels.

        Parameters
        ----------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Target values. All sparse matrices are converted to CSR before
            inverse transformation.

        threshold : float, default=None
            Threshold used in the binary and multi-label cases.

            Use 0 when ``Y`` contains the output of :term:`decision_function`
            (classifier).
            Use 0.5 when ``Y`` contains the output of :term:`predict_proba`.

            If None, the threshold is assumed to be half way between
            neg_label and pos_label.

        Returns
        -------
        y_original : {ndarray, sparse matrix} of shape (n_samples,)
            Target values. Sparse matrix will be of CSR format.

        Notes
        -----
        In the case when the binary labels are fractional
        (probabilistic), :meth:`inverse_transform` chooses the class with the
        greatest value. Typically, this allows to use the output of a
        linear model's :term:`decision_function` method directly as the input
        of :meth:`inverse_transform`.
        zY`LabelBinarizer` was fitted on a sparse matrix, and therefore cannot inverse transform a z array back to a sparse matrix.g       @
multiclassr:   )r   r   ro   r   r@   rP   r[   rZ   rl   _inverse_binarize_multiclassr&   _inverse_binarize_thresholdingr   rm   	csr_arrayrn   toarray)r(   Y	thresholdr5   rp   y_invs         r*   rD   z LabelBinarizer.inverse_transform  s    @ 	(+LD..7J27N''){{m3RT 
 $..8C?I<<<'0DMMbIE24<<	bE (e)<=E  [[MMOEr,   c                 h    t         |          }d|j                  _        d|j                  _        |S NFT)rF   rG   rI   rJ   rK   rL   rM   s     r*   rG   zLabelBinarizer.__sklearn_tags__  /    w')&+#(,%r,   r`   )rP   rQ   rR   rS   r   r]   dict__annotations__ra   r   r+   r0   r7   rD   rG   rT   rU   s   @r*   r   r      sm    Vr ZZ#$D  %&% +
 5/ 6/b(,)
V9v r,   r   
array-likezsparse matrixneither)closedrX   )r)   rt   rZ   r[   r\   Trb   r^   FrY   c          	       
   t        | t              st        | dddd      } nt        |       dk(  rt	        d| z        ||k\  rt	        dj                  ||            |r%|dk(  s|dk7  rt	        d	j                  ||            |dk(  }|r| }t        |       }d
|v rt	        d      |dk(  rt	        d      t        |       \  }}}	|r&|r$t        |      st	        d|j                   d      	 |j                  ||	      }t        | d      r| j                  d   n
t        |       }|j                  d   }t        | d      }|r)|j                  | j                   d      r| j                   }nt#        |      }|r:|j                  | j                   d      r|j%                  || j                   d      }|dk(  rP|dk(  rD|r&t'        t)        j*                  |dft,                    S |j/                  |df|      }||z  }|S |dk\  rd}|j1                  |      }|dk(  rRt        | d      r| j                  d   nt        | d         }||k7  r$t	        dj                  |t3        |                   |dv rt5        |       } t7        | ||       }| |   }|j9                  ||      }|j%                  ||      }|j;                  |j                  dg|	      |j=                  |d!      f      }|j?                  ||      }t)        j*                  tA        |tB        d"#      tA        |tB        d"#      tA        |tB        d"#      f||f$      }|s|j                  |jE                         |	      }n|dk(  r|r>t)        j*                  |       }|dk7  ry|j?                  |jF                  |      }||_#        nUt)        jH                  |       r| jE                         } |j                  | |	d%&      }|dk7  r|||dk7  <   nt	        d'|z        |s,|dk7  r|||dk(  <   |rd|||k(  <   |j%                  ||d      }n&|jF                  j%                  t,        d      |_#        |jK                  ||k7        r|j9                  ||      }|dd|f   }|dk(  r0|r|ddd(gf   }t'        |      S |jM                  |ddd(f   d)      }t'        |      S # t        t        f$ r}
t	        d|j                   d      |
d}
~
ww xY w)*a  Binarize labels in a one-vs-all fashion.

    Several regression and binary classification algorithms are
    available in scikit-learn. A simple way to extend these algorithms
    to the multi-class classification case is to use the so-called
    one-vs-all scheme.

    This function makes it possible to compute this transformation for a
    fixed set of class labels known ahead of time.

    Parameters
    ----------
    y : array-like or sparse matrix
        Sequence of integer labels or multilabel data to encode.

    classes : array-like of shape (n_classes,)
        Uniquely holds the label for each class.

    neg_label : int, default=0
        Value with which negative labels must be encoded.

    pos_label : int, default=1
        Value with which positive labels must be encoded.

    sparse_output : bool, default=False,
        Set to true if output binary array is desired in CSR sparse format.

    Returns
    -------
    Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
        Shape will be (n_samples, 1) for binary problems. Sparse matrix will
        be of CSR format.

    See Also
    --------
    LabelBinarizer : Class used to wrap the functionality of label_binarize and
        allow for fitting to classes independently of the transform operation.

    Examples
    --------
    >>> from sklearn.preprocessing import label_binarize
    >>> label_binarize([1, 6], classes=[1, 2, 4, 6])
    array([[1, 0, 0, 0],
           [0, 0, 0, 1]])

    The class ordering is preserved:

    >>> label_binarize([1, 6], classes=[1, 6, 4, 2])
    array([[1, 0, 0, 0],
           [0, 1, 0, 0]])

    Binary targets transform to a column vector

    >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])
    array([[1],
           [0],
           [0],
           [1]])
    r)   csrFN)rh   accept_sparse	ensure_2dr2   r   rk   z7neg_label={0} must be strictly less than pos_label={1}.zuSparse binarization is only supported with non zero pos_label and zero neg_label, got pos_label={0} and neg_label={1}ri   rj   unknownz$The type of target data is not knownz?`sparse_output=True` is not supported for array API 'namespace z='. Use `sparse_output=False` to return a dense array instead.r9   z>`classes` contains unsupported dtype for array API namespace 'z'.r?   r2   signed integerintegral)copybinaryr^   r2      rx   multilabel-indicatorz:classes {0} mismatch with the labels {1} found in the data)r   rx   r:   r;   cpu)r5   r   r?   T)r   r   z7%s target data is not supported with label binarization)r   r^   )'
isinstancelistr   r   r@   formatr   r   r   rP   r4   	TypeErrorhasattrr?   lenisdtyper2   r   astyper   rm   r{   intzerossortr   r	   r   searchsortedconcatcumulative_sum	full_liker   npr|   datarn   anyreshape)r)   rt   rZ   r[   r\   
pos_switchy_typer5   rp   device_e	n_samples	n_classesy_has_dtype
int_dtype_r}   sorted_classy_n_classesy_in_classesy_seenindicesindptrr   s                          r*   r    r      s/   L a #Ue4
 ?a2Q677IELL9
 	
 )q.IN vi+	
 	
 aJJ	AFN
 	
 ?@@ 8 ;Bg.A".E++ 'II
 	
**WW*5 &a1
s1vIa I!W%Krzz!''+;<WW
#B'
 rzz!'':6))GQWW5)9>+BLL)Qs,STTHHi^:H>Y!^!F777#L''$+Aw$7aggajS1Y#LSS]1-  ))O QB/<//,7yyz:

A3w
/!!,Q!7
 ||GY/ LLE2Bu52e4
 i(
 

199;w
7A	)	)QAA~||AFFI6{{1~IIK

1W4
8AA~%!q&	 EN
 	
 >!Aa1fI !Aa9nIIa%I0s/ 
vvg%&//,8ajM!bT'
A  "" 

1QU8W-A""e 	"  }B 
 	s   1S S=S88S=c                    t        j                  |       rt        j                  |      }| j	                         } | j
                  \  }}t        j                  |      }t        | d      d   }t        j                  | j                        }t        j                  ||      }t        j                  || j                  k(        }	|d   dk(  r*t        j                  |	t        | j                        g      }	t        j                  |	| j                  dd       }
t        j                  | j                   dg      }||	|
      }d|t        j"                  |dk(        d   <   t        j                  |      |dkD  |j%                         dk(  z     }|D ]M  }| j                   | j                  |   | j                  |dz       }|t        j&                  ||         d   ||<   O ||   S t)        | |      \  }}}|j                  ||      }|j+                  | d      }|j-                  |d|j
                  d   dz
        }||   S )z}Inverse label binarization transformation for multiclass.

    Multiclass uses the maximal score instead of a threshold.
    r^   r   r   Nr:   r9   r;   )rm   rn   r   r4   tocsrr?   r>   r   rC   r   repeatflatnonzeror   appendr   r   r   whereravelr=   r   argmaxclip)r)   rt   r5   r   	n_outputsoutputsrow_maxrow_nnzy_data_repeated_maxy_i_all_argmaxindex_first_argmax	y_ind_ext
y_i_argmaxsamplesiindr6   r   r   s                      r*   ry   ry     s   
 
{{1~**W% GGI ww	9))I&q!$Q'''!((# ii9(;qvv(EF 2;!YY~AFF}EN  __^QXXcr]KIIaii!-	~.@AB
01
288GqL)!,- ))I&!18L'MNA))AHHQK!((1q5/:C#BLL#$>?BJqM  z""1!;Aw**WW*5))AA)&'''1gmmA&6&:;wr,   c                    |dk(  rE| j                   dk(  r6| j                  d   dkD  r$t        dj                  | j                              t	        | |      \  }}}|j                  ||      }|dk7  r*| j                  d   |j                  d   k7  rt        d      t        | |      }t        | d	      r)|j                  | j                  d
      r| j                  }nt        |      }t        j                  |       r|dkD  r\| j                  dvr| j                         } t        j                  | j                   |kD  t"              | _        | j%                          nO|j                  | j'                         |kD  ||      } n)|j                  |j                  | ||      |kD  ||      } |dk(  rt        j                  |       r| j'                         } | j                   dk(  r| j                  d   dk(  r|| dddf      S |j                  d   dk(  r|j)                  |d   t+        |             S ||j-                  | d         S |dk(  r| S t        dj                  |            )z=Inverse label binarization transformation using thresholding.r      r^   z'output_type='binary', but y.shape = {0}r:   r9   r   zAThe number of class is not equal to the number of dimension of y.r2   r   )r   cscr   )r2   r   N)r   r   z{0} format is not supported)ndimr?   r@   r   r   r4   r
   r   r   r2   r   rm   rn   r   r   arrayr   r   eliminate_zerosr|   r   r   r   )	r)   output_typert   r~   r5   r6   r   dtype_r   s	            r*   rz   rz     s$    h166Q;1771:>BII!''RSS-aB7NB7jjj1Gh1771:q1A#AO
 	
 +14Fq'rzz!''3CDWW
#B'
 
{{1~q=xx~-GGIXXaffy0<AF

199;2*W
UAJJJJqwJ7)C  
 h;;q>		A66Q;1771:?1QT7##}}Q1$yySV44rzz!U344	.	. 6==kJKKr,   c                        e Zd ZU dZddgdgdZeed<   ddddZ ed	
      d        Z	 ed	
      d        Z
d Zd Zd Zd Z fdZ xZS )r   a?  Transform between iterable of iterables and a multilabel format.

    Although a list of sets or tuples is a very intuitive format for multilabel
    data, it is unwieldy to process. This transformer converts between this
    intuitive format and the supported multilabel format: a (samples x classes)
    binary matrix indicating the presence of a class label.

    Read more in the :ref:`User Guide <multilabelbinarizer>`.

    Parameters
    ----------
    classes : array-like of shape (n_classes,), default=None
        Indicates an ordering for the class labels.
        All entries should be unique (cannot contain duplicate classes).

    sparse_output : bool, default=False
        Set to True if output binary array is desired in CSR sparse format.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        A copy of the `classes` parameter when provided.
        Otherwise it corresponds to the sorted set of classes found
        when fitting.

    See Also
    --------
    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K
        scheme.

    Examples
    --------
    >>> from sklearn.preprocessing import MultiLabelBinarizer
    >>> mlb = MultiLabelBinarizer()
    >>> mlb.fit_transform([(1, 2), (3,)])
    array([[1, 1, 0],
           [0, 0, 1]])
    >>> mlb.classes_
    array([1, 2, 3])

    >>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])
    array([[0, 1, 1],
           [1, 0, 0]])
    >>> list(mlb.classes_)
    ['comedy', 'sci-fi', 'thriller']

    A common mistake is to pass in a list, which leads to the following issue:

    >>> mlb = MultiLabelBinarizer()
    >>> mlb.fit(['sci-fi', 'thriller', 'comedy'])
    MultiLabelBinarizer()
    >>> mlb.classes_
    array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',
        'y'], dtype=object)

    To correct this, the list of labels should be passed in as:

    >>> mlb = MultiLabelBinarizer()
    >>> mlb.fit([['sci-fi', 'thriller', 'comedy']])
    MultiLabelBinarizer()
    >>> mlb.classes_
    array(['comedy', 'sci-fi', 'thriller'], dtype=object)
    r   NrX   rt   r\   r]   Fc                     || _         || _        y r`   r   )r(   rt   r\   s      r*   ra   zMultiLabelBinarizer.__init__h  s    *r,   Trb   c                    d| _         | j                  2t        t        t        j
                  j                  |                  }nKt        t        | j                              t        | j                        k  rt        d      | j                  }t        d |D              rt        nt        }t        j                  t        |      |      | _        || j                  dd | S )a  Fit the label sets binarizer, storing :term:`classes_`.

        Parameters
        ----------
        y : iterable of iterables
            A set of labels (any orderable and hashable object) for each
            sample. If the `classes` parameter is set, `y` will not be
            iterated.

        Returns
        -------
        self : object
            Fitted estimator.
        NztThe classes argument contains duplicate classes. Remove these duplicates before passing them to MultiLabelBinarizer.c              3   <   K   | ]  }t        |t                y wr`   r   r   .0cs     r*   	<genexpr>z*MultiLabelBinarizer.fit.<locals>.<genexpr>  s     ?w!:a-w   r   )_cached_dictrt   sortedset	itertoolschainfrom_iterabler   r@   allr   objectr   emptyr&   )r(   r)   rt   r2   s       r*   r+   zMultiLabelBinarizer.fitl  s      !<<S!>!>q!ABCGT\\"#c$,,&77/  llG?w??VWU;"ar,   c                 t   | j                    | j                  |      j                  |      S d| _        t	        t
              }|j                  |_        | j                  ||      }t        ||j                        }t        d |D              rt
        nt        }t        j                  t        |      |      }||dd t        j                   |d      \  | _        }t        j$                  ||j&                     |j&                  j(                        |_        | j*                  s|j-                         }|S )aM  Fit the label sets binarizer and transform the given label sets.

        Parameters
        ----------
        y : iterable of iterables
            A set of labels (any orderable and hashable object) for each
            sample. If the `classes` parameter is set, `y` will not be
            iterated.

        Returns
        -------
        y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`
            is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR
            format.
        Nkeyc              3   <   K   | ]  }t        |t                y wr`   r   r   s     r*   r   z4MultiLabelBinarizer.fit_transform.<locals>.<genexpr>  s     ;s!:a-sr   r   Tr.   )rt   r+   r7   r   r   r   __len__default_factory
_transformr   getr   r   r   r   r   uniquer&   r4   r   r2   r\   r|   )r(   r)   class_mappingyttmpr2   inverses          r*   r0   z!MultiLabelBinarizer.fit_transform  s    $ <<#88A;((++  $C((5(=(=%__Q. ](9(9: ;s;;S7a!#=!NwZZ

 32::;K;KL
!!B	r,   c                     t        |        | j                         }| j                  ||      }| j                  s|j	                         }|S )a  Transform the given label sets.

        Parameters
        ----------
        y : iterable of iterables
            A set of labels (any orderable and hashable object) for each
            sample. If the `classes` parameter is set, `y` will not be
            iterated.

        Returns
        -------
        y_indicator : array or CSR matrix, shape (n_samples, n_classes)
            A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in
            `y[i]`, and 0 otherwise.
        )r   _build_cacher   r\   r|   )r(   r)   class_to_indexr   s       r*   r7   zMultiLabelBinarizer.transform  sC      	**,__Q/!!B	r,   c           
          | j                   @t        t        | j                  t	        t        | j                                          | _         | j                   S r`   )r   r   zipr&   ranger   )r(   s    r*   r   z MultiLabelBinarizer._build_cache  s@    $ $Sc$-->P8Q%R SD   r,   c           	         t        j                   d      }t        j                   ddg      }t               }|D ]S  }t               }|D ]  }	 |j                  ||           |j	                  |       |j                  t        |             U |r3t        j                  dj                  t        |t                           t        j                  t        |      t              }	t        t!        j"                  |	||ft        |      dz
  t        |      f            S # t        $ r |j                  |       Y w xY w)a/  Transforms the label sets with a given mapping.

        Parameters
        ----------
        y : iterable of iterables
            A set of labels (any orderable and hashable object) for each
            sample. If the `classes` parameter is set, `y` will not be
            iterated.

        class_mapping : Mapping
            Maps from label to column index in label indicator matrix.

        Returns
        -------
        y_indicator : sparse matrix of shape (n_samples, n_classes)
            Label indicator matrix. Will be of CSR format.
        r   r   z%unknown class(es) {0} will be ignoredr   r   r^   r   )r   r   addKeyErrorextendr   r   warningsr$   r   r   rA   r   onesr   r   rm   r{   )
r(   r)   r   r   r   r   labelsindexlabelr   s
             r*   r   zMultiLabelBinarizer._transform  s   $ ++c"S1#&%FEE'IImE23  
 NN5!MM#g,'  MM7>>vgSV?WX wws7|3/#LLw'Fa]AS/T
 	
   'KK&'s   D""D?>D?c                    t        |        |j                  d   t        | j                        k7  r;t	        dj                  t        | j                        |j                  d               t        j                  |      r|j                         }t        |j                        dk7  r9t        t        j                  |j                  ddg            dkD  rt	        d      t        |j                  dd |j                  dd       D cg c]6  \  }}t        | j                  j                  |j                   ||             8 c}}S t        j                  |ddg      }t        |      dkD  rt	        dj                  |            |D cg c]&  }t        | j                  j#                  |            ( c}S c c}}w c c}w )a  Transform the given indicator matrix into label sets.

        Parameters
        ----------
        yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            A matrix containing only 1s and 0s.

        Returns
        -------
        y_original : list of tuples
            The set of labels for each sample such that `y[i]` consists of
            `classes_[j]` for each `yt[i, j] == 1`.
        r^   z/Expected indicator for {0} classes, but got {1}r   z+Expected only 0s and 1s in label indicator.Nr   z8Expected only 0s and 1s in label indicator. Also got {0})r   r?   r   r&   r@   r   rm   rn   r   r   r   r=   r   r   tuplerB   r   compress)r(   r   startend
unexpected
indicatorss         r*   rD   z%MultiLabelBinarizer.inverse_transform  s    	88A;#dmm,,AHH&  ;;r?B277|q Sbgg1v)F%G!%K !NOO #&biinbiim"D"DJE3 dmm((E#)>?@"D 
 b1a&1J:" NUU" 
 QSSPR*E$--00<=PRSS Ts   ;F<+Gc                 h    t         |          }d|j                  _        d|j                  _        |S r   )rF   rG   rI   rJ   rK   two_d_labelsrM   s     r*   rG   z$MultiLabelBinarizer.__sklearn_tags__,  r   r,   )rP   rQ   rR   rS   r]   r   r   ra   r   r+   r0   r7   r   r   rD   rG   rT   rU   s   @r*   r   r   "  s    >B !$'#$D 
 #'e + 5 6@ 5) 6)V4!(
T'TR r,   r   r`   )3r   r   r   collectionsr   numbersr   numpyr   scipy.sparsesparserm   sklearn.baser   r   r   sklearn.utilsr   r	   sklearn.utils._array_apir
   r   r   r   r   r   r   r   r   sklearn.utils._encoder   r   sklearn.utils._param_validationr   r   sklearn.utils.multiclassr   r   sklearn.utils.sparsefuncsr   sklearn.utils.validationr   r   r   __all__r   r   r    ry   rz   r    r,   r*   <module>r     s       #    F F <
 
 
 3 E B 2 O OM#]$ M`V%}D Vr O, >xtIFGxtIFG# #'	 -.% ^#	^#B, ^4LnN*MQU Nr,   