
    Q3jf                         d dl Zd dlZd dlmZ d dlmZ d dlmZ	 d dl
mZ d Zd Zej                  j                  dd	d
g      d        Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)    N)assert_array_equal)FeatureHasher)	transform)SCIPY_VERSION_BELOW_1_12c                  $   t        d      } d| j                  k(  sJ dddddd	d
g}t        d      j                  |      }d |D        }t        dd      j                  |      }t        |j	                         |j	                                y )N   
n_featuresdictbar*   %   )foodadatzarabazstring1)r   gagac              3   N   K   | ]  }t        |j                                 y wNiteritems.0ds     a/DATA/.local/lib/python3.12/site-packages/sklearn/feature_extraction/tests/test_feature_hasher.py	<genexpr>z,test_feature_hasher_dicts.<locals>.<genexpr>   s     
*Eq4	?E   #%pairr
   
input_type)r   r"   r   r   toarray)feature_hasherraw_XX1genX2s        r   test_feature_hasher_dictsr)   
   s    "b1N^.....B4eY6WXE	"	%	/	/	6B
*E
*C	"	8	B	B3	GBrzz|RZZ\2    c                  "   ddddj                  d      gdj                  d      ddgg} dD ]  }d|z  }d | D        }t        |d	d
      }|j                  |      }|j                  d   t	        |       k(  sJ |j                  d   |k(  sJ t
        r:|dgd d f   j                         dk(  sJ |dgd d f   j                         dk(  s2J |d   j                         dk(  sJ |d   j                         dk(  sJ |j                  dk(  rJ  y )Nr   r   r   asciiquux)   	      r         c              3       K   | ]  }|  y wr    r   xs     r   r   z.test_feature_hasher_strings.<locals>.<genexpr>   s     Aa   stringF)r
   r"   alternate_signr               )encoder   r   shapelenr   sumnnz)r%   lg_n_featuresr
   itr$   Xs         r   test_feature_hasher_stringsrF      s!    
uell734	g	v.E
 ,%
&!hu
 $$R(wwqzSZ'''wwqzZ'''#aS!V9==?a'''aS!V9==?a'''Q488:?"?Q488:?"?uuzz) ,r*   r"   list	generatorc                     d}ddg}| dk(  r	d |D        }t        dd      }t        j                  t        |	      5  |j	                  |       d
d
d
       y
# 1 sw Y   y
xY w)zhFeatureHasher raises error when a sample is a single string.

    Non-regression test for gh-13199.
    z"Samples can not be a single string	my_stringanother_stringrH   c              3       K   | ]  }|  y wr   r4   r5   s     r   r   z4test_feature_hasher_single_string.<locals>.<genexpr><   s     "EqEr7   
   r8   r!   )matchN)r   pytestraises
ValueErrorr   )r"   msgr%   r$   s       r   !test_feature_hasher_single_stringrS   3   sZ     /C*+E[ "E""bXFN	z	-  ' 
.	-	-s   AA!c                     ddddj                  d      gdj                  d      ddgg} d | D        }t        |dt        d      \  }}}d	 | D        }t        |dt        dd
      \  }}}t        ||       t        ||       d | D        }t        |dt        dd      \  }}}t	        j
                  t              5  t        ||       d d d        y # 1 sw Y   y xY w)Nr   r   r   r,   r-   c              3   .   K   | ]  }d  |D          yw)c              3   $   K   | ]  }|d f 
 ywr:   Nr4   r   fs     r   r   z8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>J        !q!1vq   Nr4   r5   s     r   r   z.test_hashing_transform_seed.<locals>.<genexpr>J        15a!q!5      Fc              3   .   K   | ]  }d  |D          yw)c              3   $   K   | ]  }|d f 
 ywrW   r4   rX   s     r   r   z8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>M   rZ   r[   Nr4   r5   s     r   r   z.test_hashing_transform_seed.<locals>.<genexpr>M   r\   r]   r   )seedc              3   .   K   | ]  }d  |D          yw)c              3   $   K   | ]  }|d f 
 ywrW   r4   rX   s     r   r   z8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>R   rZ   r[   Nr4   r5   s     r   r   z.test_hashing_transform_seed.<locals>.<genexpr>R   r\   r]   r:   )r>   _hashing_transformstrr   rO   rP   AssertionError)r%   raw_X_indicesindptr_	indices_0indptr_0	indices_1s           r   test_hashing_transform_seedrn   C   s     
uell734	g	v.E
 251F+FD#uEGVQ151F/c5qQIxw	*vx(151F(sEJOIq!	~	&7I. 
'	&	&s   <CCc                  :   d dddddddfD        } t        d	d
      }|j                  |       j                         \  }}t        t	        j
                  ||dk7                 }t        t	        j
                  ||dk7                 }ddg|k(  sJ g d|k(  sJ y )Nc              3   N   K   | ]  }t        |j                                 y wr   r   r   s     r   r   z,test_feature_hasher_pairs.<locals>.<genexpr>Y   s"      IA 	QWWYIr   r:   r2   r   r   r<   r;   r   r-   r   r   r    r!   r   )r:   r<   r;   )r   r   r#   sortednpabsr%   r$   x1x2x1_nzx2_nzs         r   test_feature_hasher_pairsr|   X   s    A&1R(HIE #bVDN%%e,446FB266"R1W+&'E266"R1W+&'Eq6U??r*   c                  @   d dddddddfD        } t        d	d
      }|j                  |       j                         \  }}t        t	        j
                  ||dk7                 }t        t	        j
                  ||dk7                 }ddg|k(  sJ g d|k(  sJ d ddiddifD        } |j                  |       j                         \  }}t	        j
                  ||dk7           }t	        j
                  ||dk7           }dg|k(  sJ dg|k(  sJ t        ||       y )Nc              3   N   K   | ]  }t        |j                                 y wr   r   r   s     r   r   z?test_feature_hasher_pairs_with_string_values.<locals>.<genexpr>f   s"      OA 	QWWYOr   r:   arq   abcr;   rr   rs   r   r    r!   r   )r:   r:   r;   c              3   N   K   | ]  }t        |j                                 y wr   r   r   s     r   r   z?test_feature_hasher_pairs_with_string_values.<locals>.<genexpr>q   s     G&FT!'')_&Fr   bax)r   r   r#   rt   ru   rv   r   rw   s         r   ,test_feature_hasher_pairs_with_string_valuesr   e   s'   C(%2*NOE #bVDN%%e,446FB266"R1W+&'E266"R1W+&'Eq6U??Gu~u~&FGE%%e,446FBFF2bAg;EFF2bAg;E3%<<3%<<r2r*   c                      d} g dt        t        d            g}t        | d      }|j                  |      }t	        |j                         t        j                  t        |      | f             y )Nr   r4   r   r8   r!   )	r   ranger   r   r   r#   ru   zerosr@   )r
   r%   r$   rE   s       r   test_hash_empty_inputr   z   sY    JT%(^$E"jXNN  'Aqyy{BHHc%j*-E$FGr*   c                  r    t               j                  ddig      } | j                  j                  dk(  sJ y )Nr   r   )r   )r   r   datar?   )rE   s    r   test_hasher_zerosr      s0    !!E1:,/A66<<4r*   c                  B   t        d      g} t        dd      j                  |       }|j                  j	                         dk  r|j                  j                         dkD  sJ t        dd      j                  |       }|j                  j	                         dkD  sJ y )NThequickbrownfoxjumpedTr8   )r9   r"   r   F)rG   r   fit_transformr   minmaxrE   Xts     r   test_hasher_alternate_signr      s    	&	'(A	dx	@	N	Nq	QB77;;=1!222	e	A	O	OPQ	RB77;;=1r*   c                  "   t        d      g} t        ddd      j                  |       }t        |j                  d         t        | d         k  sJ t        ddd      j                  |       }|j                  d   t        | d         k(  sJ y )Nr   Tr:   r8   )r9   r
   r"   r   F)rG   r   r   rv   r   r@   r   s     r   test_hash_collisionsr      s    	&	'(A	h
mA 
 rwwqz?S1Y&&&	x
mA  771:QqT"""r*   c                  T    t               } | j                         }|j                  rJ y)z3Test that FeatureHasher has requires_fit=False tag.N)r   __sklearn_tags__requires_fit)hashertagss     r   $test_feature_hasher_requires_fit_tagr      s)    _F""$D     r*   c                  t    t        d      } ddddddg}| j                  |      }|j                  dk(  sJ y	)
z6Test that FeatureHasher can transform without fitting.rM   r	   r:   r2   )dogcat   )r   run)r2   rM   N)r   r   r?   )r   r   results      r   )test_feature_hasher_transform_without_fitr      sB    b)Fa !A"67Dd#F<<7"""r*   )numpyru   rO   numpy.testingr   sklearn.feature_extractionr   (sklearn.feature_extraction._hashing_fastr   rd   sklearn.utils.fixesr   r)   rF   markparametrizerS   rn   r|   r   r   r   r   r   r   r   r4   r*   r   <module>r      sy      , 4 T 83< '<=( >(/*
*H # !#r*   