
    Q3j-                     t   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlZd dlZd dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0m1Z1m2Z2 dZ3dZ4e3e4z   Z5d Z6d Z7d Z8d Z9d Z:d Z;ejx                  j{                  deef      d        Z>d Z?d Z@d ZAd ZBd ZCd  ZDd! ZEd" ZFd# ZGd$ ZHd% ZId& ZJd' ZKd( ZLd) ZMd* ZNd+ ZOejx                  j                  e0d,-      d.        ZQd/ ZRd0 ZSd1 ZTd2 ZUd3 ZVejx                  j{                  deef      d4        ZWd5 ZXd6 ZYd7 ZZd8 Z[d9 Z\ejx                  j{                  deef      d:        Z]d; Z^d< Z_d= Z`d> Zad? Zbd@ Zcejx                  j{                  dAej                  ej                  ej                  g      dB        ZgdC ZhdD ZidE ZjdF ZkdG ZldH ZmdI ZndJ ZodK ZpdL ZqdM Zrejx                  j{                  deeef      dN        Zsejx                  j{                  dOej                  ej                  g      dP        Zvejx                  j{                  dQ ee1e2            dR        Zwejx                  j{                  dSej                  ej                  dTfej                  ej                  dTfej                  ej                  dUfej                  ej                  dUfg      dV        Zzejx                  j{                  dW edXY       edXY       edXY      g      dZ        Z{d[ Z|d\ Z}e.ejx                  j{                  d]e2      d^               Z~ejx                  j{                  d_eeeg      d`        Zejx                  j{                  d_eeeg      ejx                  j{                  dadbedcfddedefg      df               Zejx                  j{                  d_ee ej                  e      g      ejx                  j{                  dgdh di g      ejx                  j{                  djdddbg      dk                      Zejx                  j{                  d_eeeg      dl        Zejx                  j{                  deeeg      ejx                  j{                  dmdndogdddpddqdrdsdtf	ddu ddpddqdvdsdtf	ddw ddpdxdydzdvd{f	ddd| dpdxd} d~dsdf	dddddd ddsdf	dg      d               Zejx                  j{                  deddddddgfee3ff      d        Zd Zd Zejx                  j{                  d_eeeeg      d        Zejx                  j{                  d]e2      d        Zejx                  j{                  dej                  ej                  g      d        Zd Zd Zy)    N)defaultdict)Mapping)partial)StringIO)product)assert_array_almost_equalassert_array_equal)sparse)clone)ENGLISH_STOP_WORDSCountVectorizerHashingVectorizerTfidfTransformerTfidfVectorizerstrip_accents_asciistrip_accents_unicode
strip_tags)GridSearchCVcross_val_scoretrain_test_split)Pipeline)	LinearSVC)_align_api_if_sparse)assert_allclose_dense_sparseassert_almost_equalskip_if_32bit)_IS_WASMCSC_CONTAINERSCSR_CONTAINERS)zthe pizza pizza beer copyrightzthe pizza burger beer copyrightz!the the pizza beer beer copyrightzthe burger beer beer copyrightzthe coke burger coke copyrightzthe coke burger burger)zthe salad celeri copyrightz)the salad salad sparkling water copyrightzthe the celeri celeri copyrightzthe tomato tomato salad waterz the tomato salad water copyrightc                 4    t        |       j                         S N)r   upperss    W/DATA/.local/lib/python3.12/site-packages/sklearn/feature_extraction/tests/test_text.py	uppercaser&   :   s     #))++    c                 &    | j                  dd      S )N   ée)replacer#   s    r%   strip_eacuter,   >   s    99T3r'   c                 "    | j                         S r!   splitr#   s    r%   split_tokenizer0   B   s    779r'   c                     dgS )Nthe_ultimate_feature r#   s    r%   lazy_analyzer4   F   s    "##r'   c                     d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d	} d
}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d
}t        |       |k(  sJ y )N   àáâãäåçèéêëaaaaaaceeee   ìíîïñòóôõöùúûüýiiiinooooouuuuy   إu   ا   this is à testthis is a testu   öou   ̀́̂̃ u   ȫ)r   aexpecteds     r%   test_strip_accentsrB   J   s     AH #x///(A H #x/// 	AH #x/// 	AH #x/// 	AH #x/// 	#AH #x/// 	AH #x///r'   c                      d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ d} d}t        |       |k(  sJ y )	Nr6   r7   r8   r9   r:   r>   r;   r<   )r   r?   s     r%   test_to_asciirD   n   sz     AHq!X---(A Hq!X--- 	AHq!X--- 	AHq!X---r'   
Vectorizerc                     | d      j                         }d}g d} ||      |k(  sJ d}g d} ||      |k(  sJ  | d      j                         }t        d	      }g d
} ||      |k(  sJ  | t              j                         }d}g d} ||      |k(  sJ  | t        d      j                         }d}g d} ||      |k(  sJ y )Nasciistrip_accents:   J'ai mangé du kangourou  ce midi, c'était pas très bon.)
aimangedu	kangouroucemidietaitpastresbonz0This is a test, really.

 I met Harry yesterday.)thisistestreallymetharry	yesterdayfile)input'This is a test with a file-like object!)rU   rV   rW   withr\   likeobjectpreprocessoru;   J'ai mangé du kangourou  ce midi,  c'était pas très bon.)
AIMANGEDU	KANGOUROUCEMIDIETAITPASTRESBON)	tokenizerrI   )
zj'airL   rM   rN   rO   zmidi,zc'etaitrR   rS   zbon.)build_analyzerr   r&   r0   )rE   watextrA   s       r%   test_word_analyzer_unigramsrr      s    	'	*	9	9	;BGDH d8x?DLHd8x	&	!	0	0	2B=>DGHd8x 
	+	:	:	<BHDH d8x 
nG	D	S	S	UBGDH d8xr'   c                  b    t        ddd      j                         } d}g d} | |      |k(  sJ y )Nwordunicode      analyzerrI   ngram_rangerJ   )rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   zai mangezmange duzdu kangourouzkangourou cezce midiz
midi etaitz	etait paszpas tresztres bon)r   ro   )rp   rq   rA   s      r%   'test_word_analyzer_unigrams_and_bigramsr|      sA    	yf
n  HDH* d8xr'   c                  p   d} | j                  d      }t        dd      j                         }t        j                  t
              5   ||       d d d        t        ddd      j                         }t        j                  t
              5   ||       d d d        y # 1 sw Y   PxY w# 1 sw Y   y xY w)	NrJ   zutf-8rv   rG   )r{   encodingchar      )rz   r{   r~   )encoder   ro   pytestraisesUnicodeDecodeError)rq   
text_bytesrp   cas       r%   test_unicode_decode_errorr      s     HDW%J 
Vg	>	M	M	OB	)	*
: 
+ 
Vg
n  
)	*
: 
+	* 
+	* 
+	*s   	B 	B, B),B5c                  Z   t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ d
}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        ddd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nr   ru   r   ry   u9   J'ai mangé du kangourou  ce midi, c'était pas très bon)zj'az'aizai zi mz ma   )zs tresz tres ztres bzres bozes bon1This 
	is a test, really.

 I met Harry yesterday)thihisis zs iz is)z yesteyesteresterdsterdaterdayr\   r]   rz   r{   r^   r   ro   r   cngarq   rA   s      r%   test_char_ngram_analyzerr      s    yfn 	 GD2H:bq>X%%%AH:bc?h&&&BD2H:bq>X%%%AH:bc?h&&&v6n 	 =>D2H:bq>X%%%r'   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         } t        d      }g d} | |      d d |k(  sJ y )Nchar_wbru   r   ry   r   )z thr   r   r   z thir   )r   r   r   r   zerday r   r\   r   zA test with a file-like object!)z a z tetesestzst z tesr   r   r   s      r%   test_char_wb_ngram_analyzerr     s    )n 	 CD3H:bq>X%%%AH:bc?h&&&yfn 	 56D:H:bq>X%%%r'   c                     t        ddd      j                         } d}g d} | |      d d |k(  sJ g d} | |      d	d  |k(  sJ t        d
dd      j                         }t        |      } ||       | |      k(  sJ y )Nrt   ru   r   ry   r   )zthis is testzis test reallyztest really metr   )ztest really met harry yesterdayzthis is test really met harryz"is test really met harry yesterdayr\   r   r   )r   rq   rA   	cnga_filer\   s        r%   test_word_ngram_analyzerr      s    yfn 	 CDDH:bq>X%%%H
 :bc?h&&&v6n  D>DT?d4j(((r'   c                  B   ddd} t        | j                               }t        t        t        t        t        t              fD ]  } ||       }t        |      }|j                  t               t        |t              r|j                  | k(  sJ t        |j                        |k(  sJ |j                  t              }|j                  d   t!        |      k(  sJ  ||       }t        |      }|j#                  |      }t!        |      |j                  d   k(  rJ  y )Nr   rw   pizzabeer
vocabulary)setkeysdictlistiterr   r   intr   fitJUNK_FOOD_DOCS
isinstancer   vocabulary_	transformshapeleninverse_transform)vocabtermstypvvectXinvs          r%   &test_countvectorizer_custom_vocabularyr   7  s    #E

E dD'+s";<J!, a!##u,,,t''(E111NN>*wwqzSZ'''J!,$$Q'3x1771:%%% =r'   c                     ddg} t        dt        |       fdt               fg      }|j                  t              }t        |j                  d   j                        t        |       k(  sJ |j                  d   t        |       k(  sJ y )Nr   r   countr   tfidfrw   )
r   r   r   fit_transformALL_FOOD_DOCSr   named_stepsr   r   r   )what_we_likepiper   s      r%   /test_countvectorizer_custom_vocabulary_pipeliner   L  s    V$Lo>?&()	
D 	=)At(445\9JJJJ771:\****r'   c                      ddd} d}t        j                  t        |      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nr   r   z$Vocabulary contains repeated indicesmatchr   pasta_sizilianar   r   
ValueErrorr   r   )r   msgr   s      r%   7test_countvectorizer_custom_vocabulary_repeated_indicesr   Y  sE    #E
0C	z	-%0#$% 
.	-	-s   AAc                      ddd} t        j                  t        d      5  t        |       }|j	                  dg       d d d        y # 1 sw Y   y xY w)Nrw   rx   r   zdoesn't contain indexr   r   pasta_verdurar   r   r   s     r%   0test_countvectorizer_custom_vocabulary_gap_indexr   a  sA    #E	z)@	A%0/"# 
B	A	As   A		Ac                     t               } | j                  d       | j                         t        k(  sJ | j                  d       t	        j
                  t              5  | j                          d d d        | j                  d       t	        j
                  t              5  | j                          d d d        g d}| j                  |       | j                         t        |      k(  sJ y # 1 sw Y   xY w# 1 sw Y   JxY w)Nenglish
stop_words_bad_str_stop__bad_unicode_stop_)someotherwords)r   
set_paramsget_stop_wordsr   r   r   r   r   )cvstoplists     r%   test_countvectorizer_stop_wordsr   h  s    		BMMYM'"4444MM-M.	z	"
 
#MM1M2	z	"
 
#)HMMXM&#h-/// 
#	" 
#	"s   !C3%C?3C<?Dc                  @   t        j                  t        d      5  t        g       } | j	                  dg       d d d        t        j                  t        d      5  t        dd      }|j	                  g d       d d d        y # 1 sw Y   NxY w# 1 sw Y   y xY w)	Nzempty vocabularyr   r   foo      ?r   )max_dfr   )zto be or not to bez
and me toozand so do your   )r   r   s     r%   %test_countvectorizer_empty_vocabularyr   w  sr    	z);	<"-% 
= 
z);	<39=	CD 
=	<	 
=	< 
=	<s   B!BBBc                      t               } | j                  t        d d       }| j                  t        dd        }|j                  d   |j                  d   k7  sJ y )Nr   rw   )r   r   r   r   )r   X1X2s      r%   test_fit_countvectorizer_twicer     sV    		B			-+	,B			-+	,B88A;"((1+%%%r'   c                      g d} d}t        |      }|j                  |        g d}|j                         }t        ||       y)zCheck `get_feature_names_out()` when a custom token pattern is passed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    z&This is the 1st document in my corpus.z This document is the 2nd sample.zAnd this is the 3rd one.zIs this the 4th document?z'[0-9]{1,3}(?:st|nd|rd|th)\s\b(\w{2,})\btoken_pattern)documentonesampleN)r   r   get_feature_names_outr	   )corpusr   
vectorizerrA   feature_names_outs        r%   )test_countvectorizer_custom_token_patternr     sG    
F ?M }=JV$,H"88:((3r'   c                      g d} d}d}t        |      }t        j                  t        |      5  |j	                  |        ddd       y# 1 sw Y   yxY w)zCheck that we raise an error if token pattern capture several groups.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12971
    r   z)([0-9]{1,3}(?:st|nd|rd|th))\s\b(\w{2,})\bz,More than 1 capturing group in token patternr   r   Nr   r   r   r   r   )r   r   err_msgr   s       r%   <test_countvectorizer_custom_token_pattern_with_several_groupr     sF    
F AM<G }=J	z	1v 
2	1	1s   AAc                  T   g d} d}t        d|       }t        j                  t        |      5  |j	                  |        d d d        t        j                         5  t        j                  dt               |j                  |        d d d        y # 1 sw Y   RxY w# 1 sw Y   y xY w)N)SampleUpperCase
VocabularyzyUpper case characters found in vocabulary while 'lowercase' is True. These entries will not be matched with any documentsT)	lowercaser   r   error)	r   r   warnsUserWarningr   warningscatch_warningssimplefilterr   )r   messager   s      r%   'test_countvectorizer_uppercase_in_vocabr    s     ;J	)  !4JGJ	k	1z" 
2 
	 	 	"g{3Z( 
#	" 
2	1 
#	"s   B,BBB'c                      g dg dg dg} t        dd      j                  |       }g d}|j                  |      }t        ||       y)	z0Check get_feature_names_out for TfidfTransformerrw   rw   rw   rw   rw   r   rw   r   r   Tl2
smooth_idfnorm)r@   cbN)r   r   r   r	   )r   trfeature_names_inr   s       r%   %test_tf_transformer_feature_names_outr    sI    	Iy)A	T	5	9	9!	<B&001AB'):;r'   c                  v   g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ y )Nr
  r  r  Tr  r  r   rx   rw   axisr   r   r   )r   r   toarrayallr   sumr   r  r   s      r%   test_tf_idf_smoothingr    s    	Iy)A	T	5BQ'')EQJ uaxnn!n4oF 
Iy)A	T	5BQ'')EQJr'   zcno floating point exceptions, see https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881)reasonc                     g dg dg dg} t        dd      }|j                  |       j                         }|dk\  j                         sJ t	        |dz  j                  d	
      g d       g dg dg dg} t        dd      }d}t        j                  t        |      5  |j                  |       j                          d d d        y # 1 sw Y   y xY w)Nr
  r  r  Fr  r  r   rx   rw   r  r  zdivide by zeror   )	r   r   r  r  r   r  r   r  RuntimeWarning)r   r  r   in_warning_messages       r%   test_tfidf_no_smoothingr#    s     
Iy)A	U	6BQ'')EQJ uaxnn!n4oF 
Iy)A	U	6B)	n,>	?
##% 
@	?	?s   # CCc                      dgdgdgg} t        ddd       }|j                  |       j                         }|d   dk(  sJ |d   |d   kD  sJ |d   |d   kD  sJ |d   dk  sJ |d   dk  sJ y )Nrw   rx   r   TF)sublinear_tfuse_idfr  r   )r   r   r  r  s      r%   test_sublinear_tfr'    s    
qcA3A	tU	FBQ'')E8q==8eAh8eAh8a<<8a<<r'   c                  .	   t        t        d d       } t        d   g}t        t              dz
  }t        d      }|j	                  |       }t        |d      r|j                         }|d|j                  d   f   dk(  sJ t        |j                  	      }||fD ]  }|j                  |      }t        |d      r|j                         }|j                  }|d|d
   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ d|vsJ d|vsJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  sJ |d|d   f   dk(  rJ  t        d      }	|	j                  |      j                  |      j                         }
t        |	j                        t        |j                        k(  sJ |
j                  |t        |j                        fk(  sJ |	j                        j                         }|j                  t        |      t        |j                        fk(  sJ t        dd      }|j                  |      j                  |      j                         }t        |d      rJ t        d      }t        j                  t               5  |j                  |       d d d        t#        t%        j&                  |d      dg|z         t        t        d d       } t)        d      }|j*                  |_        |j	                  |       j                         }|j,                  rJ t#        |
|       |j                  |      j                         }t#        ||       t        d 	      }t        j                  t               5  |j                  |        d d d        |j/                  dd       |j1                         }d}t3        |      } ||      }||k(  sJ |j/                  dd        t        j                  t               5  |j1                          d d d        d |_        t        j                  t               5  |j5                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   \xY w# 1 sw Y   y xY w)!Nrw         ?r   tocsrr   r   rx   r   saladtomatowaterthe	copyrightcokeburgerr   l1r  F)r  r&  idf_Tr&  r  r   rG   )rI   r   rJ   _gabbledegook_)rI   rc   _invalid_analyzer_type_)r   r   r   r   r   hasattrr,  r   r   r   r   r  r6  r   r   r   r   r   npr  r   r   fixed_vocabulary_r   build_preprocessorr   ro   )
train_data	test_datan_trainv1counts_trainv2r   counts_testr   t1r   
tfidf_testt2tft3tvtfidf2tfidf_test2v3	processorrq   rA   results                          r%   test_vectorizerrP    s\   mCR()Jr"#I- 1$G 
	$B##J/L|W%#))+2>>'223q888 
BNN	3B "Xkk),;(%++-K]]
1j112a7771j223q8881j112a777 J&&& *,,, 1j001Q6661j223q8881j001Q6661j112a777/ 4 
t	$BFF< **<8@@BErww<3r~~....;;7C$78888 k*224JIBNN0CDDDD 
tU	3B			'	'	5	=	=	?Br6""" 
$	'B	z	"
\" 
# bffRa03%'/B mCR()J	d	#B		BIj)113F####eV, ,,y)113Kj+6 
D	)B	z	"
Z  
# MM5M9%%'IGD"4(Ht_Fv MM 0tMD	z	"
 
# .BM	z	"
 
#	"Q 
#	", 
#	" 
#	"
 
#	"s0   Q&Q3Q?R&Q03Q<?RRc                     d\  } }}}t        | |||      }|j                  t               |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ d|_        d|_        d|_        d|_        |j                  j                  | k(  sJ |j                  j
                  |k(  sJ |j                  j                  |k(  sJ |j                  j                  |k(  sJ |j                  t               |j                  j                  |j                  k(  sJ |j                  j
                  |j
                  k(  sJ |j                  j                  |j                  k(  sJ |j                  j                  |j                  k(  sJ y )N)r  FFF)r  r&  r  r%  r4  T)r   r   r   _tfidfr  r&  r  r%  )r  r&  r  r%  rJ  s        r%   test_tfidf_vectorizer_settersrS  j  s   .G+D':|	7z
B FF>99>>T!!!99'''99:---99!!\111 BGBJBMBO99>>T!!!99'''99:---99!!\111FF>99>>RWW$$$99

***992==00099!!R__444r'   c                     t               } | j                  t              }|j                  }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ |D ]6  }t        t        j                  j                  |j                  d      d       8 t        dd      } | j                  t              }|j                  t        t              | j                  fk(  sJ |j                  | j                  k(  sJ |j                  }||kD  sJ |d|z  k  sJ t        j                  |j                        dkD  sJ t        j                  |j                        dk  sJ |D ]6  }t        t        j                  j                  |j                  d      d       8 y )	Nr)  r   rw   rx   r   rv   r4  )r{   r  )r   r   r   nnzr   r   
n_featuresdtyper;  mindatamaxr   linalgr  )r   r   	token_nnzrow
ngrams_nnzs        r%   test_hashing_vectorizerr_    s   A	M"AI77s=)1<<888877agg 66!&&>B66!&&>A66!&&>A66!&&>A BIINN388Q7=  	f48A	M"A77s=)1<<888877agg J	!!!I%%% 66!&&>B66!&&>A BIINN388Q7= r'   c                  4   t        d      } t        j                  t              5  | j	                          d d d        | j
                  rJ | j                  t              }|j                  \  }}t        | j                        |k(  sJ | j	                         }t        |t        j                        sJ |j                  t        k(  sJ t        |      |k(  sJ t!        g d|       t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  g d}t        |      } | j	                         }t!        g d|       | j
                  sJ t#        |      D ]%  \  }}|| j                  j%                  |      k(  r%J  y # 1 sw Y   XxY w)Nr*  r+  	r   r3  celerir2  r   r-  	sparklingr.  r/  r   )r   r   r   r   r   r<  r   r   r   r   r   r   r;  ndarrayrW  ra   r	   	enumerateget)r   r   	n_samplesrV  feature_namesidxnamer   s           r%   test_feature_namesrk    s|   		$B 
z	"
  " 
##### 	'AGGIzr~~*,,,,,.MmRZZ000&(((}+++
	
 	 }-	Tbnn((.... .
E 
E	*B,,.M
	
 	 }-	Tbnn((.... . 
#	"s   FFc                 ~    h d} | dd      }|j                  t               t        |j                        |k(  sJ y )N>   r   r   r-  r3  g333333?   )r   max_features)r   r   r   r   )rE   expected_vocabularyr   s      r%   test_vectorizer_max_featuresrp    s<    > 3Q7JNN=!z%%&*====r'   c                     t        d      } t        d      }t        d       }| j                  t              j                  d      }|j                  t              j                  d      }|j                  t              j                  d      }| j	                         }|j	                         }|j	                         }d|j                         k(  sJ d|j                         k(  sJ d|j                         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ d|t        j                  |         k(  sJ y )Nrw   rn  r   r   r     r0  )r   r   r   r  r   rZ  r;  argmax)	cv_1cv_3cv_Nonecounts_1counts_3counts_None
features_1
features_3features_Nones	            r%   "test_count_vectorizer_max_featuresr~    s;    *D*D40G!!.15515=H!!.15515=H''7;;;CK++-J++-J113M !!!! Jryy23333Jryy23333M"))K"89999r'   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nabcdeaeatr   r   rz   r   r@   r   r*  rm  rw   )r   r   r   r   r   r   r?  r   s     r%   test_vectorizer_max_dfr    s   %IF37DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r'   c                  L   g d} t        dd      }|j                  |        d|j                  j                         v sJ t	        |j                  j                               dk(  sJ d|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ d	|_        |j                  |        d|j                  j                         vsJ t	        |j                  j                               dk(  sJ y )
Nr  r   rw   )rz   min_dfr@   r   rx   r  g?)r   r   r   r   r   r  r  s     r%   test_vectorizer_min_dfr  *  s   %IF15DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r'   c                     ddg} t        dd      }|j                  |       j                         }t        g d|j	                                t        g dg dg|       t        ddd	
      }|j                  |       j                         }t        g dg dg|       t        ddd	t
        j                        }|j                  |       }|j                  t
        j                  k(  sJ y )Naaabcabbder   r   r  )r@   r  r  dr*   )r   rw   rw   r   r   )rw   rx   r   rw   rw   T)rz   r   binary)rw   rw   rw   r   r   )rw   rw   r   rw   rw   )rz   r   r  rW  )r   r   r  r	   r   r;  float32rW  )r?  r   r   X_sparses       r%   test_count_binary_occurrencesr  <  s    '"IF37D9%--/A0$2L2L2NO91= F3tDD9%--/A91= F3t2::VD!!),H>>RZZ'''r'   c                     ddg} t        ddd       }|j                  |       }t        j                  |dd j                        dk(  sJ t        j                  |dd	 j                        d	k(  sJ |j
                  t        j                  k(  sJ t        ddd
d       }|j                  |       }t        j                  |j                        dk(  sJ |j
                  t        j                  k(  sJ t        ddd
d t        j                        }|j                  |       }|j
                  t        j                  k(  sJ y )Nr  r  Fr   )alternate_signrz   r  r   rw   r   rx   T)rz   r  r  r  )rz   r  r  r  rW  )r   r   r;  rZ  rY  rW  float64)r?  r   r   s      r%   test_hashed_binary_occurrencesr  P  s   '"IEFNDy!A66!Aa&++!###66!Aa&++!###77bjj    dD 	y!A66!&&>Q77bjj    dRZZD 	y!A77bjj   r'   c                 ~   t         } |        }|j                  |      }|j                  |      }t        |t              sJ |j                         }t        ||      D ]g  \  }}t        j                  t        j                   ||                  }t        j                  t        j                  |            }t        ||       i t        j                  |      sJ |j                  dk(  sJ |j                         }	|j                  |	      }
t        ||
      D ]7  \  }}t        t        j                  |      t        j                  |             9 |j                         }|j                  |      }t        ||      D ]7  \  }}t        t        j                  |      t        j                  |             9 y )Ncsr)r   r   r   r   r   ro   zipr;  sortuniquer	   r
   issparseformatr  tocsc)rE   rY  r   transformed_datainversed_dataanalyzedocinversed_termsr   transformed_data2inversed_data2terms2transformed_data3inversed_data3terms3s                  r%   !test_vectorizer_inverse_transformr  j  sm    DJ!//5001ABMmT***'')G"47^		'#,/0>!:;5.1  8
 ??+,,,""e+++ )002112CDN]N;v2775>2776?; < )..0112CDN]N;v2775>2776?; <r'   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
d}t        ||dd      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ y )Nr)  rw   g?r   	test_sizerandom_stater   svcrw   rw   rv   hingesquared_hinge)vect__ngram_range	svc__lossr   )n_jobsr   r   )r   NOTJUNK_FOOD_DOCSr   r   r   r   r   r   r   predictr	   best_score_best_estimator_r   r{   rY  targetr>  r?  target_traintarget_testpipeline
parametersgrid_searchpredbest_vectorizers              r%   -test_count_vectorizer_pipeline_grid_selectionr    s    --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-/J xA!DK ??:|4<<YGDt[)
 ""c)))!11==fEO&&&000r'   c                     t         t        z   } dgt        t               z  dgt        t              z  z   }t        | |dd      \  }}}}t	        dt               fdt               fg      }dd	gd
dd}t        ||d      }|j                  ||      j                  |      }	t        |	|       |j                  dk(  sJ |j                  j                  d   }
|
j                  dk(  sJ |
j                  dk(  sJ |
j                   rJ y )Nr)  rw   g?r   r  r   r  r  rv   )r4  r  r  )r  
vect__normr  )r  r   r  )r   r  r   r   r   r   r   r   r   r  r	   r  r  r   r{   r  r<  r  s              r%   'test_vectorizer_pipeline_grid_selectionr    s%   --D TC''1#4E0F*FFF 8Hf!84J	< &/"34uik6JKLH %f-"/J xA>K ??:|4<<YGDt[)
 ""c)))!11==fEO&&&0004'''00000r'   c                      t         t        z   } dgt        t               z  dgt        t              z  z   }t        dt	               fdt               fg      }t        || |d      }t        |g d       y )Nr)  rw   r   r  r   )r   r  )r   r  r   r   r   r   r   r	   )rY  r  r  	cv_scoress       r%   )test_vectorizer_pipeline_cross_validationr    sj    --D TC''1#4E0F*FFF&/"34uik6JKLH$1=Iy/2r'   c                     d} t               }|j                  | g      }|j                  dk(  sJ t        d d      }|j	                  | g      }|j                  dk(  sJ |j
                  |j
                  k(  sJ t        t        j                  |j                        t        j                  |j                               y )Nu   Машинное обучение — обширный подраздел искусственного интеллекта, изучающий методы построения алгоритмов, способных обучаться.)rw      F)r  r  )rw   i   )
r   r   r   r   r   rU  r	   r;  r  rY  )r   r   	X_countedX_hasheds       r%   test_vectorizer_unicoder    s    	1  D""H:.I??g%%%$u=D~~xj)H>>Z''' ==HLL((( rwwy~~.0FGr'   c                      ddg} t        |       }|j                  t              }|j                  t              }t	        |j                         |j                                |j                  sJ y )Nr   rb  r   )r   r   r   r   r   r  r<  )r   r   X_1X_2s       r%   +test_tfidf_vectorizer_with_fixed_vocabularyr    sY    8$Jj1D


]
+C
..
'CckkmS[[];!!!!r'   c                     t               t        d      t        d      t        d      t               t        t              t        t              t        t              j	                  t
              t        t        	      j	                  t
              t               t        t              t               j	                  t
              g} | D ]  }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ |j                         |j                         k(  sJ t        |j                  t
              |j                  t
                      y )
Nr4  r5  T)r  rv   r{   rb   )rz   rH   )r   r   r   r4   r   r   r,   r   pickledumpsloadstype	__class__
get_paramsr   r   )	instancesorigr$   copys       r%   test_pickling_vectorizerr    s    t$&f-Z0.Z044^Dl377G.n-I LL||ADzT^^+++ DOO$5555$~.~.	
 r'   factoryc                     t               } | |      }d}t        j                  t        j                  |            } ||      } ||      }||k(  sJ y)z_Tokenizers cannot be pickled
    https://github.com/scikit-learn/scikit-learn/issues/12833
    rJ   N)r   r  r  r  )r  vecfunctionrq   roundtripped_functionrA   rO  s          r%   test_pickling_built_processorsr    sS     
Cs|HGD"LLh)?@~H"4(FXr'   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t        | j                  |dd            }t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y Nr   ra  d   r   F)sizer+   r   )r;  randomRandomStatearrayranger   choicer   r  r  r  r   r   r	   r   )rngvocab_wordsx	vocab_setr   unpickled_cvs         r%   -test_countvectorizer_vocab_sets_when_picklingr  4  s     ))


"C((
	
K 1c]

;Q
FG		2||FLL$45
}'$$&(J(J(L	
 r'   c                     t         j                  j                  d      } t        j                  g d      }t	        dd      D ]  }t               }| j                  |dd      }t	        dd      D ]
  }||||   <    t        |      }t        j                  t        j                  |            }|j                  t               |j                  t               t        |j                         |j                                 y r  )r;  r  r  r  r  r   r  r   r  r  r  r   r   r	   r   )r  r  r  
vocab_dictr   yr   r  s           r%   .test_countvectorizer_vocab_dicts_when_picklingr  P  s    
))


"C((
	
K 1c]V


;Q
>q!A#$JuQx  
3||FLL$45
}'$$&(J(J(L	
 r'   c                     t               j                  t              } t               j	                  |       }t        j                  |      }t        j                  |      }t        |      |j                  k(  sJ t        |j                  |       j                         |j                  |       j                                y r!   )r   r   r   r   r   r  r  r  r  r  r	   r  )r   r  r$   r  s       r%   test_pickling_transformerr  m  s    ''7A!!!$DTA<<?D:'''t))!,4468J8J18M8U8U8WXr'   c                  2   t               j                  t              } t               j	                  |       }t               }|j
                  |_        t        |j                  |       j                         |j                  |       j                                y r!   )	r   r   r   r   r   r6  r	   r   r  )r   r  r  s      r%   test_transformer_idf_setterr  v  si    ''7A!!!$DD		DIt~~a(002DNN14E4M4M4OPr'   c                     t        d      } | j                  t               t        | j                  d      }| j                  |_        t        |j                  t              j                         | j                  t              j                                t        | j                  d      }d}t        j                  t        |      5  | j                  |_        d d d        y # 1 sw Y   y xY w)NTr7  r   r&  Fz+`idf_` cannot be set when `user_idf=False`.r   )r   r   r   r   r6  r	   r   r  r   r   r   )r  r  r   s      r%   test_tfidf_vectorizer_setterr  ~  s    4(DHH^d&6&6ED		DI~&..0~&..0
 d&6&6FD;G	z	1II	 
2	1	1s   C''C0c                  &   t        d      } | j                  t               t        | j                  d      }t	        | j
                        }dg|dz   z  }t        j                  t              5  t        |d|       d d d        y # 1 sw Y   y xY w)NTr7  r  r   rw   r6  )
r   r   r   r   r   r6  r   r   r   setattr)r   r  expected_idf_leninvalid_idfs       r%   %test_tfidfvectorizer_invalid_idf_attrr    sn    4(DHH^d&6&6ED499~%+a/0K	z	"fk* 
#	"	"s   0BBc                      g d} t        |       }t        j                  t              5  |j	                  g        d d d        y # 1 sw Y   y xY w)N)r@   r  r  r@   r@   r   r   r   s     r%   test_non_unique_vocabr    s4    %Ee,D	z	" 
#	"	"s   AAc                      d} t         }d }t        j                  ||       5   |        d d d        y # 1 sw Y   y xY w)Nz?np.nan is an invalid document, expected byte or unicode string.c                  \    t               } | j                  dt        j                  dg       y )Nhello worldhello hello)r   r   r;  nan)hvs    r%   funcz0test_hashingvectorizer_nan_in_docs.<locals>.func  s#     
-?@r'   r   )r   r   r   )r  	exceptionr  s      r%   "test_hashingvectorizer_nan_in_docsr    s6     PGIA 
y	0 
1	0	0s   4=c                  0   t        ddd       } | j                  sJ | j                  ddg      j                         }t	        |j                         g d       | j                  ddg      j                         }t	        |j                         g d       y )NTF)r  r&  r  r  r	  )rw   rw   rw   r   )r   r  r   r  r	   ravelr   )r   r   r   s      r%   test_tfidfvectorizer_binaryr    ss    tU>A88O8	67??AAqwwy,/	
m]3	4	<	<	>Brxxz<0r'   c                      t        d      } | j                  t               t        | j                  | j
                  j                         y )NTr7  )r   r   r   r   r6  rR  )r   s    r%   test_tfidfvectorizer_export_idfr    s0    4(DHH^dii)9)9:r'   c                      t        dg      } t        |       }| j                  t               |j                  t               |j                  | j                  k(  sJ y )Nr0  r   )r   r   r   r   r   )
vect_vocabvect_vocab_clones     r%   test_vectorizer_vocab_cloner    sM     UG4JZ(NN=!''':+A+AAAAr'   c                    d} |        }t        j                  t        |      5  |j                  d       d d d        t        j                  t        |      5  |j	                  d       d d d        |j	                  ddg       t        j                  t        |      5  |j                  d       d d d        y # 1 sw Y   xY w# 1 sw Y   ^xY w# 1 sw Y   y xY w)NzBIterable over raw text documents expected, string object received.r   zhello world!	some textzsome other text)r   r   r   r   r   r   )rE   r  r  s      r%   &test_vectorizer_string_object_as_inputr    s     SG
,C	z	1.) 
2 
z	1 
2GG[+,-	z	1n% 
2	1 
2	1 
2	1 
2	1s#   B=C	"C=C	CCX_dtypec                     t        j                  dd| d      }t               j                  |      }|j                  |j                  k(  sJ y N
    N  *   rW  r  )r
   randr   r   rW  )r  r   X_transs      r%   test_tfidf_transformer_typer$    s?    BW2>A ..q1G==AGG###r'   zcsc_container, csr_containerc                 $   t        j                  ddt        j                  d      } | |      } ||      }t	               j                  |      }t	               j                  |      }t        ||       |j                  |j                  k(  sJ y r  )r
   r"  r;  r  r   r   r   r  )csc_containercsr_containerr   X_cscX_csrX_trans_cscX_trans_csrs          r%   test_tfidf_transformer_sparser,    sz     	BRZZbAA!E!E"$2259K"$2259K k:!3!3333r'   z0vectorizer_dtype, output_dtype, warning_expectedTFc                    t        j                  g d      }t        |       }d}|r6t        j                  t
        |      5  |j                  |      }d d d        nHt        j                         5  t        j                  dt
               |j                  |      }d d d        j                  |k(  sJ y # 1 sw Y   xY w# 1 sw Y   'xY w)N)numpyscipysklearnrW  z'dtype' should be used.r   r  )r;  r  r   r   r  r  r   r  r  r  rW  )vectorizer_dtypeoutput_dtypewarning_expectedr   r   warning_msg_matchX_idfs          r%   test_tfidf_vectorizer_typer7    s     	./A '78J1\\+->?,,Q/E @? $$&!!';7,,Q/E ' ;;,&&& @? '&s   B82,C8CCr  )rx   rw   r  c                    | j                   }t        j                  d| d      }t        j                  t
        |      5  | j                  dg       d d d        t        j                  t
        |      5  | j                  dg       d d d        t        | t              r7t        j                  t
        |      5  | j                  dg       d d d        y y # 1 sw Y   xY w# 1 sw Y   ]xY w# 1 sw Y   y xY w)NzInvalid value for ngram_range=z/ lower boundary larger than the upper boundary.r   zgood news everyone)r{   reescaper   r   r   r   r   r   r   r   )r  invalid_ranger  s      r%   $test_vectorizers_invalid_ngram_ranger<    s     OOMii
( 89 	9G
 
z	1%&' 
2 
z	1/01 
2 #()]]:W5MM/01 65 * 
2	1 
2	1 65s$   C7C&=C2C#&C/2C;c                     | j                         }| j                         }| j                         }| j                  |||      S r!   )r   build_tokenizerr=  _check_stop_words_consistency)	estimatorr   tokenize
preprocesss       r%   r?  r?  !  sA    ))+J((*H--/J22:z8TTr'   c                     d} d| z  }t               t               t               fD ]#  }|j                  g d       t	        j
                  t        |      5  |j                  dg       d d d        |`t	        j
                  t        |      5  t        |      du sJ 	 d d d        t        j                         5  t        j                  dt               |j                  dg       d d d        t        |      J |j                  g d	       t	        j
                  t        |      5  |j                  dg       d d d        & y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   |xY w# 1 sw Y   VxY w)
Nz\['and', 'll', 've'\]z}Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens %s not in stop_words.)you'veyouyou'llANDr   r   r  Fr  )rD  rE  rF  blahrG  )r   r   r   r   r   r  r  r   _stop_words_idr?  r  r  r  )lstrr  r  s      r%   'test_vectorizer_stop_words_inconsistentrK  (  s6   #D	')-	. 
  !?#46G6IJ"DE\\+W5}o. 6 \\+W505>>> 6 $$&!!';7}o. ' -S1999 	"LM\\+W5}o. 65# K55 65 '& 65s0   EE=-E%.E1E	E"	%E.	1E;	r'  c                 J    | dt         j                        }t         j                  }|j                  j                  |      |_        |j                  j                  |      |_        dddd}t               j                  ||      }||j                  j                  k(  sJ y)z
    Check that CountVectorizer._sort_features preserves the dtype of its sparse
    feature matrix.

    This test is skipped on 32bit platforms, see:
        https://github.com/scikit-learn/scikit-learn/pull/11295
    for more details.
    )r   r   r1  r   rw   rx   )zscikit-learnrV   zgreat!N)r;  int64indicesastypeindptrr   _sort_featuresrW  )r'  r   INDICES_DTYPEr   Xss        r%   7test_countvectorizer_sort_features_64bit_sparse_indicesrT  D  s     	fBHH-A HHM		  /AIxx}-AH"#1:J			)	)!Z	8BBJJ,,,,,r'   	Estimatorc                 .   ddig} |        }t        |      du sJ  | d dg      }t        |      dk(  sJ t        |      J |j                  |        G d d	|       } |dg
      }t        |      dk(  sJ  | d dg      }t        |      du sJ y )Nrq   r  Tc                     | d   S Nrq   r3   r  s    r%   <lambda>z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>g  s    1V9r'   and)rc   r   r  c                       e Zd Zd Zy)Ftest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimatorc                     d S )Nc                     | d   S rX  r3   rY  s    r%   rZ  zktest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessor.<locals>.<lambda>o  s    QvYr'   r3   )selfs    r%   r=  zYtest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessorn  s    &&r'   N)__name__
__module____qualname__r=  r3   r'   r%   CustomEstimatorr]  m  s    	'r'   rd  r   c                 J    t        j                  d      j                  |       S )Nz\w{1,})r9  compilefindallr  s    r%   rZ  z?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>u  s    bjj3;;C@r'   )rn   r   )r?  r   )rU  rY  r  rd  s       r%   -test_stop_word_validation_custom_preprocessorri  ^  s     [!"D
+C(-555
!4%
IC(-888(-555d') ' eW
-C(-888
@eWC )-555r'   zinput_type, err_type, err_msgfilenamer>   r\   z$'str' object has no attribute 'read'c                     dg}t        j                  ||      5   | d |      j                  |       d d d        y # 1 sw Y   y xY w)N"this is text, not file or filenamer   c                 "    | j                         S r!   r.   rY  s    r%   rZ  z.test_callable_analyzer_error.<locals>.<lambda>  s
    QWWYr'   rz   r]   )r   r   r   )rU  
input_typeerr_typer   rY  s        r%   test_callable_analyzer_errorrq  z  s=     11D	xw	/.jAOOPTU 
0	/	/s	   ?Arz   c                     t        | d      S )Nr)openrh  s    r%   rZ  rZ    s
    T#s^r'   c                 "    | j                         S r!   )readrh  s    r%   rZ  rZ    s
    r'   ro  c                     dg}t        j                  t        t        f      5   | ||      j	                  |       d d d        y # 1 sw Y   y xY w)Nrl  rn  )r   r   FileNotFoundErrorAttributeErrorr   )rU  rz   ro  rY  s       r%   &test_callable_analyzer_change_behaviorrz    s?     11D	)>:	;8:6DDTJ 
<	;	;s   AAc                     d }| j                  d      }|j                  d       t        j                  t        d      5   ||d      j                  |g       d d d        y # 1 sw Y   y xY w)Nc                     t        d      )Ntesting)	Exceptionrh  s    r%   rz   z6test_callable_analyzer_reraise_error.<locals>.analyzer  s    	""r'   zfile.txtzsample content
r}  r   r\   rn  )joinwriter   r   r~  r   )tmpdirrU  rz   fs       r%   $test_callable_analyzer_reraise_errorr    sU    
# 	JAGG	y		2862@@!E 
3	2	2s   A%%A.zjstop_words, tokenizer, preprocessor, ngram_range, token_pattern,analyzer, unused_name, ovrd_name, ovrd_msgrD  rF  r  r   z'stop_words'
'analyzer'	!= 'word'c                 "    | j                         S r!   r.   r#   s    r%   rZ  rZ    
    aggir'   z'tokenizer'c                 "    | j                         S r!   r.   r#   s    r%   rZ  rZ    r  r'   \w+rt   'token_pattern'zis not Nonec                 "    | j                         S r!   r"   r#   s    r%   rZ  rZ    r  r'   c                 "    | j                         S r!   r  r#   s    r%   rZ  rZ    r  r'   z'preprocessor'zis callablerv   c                 "    | j                         S r!   r  r#   s    r%   rZ  rZ    r  r'   z'ngram_range')	NNNr  r  r   r  r  r  c
                     t         }
 |        }|j                  ||||||       d|d|d|	}t        j                  t        |      5  |j                  |
       d d d        y # 1 sw Y   y xY w)N)r   rn   rc   r{   r   rz   zThe parameter z will not be used since  r   )r   r   r   r  r  r   )rE   r   rn   rc   r{   r   rz   unused_name	ovrd_nameovrd_msgr>  r   r   s                r%   test_unused_parameters_warnr    sn    r  J<DOO!#   	C
 
k	- 
.	-	-s   A&&A/zVectorizer, Xrw   rx   )r   barr   )r   bazc                 l     |        }t        |d      rJ |j                  |       t        |d      rJ y )Nn_features_in_)r:  r   )rE   r   r   s      r%   test_n_features_inr    s<     Jz#3444NN1z#34444r'   c                      t        d      } | j                  ddg      j                  }| j                  ddg      j                  }||k(  sJ y )Nrw   rr  helloworld)r   r   r   )r  vocab1vocab2s      r%   )test_tie_breaking_sample_order_invariancer  '  sN     q
)CWWgw'(44FWWgw'(44FVr'   c                  j    t        dd      } | j                  dg      j                  }|d   dk\  sJ y )Ni@B )rx   r   )rV  r{   z22pcs efuturer   )r   r   rN  )hashingrN  s     r%   2test_nonnegative_hashing_vectorizer_result_indicesr  0  s7    7GG 12::G1:??r'   c                 .     |        }t        |d      rJ y)z0Check that vectorizers do not define set_output.
set_outputN)r:  )rU  r   s     r%   'test_vectorizers_do_not_have_set_outputr  7  s    
 +CsL))))r'   c                    t        j                  ddt        j                  d      } | |      }|j	                         }t               j                  |      }|j                  |d      }t        ||       ||usJ |j                  |d      }t        |      |u r||u s~J ||usJ |j                  |j                  u sJ |j                  j                  |j                  j                  u sJ |j                  j                  |j                  j                  u sJ t        j                  t               5  t        ||       ddd       y# 1 sw Y   yxY w)	zJCheck the behaviour of TfidfTransformer.transform with the copy parameter.r  r  r   r!  T)r  FN)r
   r"  r;  r  r  r   r   r   r   r   rP  rN  baserY  r   r   AssertionError)r'  r   r)  X_csr_originaltransformerX_transforms         r%   test_tfidf_transformer_copyr  @  s3    	BRZZbAA!E ZZ\N"$((/K''D'9K 7e###''E':K E"e+e###%'''!!U\\111""''5==+=+====$$

777	~	&$UN; 
'	&	&s   9EErW  c                     t        d      D cg c]  }t        t        j                               ! }}t	        |       j                  |      }|j                  j                  | k(  sJ yc c}w )zCheck that `idf_` has the same dtype as the input data.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/30016
    i r1  N)r  struuiduuid4r   r   r6  rW  )rW  ir   r   s       r%   (test_tfidf_vectorizer_preserve_dtype_idfr  ^  sY     %*'N3NqTZZ\	NA3 u-11!4J??  E))) 	4s   $A+c                  T    t               } | j                         }|j                  rJ y)z7Test that HashingVectorizer has requires_fit=False tag.N)r   __sklearn_tags__requires_fit)r   tagss     r%   (test_hashing_vectorizer_requires_fit_tagr  j  s*    "$J&&(D     r'   c                  h    t        d      } ddg}| j                  |      }|j                  dk(  sJ y)z:Test that HashingVectorizer can transform without fitting.r  )rV  zThis is testzAnother test)rx   r  N)r   r   r   )r   r   rO  s      r%   -test_hashing_vectorizer_transform_without_fitr  q  s8    "b1Jn-F!!&)F<<7"""r'   )r  r9  r  r  collectionsr   collections.abcr   	functoolsr   ior   	itertoolsr   r.  r;  r   numpy.testingr   r	   r/  r
   sklearn.baser   sklearn.feature_extraction.textr   r   r   r   r   r   r   r   sklearn.model_selectionr   r   r   sklearn.pipeliner   sklearn.svmr   sklearn.utilsr   sklearn.utils._testingr   r   r   sklearn.utils.fixesr   r   r   r   r  r   r&   r,   r0   r4   rB   rD   markparametrizerr   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  xfailr#  r'  rP  rS  r_  rk  rp  r~  r  r  r  r  r  r  r  r  r  r  r  ro   r=  r>  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r$  r,  int32rM  r7  r<  r?  rK  rT  ri  rx  ry  rq  paramrz  r  r  r  r  r  r  r  r  r  r  r3   r'   r%   <module>r     sH    	   # #      G  	 	 	 T S % ! . 
 I H  !22, $!0H.* 9J'KL:  M: z <&&4&().&*
+&$0E&4&&)*<  	M  &&&dN5:#>LD/N 'IJ> K>:4-$-$((!4 'IJ< K<>!1H$1N
3H0"
6 &&**''


8
:YQ +1;B ?O5FG&& RZZ$<=$ >$ "GNN$K44 6	2::t$	2::t$	RZZ'	RZZ'	'' 	f-F+F+22(U/8 .9- : -0 /?4EF662 /?4EF #	&+	!GHVV &' +-CD 
';<K =K /?4EF	F	F ?$5G 	5
 x 
	
 
	
 
	
 
	
 
	

	
qCITUIZ@ 	Qq11Q3GHI	.)55 /?4DFWX** .9< :<: 2::rzz":;* <*!#r'   