
    Q3jP              	          d Z ddlZddlZddlZddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z) ejT                  jW                  d      Z,ddgddgddgddgddgddggZ-g dZ.g dZ/ddgddgddggZ0g dZ1g dZ2 ejf                         Z4e,jk                  e4jl                  jn                        Z8 ee4jr                  e4jl                  e,      \  e4_9        e4_6         ejt                         Z; ee;jr                  e;jl                  e,      \  e;_9        e;_6        d Z<d Z=d Z>d Z?ej                  j                  dg d      d        ZBd  ZCd! ZDd" ZEd# ZFd$ ZGd% ZHd& ZIej                  j                  d' eJg e&e'e)e%e(e&d(e'z  z               d)        ZKej                  j                  d' eJg e&e'e)e%e(e&d(e'z  z               d*        ZLd+ ZMd, ZNd- ZOd. ZPd/ ZQej                  j                  d0 e       e4jr                  e4jl                  f e       e;jr                  e;jl                  fg      d1        ZRd2 ZSd3 ZTy)4z6Testing for the boost module (sklearn.ensemble.boost).    N)datasets)BaseEstimatorclone)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressor)LinearRegressionLogisticRegression)GridSearchCVtrain_test_split)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)assert_allcloseassert_array_almost_equalassert_array_equal)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSDOK_CONTAINERSLIL_CONTAINERS      )foor    r    r   r   r   )r   r   r   r   r   r      )r    r   r   )r   r   r   random_statec                     t        j                  t        t                    } t	               j                  t        |       }t        |j                  t              t        j                  t        t              df             y )Nr   )nponeslenXr   fitr   predict_proba)y_tclfs     X/DATA/.local/lib/python3.12/site-packages/sklearn/ensemble/tests/test_weight_boosting.pytest_oneclass_adaboost_probar.   6   sP     ''#a&/C


"
"1c
*Cc//2BGGSVQK4HI    c                     t        d      } | j                  t        t               t	        | j                  t              t               t	        t        j                  t        j                  t                    | j                         | j                  t              j                  t        t              dfk(  sJ | j                  t              j                  t        t              fk(  sJ y )Nr   r"   r   )r   r)   r(   y_classr   predictT	y_t_classr%   uniqueasarrayclasses_r*   shaper'   decision_functionr,   s    r-   test_classification_toyr;   ?   s    
!
,CGGAws{{1~y1ryyI!67FQ%%#a&!444  #))c!fY666r/   c                      t        d      } | j                  t        t               t	        | j                  t              t               y Nr   r"   )r	   r)   r(   y_regrr   r2   r3   y_t_regrr:   s    r-   test_regression_toyr@   I   s,    

+CGGAvs{{1~x0r/   c                     t        j                  t        j                        } t	               }|j                  t        j                  t        j                         t        | |j                         |j                  t        j                        }|j                  d   t        |       k(  sJ |j                  t        j                        j                  d   t        |       k(  sJ |j                  t        j                  t        j                        }|dkD  s
J d|       t        |j                        dkD  sJ t        t        d |j                  D                    t        |j                        k(  sJ y )Nr   g?zFailed with score = c              3   4   K   | ]  }|j                     y wNr"   .0ests     r-   	<genexpr>ztest_iris.<locals>.<genexpr>c        ?3##   )r%   r5   iristargetr   r)   datar   r7   r*   r8   r'   r9   scoreestimators_set)classesr,   probarM   s       r-   	test_irisrR   P   s   ii$G

CGGDIIt{{#w-dii(E;;q>S\)))  +11!4GDDDIIdii-E3;1/uj11; s!###s?s??@CDXXXXr/   loss)linearsquareexponentialc                    t        | d      }|j                  t        j                  t        j                         |j                  t        j                  t        j                        }|dkD  sJ t        |j                        dkD  sJ t        t        d |j                  D                    t        |j                        k(  sJ y )Nr   )rS   r#   g?r   c              3   4   K   | ]  }|j                     y wrC   r"   rD   s     r-   rG   z test_diabetes.<locals>.<genexpr>q   rH   rI   )	r	   r)   diabetesrL   rK   rM   r'   rN   rO   )rS   regrM   s      r-   test_diabetesr[   f   s     A
6CGGHMM8??+IIhmmX__5E4<< s!###s?s??@CDXXXXr/   c                     t         j                  j                  d      } | j                  dt        j
                  j                        }| j                  dt        j
                  j                        }t        d      }|j                  t        j                  t        j
                  |       |j                  t        j                        }|j                  t        j                        D cg c]  }| }}|j                  t        j                        }|j                  t        j                        D cg c]  }| }}|j                  t        j                  t        j
                  |      }	|j!                  t        j                  t        j
                  |      D 
cg c]  }
|
 }}
t#        |      dk(  sJ t%        ||d          t#        |      dk(  sJ t%        ||d          t#        |      dk(  sJ t%        |	|d          t'        dd      }|j                  t        j                  t        j
                  |       |j                  t        j                        }|j                  t        j                        D cg c]  }| }}|j                  t        j                  t        j
                  |      }	|j!                  t        j                  t        j
                  |      D 
cg c]  }
|
 }}
t#        |      dk(  sJ t%        ||d          t#        |      dk(  sJ t%        |	|d          y c c}w c c}w c c}
w c c}w c c}
w )Nr   
   sizen_estimatorssample_weightr   ra   r#   )r%   randomRandomStaterandintrJ   rK   r8   rY   r   r)   rL   r2   staged_predictr*   staged_predict_probarM   staged_scorer'   r   r	   )rngiris_weightsdiabetes_weightsr,   predictionspstaged_predictionsrQ   staged_probasrM   sstaged_scoress               r-   test_staged_predictrt   t   s   
))


"C;;r(9(9;:L{{2HOO,A,A{B
"
-CGGDIIt{{,G?++dii(K%(%7%7		%BC%B!%BCdii(E # 8 8 CD C1Q CMDIIdiiLIIE##DIIt{{,#WWaW   !"b(((k+=b+AB}###e]2%67}###e]2%67 !
<CGGHMM8??:JGK++hmm,K%(%7%7%FG%F!%FGIIhmmX__DTIUE !!MM8??:J " 

A 	

   !"b(((k+=b+AB}###e]2%67A DD  Hs   2	M=	M*	M	M<	Mc                  R   t        t                     } ddd}t        | |      }|j                  t        j
                  t        j                         t        t               d      } ddd}t        | |      }|j                  t        j
                  t        j                         y )N	estimator)r   r   )ra   estimator__max_depthr   rw   r#   )
r   r   r   r)   rJ   rL   rK   r	   r   rY   )boost
parametersr,   s      r-   test_gridsearchr|      s     )?)ABE &J uj
)CGGDIIt{{# (=(?aPE"(&IJ
uj
)CGGHMM8??+r/   c                  l   dd l } t               }|j                  t        j                  t        j
                         |j                  t        j                  t        j
                        }| j                  |      }| j                  |      }t        |      |j                  k(  sJ |j                  t        j                  t        j
                        }||k(  sJ t        d      }|j                  t        j                  t        j
                         |j                  t        j                  t        j
                        }| j                  |      }| j                  |      }t        |      |j                  k(  sJ |j                  t        j                  t        j
                        }||k(  sJ y r=   )pickler   r)   rJ   rL   rK   rM   dumpsloadstype	__class__r	   rY   )r~   objrM   rr   obj2score2s         r-   test_pickler      s!    
CGGDIIt{{#IIdii-ESA<<?D:&&&ZZ		4;;/FF?? 
+CGGHMM8??+IIhmmX__5ESA<<?D:&&&ZZx7FF??r/   c            	         t        j                  ddddddd      \  } }t               }|j                  | |       |j                  }|j
                  d   dk(  sJ |d dt        j                  f   |dd  k\  j                         sJ y )Ni  r]   r!   r   Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeatedr   r#   )	r   make_classificationr   r)   feature_importances_r8   r%   newaxisall)r(   yr,   importancess       r-   test_importancesr      s    ''DAq 
CGGAqM**KQ2%%%BJJ';qr?:??AAAr/   c                     t               } t        j                  d      }t        j                  t
        |      5  | j                  t        t        t        j                  dg             d d d        y # 1 sw Y   y xY w)Nz*sample_weight.shape == (1,), expected (6,)matchr   rb   )r   reescapepytestraises
ValueErrorr)   r(   r1   r%   r6   )r,   msgs     r-   ,test_adaboost_classifier_sample_weight_errorr      sP    

C
))@
AC	z	-7"**bT*:; 
.	-	-s   1A55A>c                  N   ddl m}  t         |              }|j                  t        t
               t        t                     }|j                  t        t               ddl m} t         |       d      }|j                  t        t
               t        t               d      }|j                  t        t
               ddgddgddgddgg}g d}t        t                     }t        j                  t        d      5  |j                  ||       d d d        y # 1 sw Y   y xY w)	Nr   )RandomForestClassifier)RandomForestRegressorr"   r   )r    barr   r   zworse than randomr   )sklearn.ensembler   r   r)   r(   r>   r   r1   r   r	   r   r   r   r   )r   r,   r   X_faily_fails        r-   test_estimatorr      s    7 35
6CGGAv
SU
#CGGAw6
13!
DCGGAv
CE
2CGGAv !fq!fq!fq!f-F!F
SU
#C	z)<	= 
>	=	=s   ?DD$c                      d} t        dd      }t        j                  t        |       5  |j	                  t
        j                  t
        j                         d d d        y # 1 sw Y   y xY w)Nz+Sample weights have reached infinite values   g      7@)ra   learning_rater   )r   r   warnsUserWarningr)   rJ   rL   rK   )r   r,   s     r-   test_sample_weights_infiniter     sC    
7C
"D
AC	k	-		4;;' 
.	-	-s   /A##A,z(sparse_container, expected_internal_type   c                     G d dt               }t        j                  dddd      \  }}t        j                  |      }t        ||d	      \  }}}} | |      }	 | |      }
t         |       d
      j                  |	|      }t         |       d
      j                  ||      }|j                  |
      }|j                  |      }t        ||       |j                  |
      }|j                  |      }t        ||       |j                  |
      }|j                  |      }t        ||       |j                  |
      }|j                  |      }t        ||       |j                  |
|      }|j                  ||      }t        ||       |j                  |
      }|j                  |      }t!        ||      D ]  \  }}t        ||        |j#                  |
      }|j#                  |      }t!        ||      D ]  \  }}t        ||        |j%                  |
      }|j%                  |      }t!        ||      D ]  \  }}t        ||        |j'                  |
|      }|j'                  ||      }t!        ||      D ]  \  }}t        ||        |j(                  D cg c]  }|j*                   }}t-        fd|D              sJ y c c}w )Nc                   $     e Zd ZdZd fd	Z xZS )Atest_sparse_classification.<locals>.CustomProbabilisticClassifierzGLogisticRegression variant that records the nature of the training set.c                 L    t         |   |||       t        |      | _        | S z<Modification on fit caries data type for later verification.rb   superr)   r   
data_type_selfr(   r   rc   r   s       r-   r)   zEtest_sparse_classification.<locals>.CustomProbabilisticClassifier.fit!  %    GK1MK:"1gDOKr/   rC   __name__
__module____qualname____doc__r)   __classcell__r   s   @r-   CustomProbabilisticClassifierr     s    U	 	r/   r   r         *   )	n_classesr   r   r#   r   r"   ry   c              3   6   K   | ]  }t        |        y wrC   
issubclassrE   texpected_internal_types     r-   rG   z-test_sparse_classification.<locals>.<genexpr>r       Dez!34e   )r   r   make_multilabel_classificationr%   ravelr   r   r)   r2   r   r9   r   predict_log_probar*   rM   staged_decision_functionziprh   ri   rj   rN   r   r   )sparse_containerr   r   r(   r   X_trainX_testy_trainy_testX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_clf_resultsdense_clf_resultssparse_clf_resdense_clf_resitypess    `                 r-   test_sparse_classificationr     s    (:  22rabDAq 	A'711'M$GVWf%g.N$V,M +/1 
c.'"  */1 
c'7  +22=A(008)+<= +<<]K(::6B02CD +<<]K(::6B02CD +88G(66v>02CD +00G(..vv>02CD +CCMR(AA&I),-?AR)S%!.-@ *T +99-H(77?),-?AR)S%>=9 *T +??N(==fE),-?AR)S%!.-@ *T +77vN(55ffE),-?AR)S%>=9 *T $5#@#@A#@aQ\\#@EADeDDDD Bs   )Kc                     G d dt               }t        j                  dddd      \  }}t        ||d	      \  }}}} | |      }	 | |      }
t	         |       d
      j                  |	|      }t	         |       d
      j                  ||      }|j                  |
      }|j                  |      }t        ||       |j                  |
      }|j                  |      }t        ||      D ]  \  }}t        ||        |j                  D cg c]  }|j                   }}t        fd|D              sJ y c c}w )Nc                   $     e Zd ZdZd fd	Z xZS ))test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.c                 L    t         |   |||       t        |      | _        | S r   r   r   s       r-   r)   z-test_sparse_regression.<locals>.CustomSVR.fit  r   r/   rC   r   r   s   @r-   	CustomSVRr     s    F	 	r/   r   r   2   r   r   )r   r   	n_targetsr#   r   r"   ry   c              3   6   K   | ]  }t        |        y wrC   r   r   s     r-   rG   z)test_sparse_regression.<locals>.<genexpr>  r   r   )r   r   make_regressionr   r	   r)   r2   r   rh   r   rN   r   r   )r   r   r   r(   r   r   r   r   r   r   r   sparse_regressordense_regressorsparse_regr_resultsdense_regr_resultssparse_regr_resdense_regr_resr   r   s    `                 r-   test_sparse_regressionr   u  sO    C  ##qrDAq (811'M$GVWf%g.N$V,M )9;QOSS
 ()+ANRRO
 +22=A(00813EF +99-H(77?+./BDV+W'!/>B ,X $4#?#?@#?aQ\\#?E@DeDDDD As   D>c                       G d dt               } t         |        d      }|j                  t        t               t        |j                        t        |j                        k(  sJ y)z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                       e Zd Zd Zd Zy)=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                      y rC    )r   r(   r   s      r-   r)   zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fit  s    r/   c                 F    t        j                  |j                  d         S )Nr   )r%   zerosr8   )r   r(   s     r-   r2   zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predict  s    88AGGAJ''r/   N)r   r   r   r)   r2   r   r/   r-   DummyEstimatorr     s    		(r/   r   r!   r`   N)r   r	   r)   r(   r>   r'   estimator_weights_estimator_errors_)r   rz   s     r-   %test_sample_weight_adaboost_regressorr     sQ    ( ( n.Q?E	IIau''(C0G0G,HHHHr/   c                     t         j                  j                  d      } | j                  ddd      }| j	                  ddgd      }| j                  d      }t        t        d            }|j                  ||       |j                  |       |j                  |       t        t                     }|j                  ||       |j                  |       y)zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   3   r!   r   most_frequent)strategyN)r%   re   rf   randnchoicer   r   r)   r2   r*   r	   r   )rk   r(   ycyrrz   s        r-   test_multidimensional_Xr    s    
 ))


"C		"aA	QFB	B	2BHIE	IIa	MM!	n./E	IIa	MM!r/   c                  P   t         j                  t         j                  }} t        t	                     }t        |      }dj                  |j                  j                        }t        j                  t        |      5  |j                  | |       d d d        y # 1 sw Y   y xY w)Nrv   z {} doesn't support sample_weightr   )rJ   rL   rK   r   r   r   formatr   r   r   r   r   r)   )r(   r   rw   r,   err_msgs        r-   -test_adaboostclassifier_without_sample_weightr    sm    99dkkqA%o&78I
y
1C077	8K8K8T8TUG	z	11 
2	1	1s    BB%c                     t         j                  j                  d      } t        j                  ddd      }d|z  dz   | j	                  |j
                  d         dz  z   }|j                  d	d
      }|d	xx   dz  cc<   d|d	<   t        t               d
d      }t        |      }t        |      }|j                  ||       |j                  |d d	 |d d	        t        j                  |      }d|d	<   |j                  |||       |j                  |d d	 |d d	       }|j                  |d d	 |d d	       }|j                  |d d	 |d d	       }	||k  sJ ||	k  sJ |t        j                  |	      k(  sJ y )Nr   r   d     )numg?g?g-C6?r   r   r]   i'  rw   ra   r#   rb   )r%   re   rf   linspacerandr8   reshaper	   r
   r   r)   	ones_likerM   r   approx)
rk   r(   r   regr_no_outlierregr_with_weightregr_with_outlierrc   score_with_outlierscore_no_outlierscore_with_weights
             r-   $test_adaboostregressor_sample_weightr    s    ))


#C
As%A	q3388AGGAJ/&89A			"aA bERKEAbE ("$11O _-o. !Q#2#2'LLOMM"A];*003B3B@&,,QsVQsV<(..q"vq"v> 0000 1111v}}->????r/   c                     t        t        j                  d      ddi\  } }}}t        d      }|j	                  | |       t        t        j                  |j                  |      d      |j                  |             y )NT)
return_X_yr#   r   r"   r   axis)
r   r   load_digitsr   r)   r   r%   argmaxr*   r2   )r   r   r   r   models        r-    test_adaboost_consistent_predictr!    ss     (8				.(=?($GVWf B/E	IIgw
		%%%f-A6f8Mr/   zmodel, X, yc                     t        j                  |      }d|d<   d}t        j                  t        |      5  | j                  |||       d d d        y # 1 sw Y   y xY w)Nir   z1Negative values in data passed to `sample_weight`r   rb   )r%   r  r   r   r   r)   )r   r(   r   rc   r  s        r-   #test_adaboost_negative_weight_errorr#    sL     LLOMM"AG	z	1		!Qm	4 
2	1	1s   AAc                     t         j                  j                  d      } | j                  d      }| j	                  ddgd      }t        j
                  |      dz  }t        dd	
      }t        |dd	      }|j                  |||       t        j                  |j                        j                         dk(  sJ y)zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r  r]   r^   r   r   r  gtDS 'T	r]      )	max_depthr#      r  rb   N)r%   re   rf   normalr  r  r   r   r)   isnanr   sum)rk   r(   r   rc   tree	ada_models         r-   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsr-  '  s     ))


#C



#A

Aq6
%ALLOf,M!BR@D"TQSTIMM!QmM488I223779Q>>>r/   c                    d}t        j                  |d|       \  }}t        d|       j                  ||      }|j	                  |      }t        |j                  d      dd       t        t        j                  |            dd	|dz
  z  hk(  sJ |j                  |      D ]K  }t        |j                  d      dd       t        t        j                  |            dd	|dz
  z  hk(  rKJ  |j                  d
      j                  ||       |j	                  |      }t        |j                  d      dd       |j                  |      D ]   }t        |j                  d      dd       " y)zCheck that the decision function respects the symmetric constraint for weak
    learners.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/26520
    r!   r   )r   n_clusters_per_classr#   rd   r  r   g:0yE>)atolr   r   r`   N)r   r   r   r)   r9   r   r*  rO   r%   r5   r   
set_params)global_random_seedr   r(   r   r,   y_scores         r-   test_adaboost_decision_functionr4  8  s`    I''!BTDAq !:L
M
Q
QRSUV
WC##A&GGKKQK'6 ryy!"q"	A*>&???? //2+QT: 299W%&1bIM.B*CCCC 3 NNN"&&q!,##A&GGKKQK'6//2+QT: 3r/   )Ur   r   numpyr%   r   sklearnr   sklearn.baser   r   sklearn.dummyr   r   r   r   r	   sklearn.linear_modelr
   r   sklearn.model_selectionr   r   sklearn.svmr   r   sklearn.treer   r   sklearn.utilsr   sklearn.utils._mockingr   sklearn.utils._testingr   r   r   sklearn.utils.fixesr   r   r   r   r   re   rf   rk   r(   r1   r>   r3   r4   r?   	load_irisrJ   permutationrK   r_   permrL   load_diabetesrY   r.   r;   r@   rR   markparametrizer[   rt   r|   r   r   r   r   r   r   r   r   r   r  r  r  r!  r#  r-  r4  r   r/   r-   <module>rG     s   < 	    - 9 B E B   F ! 8 
  	iiA 	"XBx"bAq6Aq6Aq6:
(	"X1v1v	 x
t{{''( DKKcJ 	4; "8!!#!(MM8??" x
J71Y, !DE
Y F
Y*8Z,$6B*< 8( .	
	
	
 	
 		

 	
 	^++	WEWEt .	
	
	
 	
 		

 	
 	^++	,E,E^I&*#@L 		tyy$++6		hmmX__=55?"";r/   