
    Q3jH                     
   d dl Zd dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z d dl	mZmZmZ d dlmZ d dlmZmZ d d	lmZ ej,                  j/                  d
ddg      ej,                  j/                  dddg      d               Zej,                  j/                  d
ddg      ej,                  j/                  ddddddgg      ej,                  j/                  dddg      d                      Zej,                  j/                  dddg      d        Zd Zd Zej,                  j/                  d
ddg      ej,                  j/                  dg d      d               Zd  Zej,                  j/                  d
ddg      ej,                  j/                  dg d!      d"               Zej,                  j/                  d#dd$g      ej,                  j/                  d
ddg      ej,                  j/                  dddddg d!g      d%                      Z ej,                  j/                  ddg d&g      ej,                  j/                  d
ddg      d'               Z!ej,                  j/                  d( e             ej,                  j/                  d) ejD                  d*       ejF                  d+      dfd,  ejH                  d      jK                  ejF                        dfd- d. d/fd0 d1 d2d/gf ejL                  g d3       ejL                  g d4      d f ejL                  ejN                  ejN                  d d5d6dg       ejL                  g d7      d f ejL                  g d3       ejL                  g d7ejF                  8      d2d/gfg      d9               Z(ej,                  j/                  d
ddg      ej,                  j/                  d:d+d;g      d<               Z)ej,                  j/                  d=d>ejN                  dgfdd>gejN                  ejN                  gd?d@ggfg      dA        Z*ej,                  jW                  e edB      k  dCD      ej,                  j/                  dEg dF      ej,                  j/                  d
ddg      ej,                  j/                  dGddg      dH                             Z,ej,                  jW                  e edB      k  dID      ej,                  j/                  dEg dF      ej,                  j/                  d
ddg      ej,                  j/                  dGddg      dJ                             Z-y)K    N)assert_allcloseassert_array_equal)approx)config_contextdevice)get_namespacemove_to)yield_namespace_device_dtype_combinations)_array_api_for_tests)
np_versionparse_version)_weighted_percentileaverageTFsize
      c                 
   t        j                  |       }t        j                  |      }t        ||d|      }| dz  dk(  r|du r|t        j                  |      k7  sJ yt        |      t        j                  |      k(  sJ y)au  Ensure `_weighted_percentile` matches `median` when expected.

    With unit `sample_weight`, `_weighted_percentile` should match the median except
    when `average=False` and the number of samples is even.
    For an even array and `average=False`, `percentile_rank=50` gives the lower
    of the two 'middle' values, that are averaged when calculating the `median`.
    2   r      r   FN)nparange	ones_liker   medianr   )r   r   ysample_weightscores        K/DATA/.local/lib/python3.12/site-packages/sklearn/utils/tests/test_stats.py'test_weighted_percentile_matches_medianr       sr     			$ALLOM M2wGE ax1}E)		!$$$e}		!,,,    percentile_rank   #   =      /   c                 
   t         j                  j                  |       }|j                  d|      }t        j                  |      }t        ||||      }|rd}nd}t        |      t        j                  |||      k(  sJ y)a  Check `_weighted_percentile` with unit weights is correct.

    `average=True` results should be the same as `np.percentile`'s
    'averaged_inverted_cdf'.
    `average=False` results should be the same as `np.percentile`'s
    'inverted_cdf'.
    Note `np.percentile` is the same as `np.quantile` except `q` is in range [0, 100].

    We parametrize through different `percentile_rank` and `size` to
    ensure we get cases where `g=0` and `g>0` (see Hyndman and Fan 1996 for details).
    r#   r   r   averaged_inverted_cdfinverted_cdf)methodN)r   randomRandomStaterandintr   r   r   
percentile)	global_random_seedr   r"   r   rngr   swr   r,   s	            r   &test_weighted_percentile_matches_numpyr4   (   st    " ))

 2
3CBT"A	aB BIE(%=BMM!_VLLLLr!   r   d   c                     t        j                  ddgddgg      }t        j                  ddgddgg      }t        ||| d      }t        d      D ]  }||   t	        d	      k(  rJ  y
)a  Check `j+1` index is clipped to max, when `average=True`.

    `percentile_plus_one_indices` can exceed max index when `percentile_indices`
    is already at max index.
    Note that when `g` (Hyndman and Fan) / `fraction_above` is greater than 0,
    `j+1` (Hyndman and Fan) / `percentile_plus_one_indices` is calculated but
    never used, so it does not matter what this value is.
    When percentile of percentile rank 100 falls exactly on the last value in the
    `weighted_cdf`, `g=0` and `percentile_indices` is at max index. In this case
    we set `percentile_plus_one_indices` to be max index as well, so the result is
    the average of 2x the max index (i.e. last value of `weighted_cdf`).
    r      g?g?r      Tr   g      ?N)r   arrayr   ranger   )r"   r   r3   r   idxs        r   *test_weighted_percentile_plus_one_clip_maxr<   G   sn      	1a&1a&!"A	C:1v&	'B BFEQxSzVC[((( r!   c                      t        j                  dt         j                        } t        j                  dt         j                        }t	        | |d      }t        |      dk(  sJ y)zJCheck `weighted_percentile` with unit weights and all 0 values in `array`.f   dtyper   r   N)r   zerosfloat64onesr   r   )r   r3   r   s      r   test_weighted_percentile_equalrD   ^   sJ    
BJJ'A	BJJ	'B B+E%=Ar!   c                      t        j                  d      } t        j                  d      }t        | |d      }t        j                  |      sJ y)zICheck `weighted_percentile` with all weights equal to 0 returns `np.nan`.r   r   N)r   r   rA   r   isnan)r   r3   values      r   )test_weighted_percentile_all_zero_weightsrH   f   s:    
		"A	"B B+E88E??r!   zpercentile_rank, expected_value))r   r   )r   r8   )r5   r&   c                 @   t        j                  g d      }t        j                  g d      }t        t        j                  ||f      j                  t        j                  ||f      j                  ||       }t        d      D ]  }t        ||         |k(  rJ  y)a  Check leading, trailing and middle 0 weights behave correctly.

    Check that leading zero-weight observations are ignored when `percentile_rank=0`.
    See #20528 for details.
    Check that when `average=True` and the `j+1` ('plus one') index has sample weight
    of 0, it is ignored. Also check that trailing zero weight observations are ignored
    (e.g., when `percentile_rank=100`).
    )r   r7   r   r8      r&      )r   r   r7   r7   r   r7   r   r   r   N)r   r9   r   vstackTr:   r   )r   r"   expected_valuer   r3   rG   r;   s          r   ,test_weighted_percentile_ignores_zero_weightrO   n   s     	&'A	'	(B 
		1a&RYYBx022OWE QxeCj!^333 r!   c                  l    t        ddgddgd      } t        g dg dd      }t        |       |k(  sJ y)z=Check zero weights just before `max_index` handled correctly.r7   r8   Tr   )r7   r   r8   )r8   r   r8   N)r   r   )score_without_zerosscore_with_zeross     r   4test_weighted_percentile_average_zero_weight_plateaurS      s=    .1v1vtL+Iy$O%&*::::r!   )r#   r$   r   r%   c                    t         j                  j                  |       }|j                  dd      }|j	                  dd      }t        j
                  ||      }t        ||||      }t        |t        j                  |      ||      }|t        |      k(  sJ |dk(  r'|r$|t        t        j                  |            k(  sJ yyy)z?Check integer weights give the same result as repeating values.r#   r   r)   r&   r   r   N)
r   r-   r.   r/   choicerepeatr   r   r   r   )	r1   r"   r   r2   xweights
x_repeatedpercentile_weightspercentile_repeateds	            r   3test_weighted_percentile_frequency_weight_semanticsr\      s     ))

 2
3CBR Ajjj$G1g&J-	7OW /BLL,ow (;!<<<<"!VBIIj,A%BBBB ")r!   constant   c                     t         j                  j                  |       }|j                  dd      }|j	                  dd      }||z  }t        ||||      }t        ||||      }	|t        |	      k(  sJ y)zCheck multiplying weights by a constant does not change the result.

    Note scale invariance does not always hold when multiplying by a
    float due to cumulative sum numerical error (which grows proportional to n).
    r#   r)   r&   r   N)r   r-   r.   r/   rU   r   r   )
r1   r"   r   r]   r2   rW   rX   weights_multipliedr0   percentile_multipliers
             r   ,test_weighted_percentile_constant_multiplierrb      s     ))

 2
3CBR Ajjj$G 8+%a/7SJ0	  56666r!   )r#   r$   r   c                    t         j                  j                  |       }|j                  dd      }|j	                  dd      }|j                  dd      }t        j
                  ||f      j                  }t        ||||      }t        |t              rg }	|D ]K  }
|	j                  t        |j                  d         D cg c]  }t        |dd|f   ||
|       c}       M t        j                  |	d	      }|j                  |j                  d   t        |      fk(  sZJ t        |j                  d         D cg c]  }t        |dd|f   |||       }}|j                  |j                  d   fk(  sJ t        ||       |j	                  dd      }t        j
                  ||f      j                  }t        ||||      }t        |t              rg }	|D ]R  }
|	j                  t        |j                  d         D cg c]  }t        |dd|f   |dd|f   |
|      ! c}       T t        j                  |	d	      }|j                  |j                  d   t        |      fk(  saJ t        |j                  d         D cg c]  }t        |dd|f   |dd|f   ||      ! }}|j                  |j                  d   fk(  sJ t        ||       yc c}w c c}w c c}w c c}w )
zECheck `_weighted_percentile` behaviour is correct when `array` is 2D.r   r)   r&   r#   )r"   r   r7   N)axis)r   r-   r.   r/   rU   rL   rM   r   
isinstancelistappendr:   shapestacklenr   )r1   r"   r   r2   x1w1x2x_2dwpp_listprip_axis_0w2w_2ds                  r   test_weighted_percentile_2drw      s   
 ))

 2
3C	Rb	!B	AB	B	Rb	!B99b"X  D	b/7
B /4(!BMM
 #4::a=1	 2 )QT
BG 2	 " 88F,xxDJJqM3+?@@@@ 4::a=)	
 * !QT
B *	 	 
 xxDJJqM++++B! 
AB	B99b"X  D	dOW
B /4(!BMM
 #4::a=1	 2 )QT
DAJG 2	 " 88F,xxDJJqM3+?@@@@ 4::a=)	
 * !QT
DAJQX *	 	 
 xxDJJqM++++B!i
,
s   K'
K,$K1
$K6z(array_namespace, device_name, dtype_namezdata, weights, percentile*   r7   c                 $    | j                  d      S Nr   randr2   s    r   <lambda>r~     s    SXXb\r!   c                 &    | j                  dd      S )Nr   r8   r{   r}   s    r   r~   r~     s    SXXb!_r!   c                 ^    | j                  d      j                  t        j                        S rz   r|   astyper   float32r}   s    r   r~   r~     s    #((2,2E2Ebjj2Qr!   K   c                 &    | j                  dd      S Nr#   r8   r{   r}   s    r   r~   r~     s    Qr!   c                 `    | j                  dd      j                  t        j                        S r   r   r}   s    r   r~   r~     s    Q..rzz:r!      )r   r7   r   r8   rJ   r&   )r   r   r7   r7   r7   r   r8   rJ   )r   r7   r7   r7   r7   r   r?   c                 .   t        |||      \  }}|j                  d|      }	|j                  d|      }
|dk(  r<|j                  |j	                  |	|
      |	k(        rt        j                  d|        t        j                  j                  |       }t        |      r ||      n|}t        |      r ||      n|}|j                  |      }t        |||      }|j                  ||      }|j                  ||      }t        d      5  t        |||      }t        |      t        |      k(  sJ t!        |      d   t!        |      d   k(  sJ t#        |t        d      }d	d	d	       j$                  |j$                  k(  sJ |j&                  |j&                  k(  sJ t)        ||       |d
k(  r3|j$                  |j$                  cxk(  rt        j*                  k(  sJ  J y	|j$                  t        j,                  k(  sJ y	# 1 sw Y   xY w)zECheck `_weighted_percentile` gives consistent results with array API.r7   r   r   zxp.nextafter is broken on T)array_api_dispatchcpu)xpr   Nr   )r   rA   rC   all	nextafterpytestxfailr   r-   r.   callabler   r   asarrayr   array_devicer	   r
   r@   ri   r   r   rB   )r1   array_namespacedevice_name
dtype_namedatarX   r0   r   r   zerooner2   X_np
weights_np	result_npX_xp
weights_xp	result_xpresult_xp_nps                      r   .test_weighted_percentile_array_api_consistencyr     s   R &o{JOJB
 88Af8%D
''!F'
#CQ266",,tS"9T"AB1&:;
))

 2
3C 49DD!)'!2J;;z"D$T:zBI::d6:*DJv6J	4	0(z:F	I&,t*<<<<Y'*mD.A!.DDDDyR>	 
1 000000I|, Y!!Y__B

BBBBB!!RZZ/// 
1	0s   AHHsample_weight_ndimr   c                    t         j                  j                  |       }|j                  dd      }t         j                  | |j                  |j
                   dk  <   t        j                  |      }|dk(  r|j                  ddd      }n|j                  ddd	      }t        ||d
|      }t        |j
                  d         D cg c]  }||dd|f    |f    }	}|j                  dk(  rMt        j                  ||j
                  d         j                  |j
                  d   |j
                  d         }t        |j
                  d         D cg c]  }||dd|f    |f    }
}t        j                  t        |j
                  d         D cg c]  }t        |	|   |
|   d
|       c}      }t        ||       yc c}w c c}w c c}w )a>  Test `_weighted_percentile` ignores NaNs.

    Calling `_weighted_percentile` on an array with nan values returns the same
    results as calling `_weighted_percentile` on a filtered version of the data.
    We test both with sample_weight of the same shape as the data and with
    one-dimensional sample_weight.
    r5   r         ?r   r7   rK   )r5   r   r)   )r5      r   Nr   )r   r-   r.   r|   nanri   rF   r/   r   r:   ndimrV   reshaper9   r   )r1   r   r   r2   array_with_nansnan_maskr   resultscolfiltered_arrayfiltered_weightsexpected_resultss               r   %test_weighted_percentile_nan_filteredr   T  s    ))

 2
3ChhsB'O>@ffOHCHHo334s:;xx(HQAqy9Aqv6 #?M2wWG
 ..q122C 	!S&))3./2   Q		-1F1Fq1IJRR!!!$o&;&;A&>
 :??T?TUV?W9X9X#x3'',-9X   xx
 _22156		
 7 !s#%5c%:B 7		
 '1+
	
s   G
GGzpercentile_rank, expectedZ   g       @g      @c           	         t        j                  t         j                  dgt         j                  dgt         j                  t         j                  gt         j                  t         j                  gt         j                  dgt         j                  t         j                  gg      }t        j                  |      }t	        |||       }t        j
                  ||d      sJ y)zCCheck that nans are ignored in general, except for all NaN columns.r&   r7   r   T)	equal_nanN)r   r9   r   r   r   array_equal)r"   expectedr9   rX   valuess        r   'test_weighted_percentile_all_nan_columnr     s     HHVVQKVVQKVVRVVVVRVVVVQKVVRVV	
	E ll5!G!%/BF
 >>&(d;;;r!   z2.0z2np.quantile only accepts weights since version 2.0)reasonr0   )B   r   r   uniform_weightc                    |r|st        j                  d       t        j                  j	                  |      }|j                  dd      }|r+t        j                  |      |j                  ddd      z  }n|j                  ddd      }t        ||| |      }t        j                  || dz  |s|nd	|rd
ndd      }t        ||       y	)zICheck `_weighted_percentile` is equivalent to `np.quantile` with weights.zHnp.quantile does not support weights with method='averaged_inverted_cdf'r   r5   r7   rK   r)   r   r5   r   Nr*   r+   r   rX   r,   re   )r   skipr   r-   r.   r|   r   r/   r   quantiler   )	r0   r   r   r1   r2   r9   r   percentile_weighted_percentilepercentile_numpy_quantiles	            r   ,test_weighted_percentile_like_numpy_quantiler     s     ~V	
 ))

 2
3CHHREU+ckk!QQk.GGAqy9%9}j'&" !#S%3*1&~! 57PQr!   z5np.nanquantile only accepts weights since version 2.0c                    |r|st        j                  d       t        j                  j	                  |      }|j                  dd      }t        j                  | |j
                  |j                   dk  <   |r+t        j                  |      |j                  ddd      z  }n|j                  ddd      }t        ||| |	      }t        j                  || dz  |s|nd
|rdndd      }t        ||       y
)zICheck `_weighted_percentile` equivalent to `np.nanquantile` with weights.zKnp.nanquantile does not support weights with method='averaged_inverted_cdf'r   r5   r   r7   rK   r)   r   r   Nr*   r+   r   r   )r   r   r   r-   r.   r|   r   ri   r   r/   r   nanquantiler   )	r0   r   r   r1   r2   r   r   r   percentile_numpy_nanquantiles	            r   /test_weighted_percentile_like_numpy_nanquantiler     s     ~-	

 ))

 2
3Chhr3'O>@ffOHCHHo334s:;_5 9D 9
 
 Aqy9%9
G&" $&>>S%3*1&~$  57STr!   ).numpyr   r   numpy.testingr   r   r   sklearn._configr   sklearn.utils._array_apir   r   r	   r
   r   sklearn.utils.estimator_checksr   sklearn.utils.fixesr   r   sklearn.utils.statsr   markparametrizer    r4   r<   rD   rH   rO   rS   r\   rb   rw   r   int32rC   r   r9   r   r   r   r   skipifr   r    r!   r   <module>r      sb     =  * ; 
 @ 9 4 T5M2"b*- + 3-( T5M2*RR!R,AB"b*M + C 3M8 *RI6) 7), T5M2:<WX4 Y 34*; T5M2*,<=C > 3C* aV,T5M2*RR=M,NO7 P 3 -7( *R,>?T5M2F" 3 @F"R .-/  
B!b)	!72772;#5#5bhh#?D	$&QSUV (:H	
 
$	%xrxx0B'CQG	2662661aA.	/:L1MqQ BHH'(BHH'rxx8H	
%6-07	>-0` T5M2-1v6.2 7 3.2b 	bffa[
bRVVRVV$sCj12<<, u%%?   |4UDM2)E4=9R : 3 5	R> u%%B   |4UDM2)E4=9"U : 3 5	"Ur!   