
    O3jR?                       U d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e
r?d dlmZmZmZmZmZ d dl	mZ d dlZ d dl!m"Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, dZ-de.d<   dZ/de.d<   ed   Z0de.d<   eddddddd d!d"d#d$d%d&d'd(d)d*d+d,e0f   Z1de.d-<   	 d.Z2de.d/<   	 eZ3de.d0<   	 d d1d d2Z4d3e.d4<    ed56      d@d7       Z5 G d8 d9      Z6 G d: d;ed<d=e1f         Z7dAd>Z8dBd?Z9y)C    )annotationsN)	lru_cache)chain)methodcaller)TYPE_CHECKINGAnyClassVarLiteral)EagerGroupBy)issue_warning)!evaluate_output_names_and_aliases)make_group_by_kwargs)is_pandas_like_dataframe)CallableIterableIteratorMappingSequence)	TypeAlias)DataFrameGroupBy)Unpack)NarwhalsAggregationScalarKwargs)PandasLikeDataFrame)PandasLikeExprz._NativeGroupBy[tuple[str, ...], Literal[True]]r   NativeGroupByz(Callable[[pd.DataFrame], pd.Series[Any]]NativeApply)covskewInefficientNativeAggregationanyallcountidxmaxidxminmaxmeanmedianminmodenthnuniqueprodquantilesemsizestdsumvarNativeAggregationz.Callable[[Any], pd.DataFrame | pd.Series[Any]]
_NativeAggNonStrHashable)firstlast	any_valuez,Mapping[NarwhalsAggregation, Literal[0, -1]]_REMAP_ORDERED_INDEX    )maxsizec                   | dk(  rt        | d      S | dk(  r d|v sJ d|v sJ t        | |d   |d         S |r|j                  d      dk(  rt        |       S t        | fi |S )	Nr,   F)dropnar.   interpolation)qr@   ddof   )r   get)namekwdss     K/DATA/.local/lib/python3.12/site-packages/narwhals/_pandas_like/group_by.py_native_aggrH   E   s    yD//zT!!!$&&&DD$4DDYZZ488F#q(D!!%%%    c                      e Zd ZU dZded<   ded<   ded<   ddZddZdd	Zdd
ZddZ	ddZ
ddZddZddZedd       ZddZy)AggExpraM  Wrapper storing the intermediate state per-`PandasLikeExpr`.

    There's a lot of edge cases to handle, so aim to evaluate as little
    as possible - and store anything that's needed twice.

    Warning:
        While a `PandasLikeExpr` can be reused - this wrapper is valid **only**
        in a single `.agg(...)` operation.
    r   exprzSequence[str]output_namesaliasesc                <    || _         d| _        d| _        d| _        y )N  )rL   rM   rN   
_leaf_name)selfrL   s     rG   __init__zAggExpr.__init__a   s    	57rI   c               ~    |j                   }|j                  }t        | j                  ||      \  | _        | _        | S )zd**Mutating operation**.

        Stores the results of `evaluate_output_names_and_aliases`.
        )	compliantexcluder   rL   rM   rN   )rS   group_bydfrW   s       rG   with_expand_nameszAggExpr.with_expand_namesg   s@    
 ""*KIIr7+
'4< rI   c                   |j                   }| j                  }| j                         r"| j                         r|j	                         }n| j                         rs|j	                         }|j
                  j                         }|j                  |D cg c],  }|j                  |      j                  |      j                  . c}      }nB| j                         r.|j
                  }|j                  | j                        }	|	j                  d      x}
dk7  rd|
 d|j                   d}t!        |      t#        |      }|j                  }|j$                  |j&                  }}|j                         }|j                  |D cg c]w  }  |j(                  g ||fi |j	                         j+                  d      j-                  |      j(                  |fi ||   j/                  d      j1                         y c}      }n| j3                         s | j5                         s| j7                         r | j9                         |g |j$                  |         }|j
                  j                  }|j;                         }|j=                         r#|d	k  r|j?                  |j$                  d
       nS|j?                  |j$                        }n7tA        |      dk(  r|d   n
t#        |      } | j9                         ||         }tC        |      rt#        | jD                        |_#        |S | jD                  d   |_$        |S c c}w c c}w )z8Evaluate the wrapped expression as a group_by operation.keepr!   z`Expr.mode(keep='z7')` is not implemented in group by context for backend z3

Hint: Use `nw.col(...).mode(keep='any')` instead.F)	ascendingrC      r   Tinplacer   )%_groupedrM   is_lenis_top_level_functionr0   rV   __narwhals_namespace___concat_horizontalfrom_nativealiasnativeis_mode_kwargsrL   rD   _implementationNotImplementedErrorlist_keys_group_by_kwargsgroupbysort_valuesreset_indexhead
sort_indexis_lastis_firstis_any_value
native_agg_backend_version	is_pandas	set_indexlenr   rN   columnsrE   )rS   rX   groupednamesresultresult_singlensrE   rV   node_kwargsr\   msgcolsri   keyskwargscolimplbackend_versionselects                       rG   _getitem_aggszAggExpr._getitem_aggss   s   ##!!;;=T779\\^F[[]#LLNM##::<B**NSTed.44T:AAeTF \\^ **I"**4995K#//E9'v .(889 :HH 
 *#..;D%%F#>>8+D+D&D 113B**  $	  $NFNN<T<3<:6:TV [5[1 [%WT	- &,	- .1	2
 T!WZ\"  $	F \\^t}}$2C2C2E&T__&w/H/H%/H'IJF%%55D"335O~~Of$<   >))(..9!$UqU1Xd5kF&T__&wv7F#F+!$,,/FN  ,,q/FKa U*	s   1MA<Mc                     | j                   dk(  S )Nr}   	leaf_namerS   s    rG   rc   zAggExpr.is_len   s    ~~&&rI   c                     | j                   dk(  S )Nr9   r   r   s    rG   rv   zAggExpr.is_last       ~~''rI   c                     | j                   dk(  S )Nr8   r   r   s    rG   rw   zAggExpr.is_first   s    ~~((rI   c                     | j                   dk(  S )Nr*   r   r   s    rG   rj   zAggExpr.is_mode   r   rI   c                     | j                   dk(  S )Nr:   r   r   s    rG   rx   zAggExpr.is_any_value   s    ~~,,rI   c                t    t        t        | j                  j                  j	                                     dk(  S )NrC   )r}   rn   rL   	_metadataop_nodes_reversedr   s    rG   rd   zAggExpr.is_top_level_function   s*    4		++==?@AQFFrI   c                    | j                   x}r|S t        j                  | j                        | _         | j                   S N)rR   PandasLikeGroupByrL   )rS   rE   s     rG   r   zAggExpr.leaf_name   s6    ??"4"K+66tyyArI   c                v   t         j                  | j                        }t        | j                  j
                  j                               }| j                  t        v rF|j                  j                  d      rd}t        |      t        dt        | j                           S t        |fi |j                  S )z@Return a partial `DataFrameGroupBy` method, missing only `self`.ignore_nullszd`Expr.any_value(ignore_nulls=True)` is not supported in a `group_by` context for pandas-like backendr+   )n)r   _remap_expr_namer   nextrL   r   r   r;   r   rD   rm   r   rH   )rS   native_name	last_noder   s       rG   ry   zAggExpr.native_agg   s    '88H,,>>@A	>>11##N36  *#..)=dnn)MNN;;)*:*:;;rI   N)rL   r   returnNone)rX   r   r   rK   )rX   r   r   zpd.DataFrame | pd.Series[Any])r   bool)r   zNarwhalsAggregation | Any)r   r5   )__name__
__module____qualname____doc____annotations__rT   rZ   r   rc   rv   rw   rj   rx   rd   propertyr   ry   rP   rI   rG   rK   rK   R   sc     8
;z'()(-G  <rI   rK   c                  ,   e Zd ZU i ddddddddddddddddd	d
ddddddddddddddddZded<   ded<   	 ded<   	 ded<   	 ded<   	 ed'd       Z	 	 	 	 	 	 	 	 d(dZd)d Z	 	 	 	 	 	 d*d!Z		 	 	 	 d+d"Z
	 	 	 	 	 	 d,d#Zd-d$Zd.d%Zy&)/r   r2   r'   r(   r&   r)   r*   r1   r3   r}   r0   n_uniquer,   r#   r.   r"   r!   r8   r+   r9   r:   z9ClassVar[Mapping[NarwhalsAggregation, NativeAggregation]]_REMAP_AGGStuple[str, ...]_original_columnsz	list[str]ro   _output_key_nameszMapping[str, bool]rp   c                    | j                   S )z>Group keys to ignore when expanding multi-output aggregations.)_excluder   s    rG   rW   zPandasLikeGroupBy.exclude   s     }}rI   c                 t        |j                        | _        || _        | j	                  ||      \  | _        | _        | _        g | j                  | j                  | _        t        |      | _
        | j                  j                  | _        t        | j                  j                  j                         j#                  | j                  j                        r"| j                  j%                  d      | _        y y )N)drop_null_keysT)drop)tupler~   r   _drop_null_keys_parse_keys_compliant_framero   r   r   r   rp   rV   ri   _nativesetindexr   intersectionrs   )rS   rY   r   r   s       rG   rT   zPandasLikeGroupBy.__init__   s     "'rzz!2-DHDTDTE
Atz4+A *P4::)O8N8N)O 4N S ~~,,t||!!''(55dnn6L6LM<<333>DL NrI   c                   d}g }d}|D ]  }|j                  t        |      j                  |              | j                  |      sd}t	        |j
                  j                               }|j                  j                  dd      x}s|r||k7  rd| d| d}t        |      |} |rZ | j                  j                  t        |      d	      j                  | j                  j                         fi | j                   }	n? | j                  j                  | j                  j                         fi | j                   }	|	| _        |r|r;| j$                  j'                         }
|
j)                  | j+                  |            }n| j$                  j-                         j/                  t        |	j0                        | j                  
      }n<| j$                  j2                  j4                  r
t7               | j9                  |	|      }| j$                  j:                  }|j=                         }|j?                         r|dk  r|jA                  d       n|jA                         }| jC                  ||      S )NTrP   Forder_byz?Only one `order_by` can be specified in `group_by`. Found both z and .r8   )na_position)r~   r^   r`   )"appendrK   rZ   
_is_simpler   r   r   r   rD   rm   r   rr   rn   rq   ro   copyrp   rb   rV   re   rf   r   __native_namespace__	DataFramegroupsri   emptyempty_results_error_apply_aggsrl   rz   r{   rs   _select_results)rS   exprsall_aggs_are_simple	agg_exprsr   rL   md_current_order_byr   r   r   r   r   r   s                 rG   aggzPandasLikeGroupBy.agg  s1   "#%	DWT]<<TBC??4(&+#dnn6689B$&IIMM*b$AA A 1X =[\d[eejk|j}}~C-c22,  &T\\%=%=XG &> &gdjjoo'&B+/+@+@&BG +dll**4::??+<V@U@UVG^^::<..t/A/A)/LM<<>HH($** I  ^^""((%''%%gu5F~~--//1>>& 8t,'')F##FI66rI   c          	        t        j                  d |D              } | j                  j                  |d      j                  g | j
                  | j                  t        t        | j
                  | j                  d                  S )zgResponsible for remapping temp column names back to original.

        See `ParseKeysGroupBy`.
        c              3  4   K   | ]  }|j                     y wr   )rN   ).0es     rG   	<genexpr>z4PandasLikeGroupBy._select_results.<locals>.<genexpr>L  s     'E9a		9s   F)validate_column_names)strict)
r   from_iterablerV   _with_nativesimple_selectro   renamedictzipr   )rS   rY   r   	new_namess       rG   r   z!PandasLikeGroupBy._select_resultsE  sz     '''E9'EE	DNN''%'H]4 JJ4)24VDTZZ)?)?NOP	
rI   c               J    |D cg c]  }|j                  |        c}S c c}w r   )r   )rS   r   r   s      rG   r   zPandasLikeGroupBy._getitem_aggsS  s%     055u!%u555s    c                    t                | j                  j                  }| j                  |      }|j                  }|j                         r|j                         dk\  r
 ||d      S  ||      S )a"  Stub issue for `include_groups` [pandas-dev/pandas-stubs#1270].

        - [User guide] mentions `include_groups` 4 times without deprecation.
        - [`DataFrameGroupBy.apply`] doc says the default value of `True` is deprecated since `2.2.0`.
        - `False` is explicitly the only *non-deprecated* option, but entirely omitted since [pandas-dev/pandas-stubs#1268].

        [pandas-dev/pandas-stubs#1270]: https://github.com/pandas-dev/pandas-stubs/issues/1270
        [User guide]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html
        [`DataFrameGroupBy.apply`]: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.groupby.DataFrameGroupBy.apply.html
        [pandas-dev/pandas-stubs#1268]: https://github.com/pandas-dev/pandas-stubs/pull/1268
        )   r   F)include_groups)warn_complex_group_byrV   rl   _apply_exprs_functionapplyr{   rz   )rS   r   r   r   funcr   s         rG   r   zPandasLikeGroupBy._apply_aggsX  sc     	~~--))%0>> 5 5 76 Ae44T{rI   c                      j                   j                         j                  j                  d fd}|S )Nc                    j                   j                  |       fdD        }|rt        |dding g f\  }} ||      j                  S )Nc              3     K   | ]6  } |      D ])  }|j                   j                  d    |j                  f + 8 yw)r   N)ri   ilocrE   )r   rL   r   rV   s      rG   r   zFPandasLikeGroupBy._apply_exprs_function.<locals>.fn.<locals>.<genexpr>t  sB      !D OD !!!$dii0+ 1!s   <?r   T)r   context)rV   r   r   ri   )	rY   results	out_group	out_namesrV   r   into_seriesr   rS   s	       @rG   fnz3PandasLikeGroupBy._apply_exprs_function.<locals>.fnr  s\    33B7I!G
 BI3#=#=rSUh Iyy	2FMMMrI   )rY   pd.DataFramer   zpd.Series[Any])rV   re   _seriesr   )rS   r   r   r   r   s   `` @@rG   r   z'PandasLikeGroupBy._apply_exprs_functionn  s7    ^^224jj..	N 	N 	rI   c              #    K    | j                   j                  | j                  j                         fi | j                  }t        j                         5  t        j                  ddt               | j                  j                  }|D ](  \  }}|  ||      j                  | j                   f * 	 d d d        y # 1 sw Y   y xY ww)Nignorez#.*a length 1 tuple will be returned)messagecategory)r   rq   ro   r   rp   warningscatch_warningsfilterwarningsFutureWarningrV   r   r   r   )rS   r   with_nativekeygroups        rG   __iter__zPandasLikeGroupBy.__iter__~  s     &$,,&&tzz'8RD<Q<QR$$&##=&
 ..55K%
U<K.<<d>T>TUVV & '&&s   ACA C 7	C C	CN)r   r   )rY   r   r   z(Sequence[PandasLikeExpr] | Sequence[str]r   r   r   r   )r   r   r   r   )r   zSequence[AggExpr]rY   r   r   r   )r   zIterable[AggExpr]r   z#list[pd.DataFrame | pd.Series[Any]])r   r   r   Iterable[PandasLikeExpr]r   r   )r   r  r   r   )r   z)Iterator[tuple[Any, PandasLikeDataFrame]])r   r   r   r   r   r   rW   rT   r   r   r   r   r   r  rP   rI   rG   r   r      s   NuNN 	(N 	u	N
 	uN 	N 	uN 	uN 	vN 	IN 	N 	JN 	uN 	uN 	N  	!N" 	U#NKJ & '&EO  8((K ?? 7? ? 
?,-7^
.?

	
6&6	,6
$-E	, 
WrI   r   r   r   c                     d} t        |       S )zJDon't even attempt this, it's way too inconsistent across pandas versions.au  No results for group-by aggregation.

Hint: you were probably trying to apply a non-elementary aggregation with a pandas-like API.
Please rewrite your query such that group-by aggregations are elementary. For example, instead of:

    df.group_by('a').agg(nw.col('b').round(2).mean())

use:

    df.with_columns(nw.col('b').round(2)).group_by('a').agg(nw.col('b').mean())

)
ValueError)r   s    rG   r   r     s    	^  c?rI   c                 $    t        dt               y )Na)  Found complex group-by expression, which can't be expressed efficiently with the pandas API. If you can, please rewrite your query such that group-by aggregations are simple (e.g. mean, std, min, max, ...). 

Please see: https://narwhals-dev.github.io/narwhals/concepts/improve_group_by_operation/)r   UserWarningrP   rI   rG   r   r     s    	W
 	rI   )rE   r4   rF   zUnpack[ScalarKwargs]r   r5   )r   r  )r   r   ):
__future__r   r   	functoolsr   	itertoolsr   operatorr   typingr   r   r	   r
   narwhals._compliantr   narwhals._exceptionsr   narwhals._expression_parsingr   narwhals._pandas_like.utilsr   narwhals.dependenciesr   collections.abcr   r   r   r   r   r   pandaspdpandas.api.typingr   _NativeGroupBytyping_extensionsr   narwhals._compliant.typingr   r   narwhals._pandas_like.dataframer   narwhals._pandas_like.exprr   r   r   r   r    r4   r5   r6   r;   rH   rK   r   r   r   rP   rI   rG   <module>r     sM   "    ! 8 8 , . J < :OO D(LC9OM9OCY C*1-*@ i @&			
	
	
	
			 '"  9 , hH
I H @  	  6 F B  2	& 	&D< D<NoW&(8:KKLoWdrI   