
    O3jM                       U d dl mZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlmZ d d	lmZ erwd d
lmZmZm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dlm(Z( d dl)mc m*Z+ d dl,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dlm9Z9m:Z: d dl;m<Z< d dl=m>Z> d dlm?Z? d dlm@Z@mAZAmBZB dZCdeDd<   	  G d ded   e      ZEy)     )annotations)TYPE_CHECKINGAnyN)add_row_indexevaluate_exprs)native_to_narwhals_dtypeselect_columns_by_name)assert_never)ImplementationValidateBackendVersion_remap_full_join_keyscheck_column_names_are_uniquecheck_columns_existgenerate_temporary_column_namenot_implementedparse_columns_to_drop)MultiOutputExpressionError)CompliantLazyFrame)IterableIteratorMappingSequence)BytesIO)Path)
ModuleType)	TypeAlias)SelfTypeIs)CompliantDataFrameAny)DaskExprDaskLazyGroupByDaskNamespace)_EagerAllowedImpl)Version_LimitedContext)	LazyFrame)DType)ColumnNotFoundError)AsofJoinStrategyJoinStrategyUniqueKeepStrategyr   r   
Incompletec                     e Zd Zej                  Zdd	 	 	 	 	 	 	 d/dZed0d       Ze	d1d       Z
d2dZd3dZd4dZd5d	Zd6d
Zd7dZd8dZd9dZd:dZd;dZ	 	 	 	 	 	 d<dZed=d       Zd>dZd?dZd;dZd;dZd@dZedAd       ZdAdZdBdZdCdZ dDdZ!dEdZ"	 	 	 	 	 	 	 	 dFdZ#dGdZ$dHdZ%	 	 	 	 	 	 	 	 	 	 dId Z&	 	 	 	 	 	 	 	 	 	 dId!Z'	 	 	 	 	 	 	 	 	 	 dId"Z(dJd#Z)	 	 	 	 	 	 	 	 dKd$Z*	 	 	 	 	 	 	 	 dKd%Z+	 	 	 	 	 	 	 	 dLd&Z,	 	 	 	 	 	 	 	 	 	 	 	 dMd'Z-	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dNd(Z.	 	 	 	 	 	 dOd)Z/dEd*Z0dPd+Z1	 	 	 	 	 	 	 	 	 	 dQd,Z2dRd-Z3 e4       Z5y.)SDaskLazyFrameF)validate_backend_versionc               b    || _         || _        d | _        d | _        |r| j	                          y y N)_native_frame_version_cached_schema_cached_columns_validate_backend_version)selfnative_dataframeversionr1   s       E/DATA/.local/lib/python3.12/site-packages/narwhals/_dask/dataframe.py__init__zDaskLazyFrame.__init__:   s7     ,<7;15#**, $    c                6    t        | t        j                        S r3   )
isinstancedd	DataFrame)objs    r<   
_is_nativezDaskLazyFrame._is_nativeH   s    #r||,,r>   c              *     | ||j                         S Nr;   )r5   )clsdatacontexts      r<   from_nativezDaskLazyFrame.from_nativeL   s    4!1!122r>   c                <    | j                   j                  | d      S )Nlazy)level)r5   	lazyframer9   s    r<   to_narwhalszDaskLazyFrame.to_narwhalsP   s    }}&&t6&::r>   c                    | j                   t        j                  u r| j                   j                         S dt	        | j                          }t        |      )NzExpected dask, got: )_implementationr   DASKto_native_namespacetypeAssertionError)r9   msgs     r<   __native_namespace__z"DaskLazyFrame.__native_namespace__S   sN    >#6#66'';;==$T$*>*>%?$@AS!!r>   c                4    ddl m}  || j                        S )Nr   r#   rG   )narwhals._dask.namespacer$   r5   )r9   r$   s     r<   __narwhals_namespace__z$DaskLazyFrame.__narwhals_namespace__Z   s    :T]]33r>   c                    | S r3    rP   s    r<   __narwhals_lazyframe__z$DaskLazyFrame.__narwhals_lazyframe___   s    r>   c                <    | j                  | j                  |      S rF   )	__class__native)r9   r;   s     r<   _with_versionzDaskLazyFrame._with_versionb   s    ~~dkk7~;;r>   c                <    | j                  || j                        S rF   )ra   r5   )r9   dfs     r<   _with_nativezDaskLazyFrame._with_nativee   s    ~~b$--~88r>   c                0    t        || j                        S )N)	available)r   columns)r9   subsets     r<   _check_columns_existz"DaskLazyFrame._check_columns_existh   s    "6T\\BBr>   c              #  X   K   | j                   j                         D ]	  \  }}|  y wr3   )rb   items)r9   _colsers      r<   _iter_columnszDaskLazyFrame._iter_columnsk   s&     **,ID#I -s   (*c                d    |j                  |       }t        |      dk7  rd}t        |      |d   S )N   z4multi-output expressions not allowed in this contextr   )_calllenr   )r9   rC   resultsrX   s       r<   _evaluate_single_output_exprz*DaskLazyFrame._evaluate_single_output_expro   s5    ))D/w<1HC,S11qzr>   c           	         t        | g| }| j                   | j                  j                  di t	        |            S )Nr^   )r   rf   rb   assigndict)r9   exprs
new_seriess      r<   with_columnszDaskLazyFrame.with_columnsv   s<    #D151
  !3!3!3!Gd:6F!GHHr>   c                    | j                   j                  d
i |}||t        j                  u r+ddlm}  ||t        j                  d| j                  d      S |t        j                  u r.dd l}ddl	m
}  ||j                  |      d| j                        S |t        j                  u r9dd l}ddlm}  ||j                   j                  |      d| j                  d      S d	| }	t#        |	      )Nr   )PandasLikeDataFrameT)implementationr1   r;   validate_column_names)PolarsDataFrame)r1   r;   )ArrowDataFrame)r1   r;   r   zUnsupported `backend` value: r^   )rb   computer   PANDASnarwhals._pandas_like.dataframer~   r5   POLARSpolarsnarwhals._polars.dataframer   from_pandasPYARROWpyarrownarwhals._arrow.dataframer   Table
ValueError)
r9   backendkwargsresultr~   plr   par   rX   s
             r<   collectzDaskLazyFrame.collectz   s     %$$.v.?g)>)>>K&-44)-&*  n+++B"v&)-  n,,, @!$$V,)-&*	  .gY7or>   c                    | j                   J| j                  t        | j                        n#| j                  j
                  j                         | _         | j                   S r3   )r7   r6   listschemarb   ri   tolistrP   s    r<   ri   zDaskLazyFrame.columns   sV    ' &&2 T[[![[((//1  
 ###r>   c                h     ||       d   }| j                  | j                  j                  |         S )Nr   )rf   rb   loc)r9   	predicatemasks      r<   filterzDaskLazyFrame.filter   s.    q!  !677r>   c                |    | j                   }t        |t        |      | j                        }| j	                  |      S r3   )rb   r	   r   rS   rf   )r9   column_namesre   rb   s       r<   simple_selectzDaskLazyFrame.simple_select   s5    'D,>@T@TU  ((r>   c           	         t        | g| }t        j                  |D cg c]  \  }}|j                  |       c}}d      }| j	                  |      S c c}}w )Nrr   axis)r   rA   concatrenamerf   )r9   rz   r{   namevalre   s         r<   	aggregatezDaskLazyFrame.aggregate   sT    #D151
YY*E*YT3

4(*EAN  $$ Fs   A
c           	         t        | g| }| j                  }t         |j                  di t	        |      |D cg c]  }|d   	 c}| j
                        }| j                  |      S c c}w )Nr   r^   )r   rb   r	   rx   ry   rS   rf   )r9   rz   r{   re   ss        r<   selectzDaskLazyFrame.select   sq    #D151
#BII)Z()%&:aQqT:&  

   $$ 's   A+
c                    |)| j                  | j                  j                               S | j                         }|j	                   |j
                  | j                         d       }| j                  |      S )NT)ignore_nulls)rf   rb   dropnar\   any_horizontalcolis_nullr   )r9   rj   plxr   s       r<   
drop_nullszDaskLazyFrame.drop_nulls   sn    >$$T[[%7%7%9::))+""7377F#3#;#;#=D"QQ{{4  r>   c           	         | j                   b| j                  j                  }| j                  j                  D ci c]'  }|t	        ||   | j
                  | j                        ) c}| _         | j                   S c c}w r3   )r6   rb   dtypesri   r   r5   rS   )r9   native_dtypesr   s      r<   r   zDaskLazyFrame.schema   s    & KK..M
  ;;..	# /C -!#&t7K7K  /	#D """#s   ,A;c                    | j                   S r3   )r   rP   s    r<   collect_schemazDaskLazyFrame.collect_schema   s    {{r>   c               t    t        | ||      }| j                  | j                  j                  |            S )Nstrictri   )r   rf   rb   drop)r9   ri   r   to_drops       r<   r   zDaskLazyFrame.drop   s3    'gfE  !1!1'!1!BCCr>   c                   |%| j                  t        | j                  |            S | j                         }| j                  }|j                  dd       j                  |      j                         }|j                  |      j                  d      j                  g |      |j                  dd       j                         z
  }| j                  |      j                  | |j                  |       S )Nrr   dtypeF)reverse)partition_byorder_by)rf   r   rb   r\   ri   litalias	broadcastr   cum_sumoverr|   r   )r9   r   r   r   ri   
const_exprrow_index_exprs          r<   with_row_indexzDaskLazyFrame.with_row_index   s     $$]4;;%EFF))+,,WWQdW+11$7AAC
GGDM!!%!0552PX5Yggatg$..01 	   ,33NGCGGWDUVVr>   c                X    | j                  | j                  j                  |            S )Nr   )rf   rb   r   )r9   mappings     r<   r   zDaskLazyFrame.rename   s%      !3!3G!3!DEEr>   c                \    | j                  | j                  j                  |dd            S )NFnr   npartitions)rf   rb   head)r9   r   s     r<   r   zDaskLazyFrame.head   s*      !1!1AuRT!1!UVVr>   c               <   |r| j                  |      x}r||dk(  r|xs | j                  }t        d|d      }| j                  j	                  |      j                         j                  |      }||dk(     }|j                         j                  |      }| j                  j                  ||d      }nSd	d
ij                  ||      }	|r | j                  |dddj                  }
n| j                  }
|
j                  ||	      }| j                  |      S )Nnone   count_n_bytesri   prefixrr   r   inner)onhowanyfirstF)
descending
nulls_last)rj   keep)rk   ri   r   rb   groupbysizer   reset_indexr   mergegetsortdrop_duplicatesrf   )r9   rj   r   r   errortokenro   uniquer   mapped_keeprb   s              r<   r   zDaskLazyFrame.unique   s     9 9& AAuAK6>+t||F26(E ++%%f-224;;EBCcQh-C__&++E+:F[[&&v&g&FF '*..tT:K"H5QXX++6+LF  ((r>   c                   t        |t              r| }n|D cg c]  }|  }}|rdnd}| j                  | j                  j	                  t        |      ||            S c c}w )Nlastr   )	ascendingna_position)r@   boolrf   rb   sort_valuesr   )r9   r   r   byr   dpositions          r<   r   zDaskLazyFrame.sort  sg    j$'/9>I(23
1Q
I3'6W  KK##DH	x#X
 	
 4s   
A!c                  | j                   }| j                  t        |      }t        |t              rXt        fd|D              rD|r!| j                  |j                  ||            S | j                  |j                  ||            S t        |t              r|gt        |      z  }| j                  |j                  |t        |            j                  |dd            S )Nc              3  D   K   | ]  }|   j                           y wr3   )
is_numeric).0xr   s     r<   	<genexpr>z&DaskLazyFrame.top_k.<locals>.<genexpr>  s     ,PRVAY-A-A-CRs    )r   Fr   r   )rb   r   r   r@   r   allrf   	nsmallestnlargestrt   r   r   )r9   kr   r   re   r   s        @r<   top_kzDaskLazyFrame.top_k  s    [["Xgt$,PR,P)P((a)<==$$R[[B%788gt$i#b')G  NN2gN7<<U = 
 	
r>   c               |    | j                   j                  |d      j                  |j                   ||dd|f      S )Nr   rj   r   r    left_onright_onr   suffixes)rb   r   r   )r9   otherr  r  suffixs        r<   _join_innerzDaskLazyFrame._join_inner+  sH     {{!!e!<BBLL&\ C 
 	
r>   c                  | j                   j                  |j                   j                  |d      d||d|f      }t        ||d      D cg c]   \  }}||k7  r|| j                  vr|n| | " }}}|j                  |      S c c}}w )	Nr   r  leftr  r   r  r  r	  Tr   r   )rb   r   r   zipri   r   )	r9   r
  r  r  r  result_nativeleft_key	right_keyextras	            r<   
_join_leftzDaskLazyFrame._join_left6  s     ))LLxU;&\ * 
 (+7HT'J
'J#)H$ #$,,6Iyk&<RR'J 	 

 !!%!00
s   %B	c                  | j                   }t        |||      }|j                   j                  |      }t        |j                         t        |j                               }|t        |         j                         j                  d      }	||   j                         j                  d      }
t        |j                        j                  | j                        j                  |      }||
   j                  |D ci c]	  }|| |  c}      }||	    j                  ||
    ||dd|f      }t        j                  |||	   |gdd      S c c}w )	Nr   rr   r   outerr  r  r   )r   join)rb   r   r   r   ri   r   valuesisnar   setintersection
differencer   rA   r   )r9   r
  r  r  r  self_nativeright_on_mapperother_nativeright_suffixedleft_null_maskright_null_mask	to_renamer   right_null_rowsjoin_results                  r<   
_join_fullzDaskLazyFrame._join_fullG  sd   
 kk/6J||**?*C%l&:&:;o4467$T']388:>>A>F&~6;;=AAqAI &33DLLALLXV	&7>>6?@isSSE&**i@ ? 
 ">/288/)*#&\ 9 
 yy+n5G
 	
 As   Ec                  t        dg | j                  |j                  d      } | j                  j                  d	i |dij	                   |j                  j                  d	i |did||d|f      j                  |      S )
Nr   cross_join_key_r   r   r   r  r  r   r^   )r   ri   rb   rx   r   r   )r9   r
  r  	key_tokens       r<   _join_crosszDaskLazyFrame._join_crossh  s    2>>>GX
	 DKK0)Q0U###5y!n5!"f   T)T$
	
r>   c                   | j                  |t        |      t        t        ||d                  }| j                  j                  |d      j                  |d||      S )NFr   r
  columns_to_selectcolumns_mappingr   r  r   )r   r  r  )_join_filter_renamer   ry   r  rb   r   r   )r9   r
  r  r  r   s        r<   
_join_semizDaskLazyFrame._join_semix  sl     //"8n Xwu!EF 0 

 {{!!e!<BBgw C 
 	
r>   c               N   t        dg | j                  |j                  d      }| j                  |t        |      t	        t        ||d                  }| j                  j                  |j                  |d      d	|||
      }|||   dk(     j                  |g      S )Nr   join_indicator_r   Fr   r-  r   r  r  )r   	indicatorr  r  	left_onlyr   )
r   ri   r0  r   ry   r  rb   r   r   r   )r9   r
  r  r  indicator_tokenr   re   s          r<   
_join_antizDaskLazyFrame._join_anti  s     9>>>GX
 //"8n Xwu!EF 0 

 [[wE:%  
 "_%45::OCT:UUr>   c                    |j                   }t        ||| j                        j                  |      j	                         S )zHelper function to avoid creating extra columns and row duplication.

        Used in `"anti"` and `"semi`" join's.

        Notice that a native object is returned.
        r   )rb   r	   rS   r   r   )r9   r
  r.  r/  r   s        r<   r0  z!DaskLazyFrame._join_filter_rename  s;     $)<< #<1BDDXDXYVOV,_	
r>   c                  |dk(  r| j                  ||      }n||t        ||      |dk(  r| j                  ||||      }nu|dk(  r| j                  |||      }n[|dk(  r| j	                  |||      }nA|dk(  r| j                  ||||      }n&|d	k(  r| j                  ||||      }nt        |       | j                        S )
Ncross)r
  r  r   )r
  r  r  r  anti)r
  r  r  semir  full)	r+  r   r  r7  r1  r  r'  r
   rf   )r9   r
  r   r  r  r  r   s          r<   r  zDaskLazyFrame.join  s     '>%%E&%AF_ 0Wh//G^%%Wx & F F]__5'H_UFF]__5'H_UFF]__Wx % F F]__Wx % F   ((r>   c                   | j                         }| j                  |j                  | j                  |j                  |||||d|f            S )Nr  )r  r  left_byright_by	directionr	  )rY   rf   
merge_asofrb   )	r9   r
  r  r  by_leftby_rightstrategyr  r   s	            r<   	join_asofzDaskLazyFrame.join_asof  sZ     '')  NN!!"f  	
 	
r>   c               $    ddl m}  || ||      S )Nr   r!   )drop_null_keys)narwhals._dask.group_byr"   )r9   keysrH  r"   s       r<   group_byzDaskLazyFrame.group_by  s     	<tT.IIr>   c                    | j                   }|j                  }|dk(  r,| j                  | j                   j                  |d            S d}t	        |      )Nrr   F)r   r   zL`LazyFrame.tail` is not supported for Dask backend with multiple partitions.)rb   r   rf   tailNotImplementedError)r9   r   native_framen_partitionsrX   s        r<   rM  zDaskLazyFrame.tail  sX    {{#//1$$T[[%5%55%5%IJJZ 	 "#&&r>   c                   t        d| j                  d      }| j                         }|j                  |d       j	                         }|j                  |d       j	                         }|j                  dd       j	                         }| j                  |d       j                  |j                  |      |k\  |j                  |      |z
  |z  |k(  z        j                  |gd      S )	Nr   
row_index_r   r   r   )r   Fr   )	r   ri   r\   r   r   r   r   r   r   )r9   r   offsetrow_index_tokenr   offset_exprn_expr	zero_exprs           r<   gather_everyzDaskLazyFrame.gather_every  s    8t||L
 ))+ggfDg1;;=$'113GGATG*446	$?V)[8GGO,{:fD	QS T?#ET2	
r>   c                ^    | j                  | j                  j                  ||||            S )N)id_vars
value_varsvar_name
value_name)rf   rb   melt)r9   r   indexvariable_namer]  s        r<   unpivotzDaskLazyFrame.unpivot  s:       KK&%	  
 	
r>   c                :    | j                   j                  |       y r3   )rb   
to_parquet)r9   files     r<   sink_parquetzDaskLazyFrame.sink_parquet  s    t$r>   N)r:   dd.DataFramer;   r&   r1   r   returnNone)rC   zdd.DataFrame | Anyrg  zTypeIs[dd.DataFrame])rI   rf  rJ   r'   rg  r   )rg  LazyFrame[dd.DataFrame])rg  r   )rg  r$   )rg  r   )r;   r&   rg  r   )re   r   rg  r   )rj   Sequence[str]rg  zColumnNotFoundError | None)rg  zIterator[dx.Series])rC   r    rg  z	dx.Series)rz   r    rg  r   )r   z_EagerAllowedImpl | Noner   r   rg  r   )rg  	list[str])r   r    rg  r   )r   strrg  r   )rj   Sequence[str] | Nonerg  r   )rg  zdict[str, DType])ri   rj  r   r   rg  r   )r   rl  r   rm  rg  r   )r   zMapping[str, str]rg  r   )r   intrg  r   )rj   rm  r   r-   r   rm  rg  r   )r   rl  r   bool | Sequence[bool]r   r   rg  r   )r  rn  r   zIterable[str]r   ro  rg  r   )
r
  r   r  rj  r  rj  r  rl  rg  rf  )r
  r   r  rl  rg  rf  )r
  r   r  rj  r  rj  rg  rf  )r
  r   r.  rk  r/  zdict[str, str]rg  rf  )r
  r   r   r,   r  rm  r  rm  r  rl  rg  r   )r
  r   r  rl  r  rl  rC  rm  rD  rm  rE  r+   r  rl  rg  r   )rJ  z"Sequence[str] | Sequence[DaskExpr]rH  r   rg  r"   )r   rn  rS  rn  rg  r   )
r   rm  r_  rm  r`  rl  r]  rl  rg  r   )rd  zstr | Path | BytesIOrg  rh  )6__name__
__module____qualname__r   rT   rS   r=   staticmethodrD   classmethodrK   rQ   rY   r\   r_   rc   rf   rk   rp   rv   r|   r   propertyri   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r'  r+  r1  r7  r0  r  rF  rK  rM  rX  ra  re  r   exploder^   r>   r<   r0   r0   4   sO    %))O */-&- 	-
 #'- 
- - - 3 3;"4
<9CI(/(;>(	(T $ $8
)
%
%! 	# 	#D
WFW)$) !	)
 ') 
)6

 	
	
'4	
@M	
WZ	
		
11'41@M1WZ1	1"

'4
@M
WZ
	
B
 



'4

@M

	

VV'4V@MV	V(

.7
JX
	
"!)!) 	!)
 &!) '!) !) 
!)F

 	

 
 &
 '
 #
 
 

2J6JKOJ	J	'
"
 
 $
 	

 
 

 % Gr>   r0   )r    rf  ri  )F
__future__r   typingr   r   dask.dataframe	dataframerA   narwhals._dask.utilsr   r   narwhals._pandas_like.utilsr   r	   narwhals._typing_compatr
   narwhals._utilsr   r   r   r   r   r   r   r   narwhals.exceptionsr   narwhals.typingr   collections.abcr   r   r   r   ior   pathlibr   typesr   r   dask.dataframe.dask_expr	dask_exprdxtyping_extensionsr   r   narwhals._compliant.typingr   narwhals._dask.exprr    rI  r"   r[   r$   narwhals._typingr%   r&   r'   narwhals.dataframer(   narwhals.dtypesr)   r*   r+   r,   r-   r.   __annotations__r0   r^   r>   r<   <module>r     s    " %  > X 0	 	 	 ; .EE  )).@,7628,%7RR
I g LMg r>   