
    yj
                     R    d Z ddlmZ ddlZddlmZ ddlmZ  G d de          Z	dS )	z8
Locality-Sensitive Hashing based surprisal estimation.
    )AnyN)NDArray   )SurprisalTreec            	       "    e Zd ZU dZeed<   eed<   eeeef                  ed<   eee	j
        e                           ed<   eed<   eed<   	 ddeded
eddf fdZdeddfdZde	j        dedefdZde	j        ddfdZde	j        defdZ xZS )LSHSurprisalz
    Locality-Sensitive Hashing based surprisal estimation.
    O(1) operations using hash collision frequency as density proxy.
    
num_tablesnum_bitstableshash_directionsinitializedtotal_points
      max_leaf_sizereturnNc                     t                                          |           || _        || _        d t	          |          D             | _        g | _        d| _        d S )Nc                     g | ]}i S  r   ).0_s     =/DATA/AppData/hermes/projects/honcho/src/dreamer/trees/lsh.py
<listcomp>z)LSHSurprisal.__init__.<locals>.<listcomp>    s    555ar555    F)super__init__r	   r
   ranger   r   r   )selfr	   r
   r   	__class__s       r   r   zLSHSurprisal.__init__   s^     	'''$ 555#4#4555! r   dimc                    | j         st          | j                  D ]f}t          j                            | j        |          }|t          j                            |dd          z  }| j	        
                    |           gd| _         dS dS )z0Initialize random projection directions for LSH.r   T)axiskeepdimsN)r   r   r	   nprandomrandnr
   linalgnormr   append)r   r    r   
directionss       r   _initialize_hash_functionsz'LSHSurprisal._initialize_hash_functions$   s     		$4?++ 8 88:	M39 9
 (")..Q +9 + + 
 $++J7777#D		$ 		$r   point	table_idxc                     | j         |         |z  }|dk                        t          j                  }t	          d                    d |D                       d          }|S )z'Hash a vector using random projections.r    c              3   4   K   | ]}t          |          V  d S )N)str)r   bs     r   	<genexpr>z,LSHSurprisal._hash_vector.<locals>.<genexpr>5   s(      66!s1vv666666r      )r   astyper$   intpintjoin)r   r,   r-   projectionsbinaryhash_vals         r   _hash_vectorzLSHSurprisal._hash_vector1   s]    151Ei1PSX1X$/!O#;#;BG#D#Drww66v66666::r   c                    | j         s"|                     t          |                     t          | j                  D ]7\  }}|                     ||          }|                    |d          dz   ||<   8| xj        dz  c_        d S )Nr   r   )r   r+   len	enumerater   r<   getr   )r   r,   itablebuckets        r   insertzLSHSurprisal.insert8   s     	8++CJJ777!$+.. 	5 	5HAu&&ua00F!IIfa0014E&MMQr   c                    | j         dk    s| j        st          d          S g }t          | j                  D ]F\  }}|                     ||          }|                    |d          }|                    |           Gt          j	        |          }t          |          | j         z  }t          t          j
        |dz                        S )z
        Compute surprisal using hash collision frequency.
        High collision = low surprisal (common pattern)
        Low collision = high surprisal (rare pattern)
        r   infg|=)r   r   floatr?   r   r<   r@   r)   r$   meanlog)	r   r,   countsrA   rB   rC   count	avg_countavg_densitys	            r   	surprisalzLSHSurprisal.surprisalB   s     !!)9!<<!$+.. 	! 	!HAu&&ua00FIIfa((EMM%    GFOO	I&&)::bf[50111222r   )r   r   r   )__name__
__module____qualname____doc__r7   __annotations__listdictr   r$   floatingr   boolr   r+   ndarrayr<   rD   rG   rN   __classcell__)r   s   @r   r   r      sl         
 OOOMMMc3h    '"+c"234444 MO! !!.1!FI!	! ! ! ! ! !$c $d $ $ $ $"*      BJ 4    3rz 3e 3 3 3 3 3 3 3 3r   r   )
rR   typingr   numpyr$   numpy.typingr   baser   r   r   r   r   <module>r^      s                                G3 G3 G3 G3 G3= G3 G3 G3 G3 G3r   