
    yj?                        d Z ddlZddlmZmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z#  ed          Z$de%fdZ&ddde'e$         de(de(de'e$         fdZ)	 d+de*de'e+         de(de,e*ef         dz  de'e*         f
dZ-	 d+dede'e*         de,e*ef         dz  de'ej.                 fd Z/	 d+dede*de'e+         de(de,e*ef         dz  de'ej.                 fd!Z0ded"e'ej.                 de*d#e*de'ej.                 f
d$Z1ded%e*d&ee2ej.                          de(de'ej.                 f
d'Z3dd(d)d%e*de,e*ef         dz  de(de'ej.                 fd*Z4dS ),z
Reciprocal Rank Fusion (RRF) utilities for combining search results.

RRF is a method to combine multiple ranked lists by computing the reciprocal
of each item's rank in each list, then summing these reciprocal ranks.
    N)AnyTypeVar)Selectand_funcor_select)AsyncSession)models)settings)
tracked_db)embedding_client)ValidationException)session_peers_table)EmbeddingCallPurpose)apply_filter)ILIKE_ESCAPE_CHARescape_ilike_pattern)embedding_call_purpose)get_external_vector_storeTreturnc                  P    t           j        j        dk    pt           j        j         S )zGReturn True when semantic message search can stay entirely in Postgres.pgvector)r   VECTOR_STORETYPEMIGRATED     8/DATA/AppData/hermes/projects/honcho/src/utils/search.py_uses_pgvector_message_searchr!      s%     	"j0V8M8V4Vr   <   )kranked_listsr#   limitc                     |sg S i }|D ]7}t          |d          D ]$\  }}||vrd||<   ||xx         d| |z   z  z  cc<   %8t          |                                d d          }d |D             }|d|         S )	aj  
    Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).

    RRF assigns a score to each item based on the formula:
    RRF_score = sum(1 / (k + rank_i)) for all lists where the item appears

    Where:
    - k is a constant (typically 60) that controls the impact of high-ranked items
    - rank_i is the rank of the item in list i (1-indexed)

    Args:
        *ranked_lists: Variable number of ranked lists to combine
        k: RRF constant parameter (default: 60)
        limit: Maximum number of results to return

    Returns:
        list of items ranked by RRF score (highest score first)
       g        g      ?c                     | d         S )Nr'   r   )xs    r    <lambda>z(reciprocal_rank_fusion.<locals>.<lambda>F   s
    AaD r   T)keyreversec                     g | ]\  }}|S r   r   ).0item_s      r    
<listcomp>z*reciprocal_rank_fusion.<locals>.<listcomp>I   s    ///wtQd///r   N)	enumeratesorteditems)	r#   r%   r$   
rrf_scoresranked_listrankr/   sorted_itemsresults	            r    reciprocal_rank_fusionr:   $   s    &  	 "$J $ 1 1#K33 	1 	1JD$:%%#&
4 tq4x 00		1 ***,,..$OOOL 0/,///F&5&>r   workspace_nameembedding_queryfiltersc                   K   t                      }|g S |                    d|           }i }|rd|v r|d         |d<   d|v r|d         |d<   |                    |||dz  |r|nddg	           d{V }|sg S i }|D ]'}	|	j                            d          }
|
r	|
|vrd||
<   (t          |                                          S )
z?Query the external vector store and return ordered message IDs.Nmessage
session_idsession_namepeer_id	peer_name   
message_id)top_kr=   include_attributes)r   get_vector_namespacequerymetadatagetlistkeys)r;   r<   r%   r=   external_vector_store	namespacevector_filtersvector_resultsseen_message_idsr9   rE   s              r    !query_external_vector_message_idsrS   N   s@      677$	%::9nUUI%'N =7""-4\-BN>**1)*<N;' 166ai"0:d(> 7        N  	(*  0 0_((66
 	0*,<<<+/Z( %%''(((r   dbmessage_idsc                   K   |sg S t          t          j                                      t          j        j                            |                    }t          |t          j        |          }|                     |           d{V }d |                                	                                D             fd|D             S )z5Fetch messages by ID and preserve the input ordering.Nc                     i | ]
}|j         |S r   )	public_id)r.   msgs     r    
<dictcomp>z)fetch_messages_by_ids.<locals>.<dictcomp>   s    EEEssEEEr   c                 (    g | ]}|v |         S r   r   )r.   msg_idmessagess     r    r1   z)fetch_messages_by_ids.<locals>.<listcomp>   s(    MMM&H:L:LHV:L:L:Lr   )
r	   r   MessagewhererX   in_r   executescalarsall)rT   rU   r=   stmtr9   r]   s        @r    fetch_messages_by_idsre   x   s        	&.!!''(@(D(D[(Q(QRRDfng66D::d########FEEfnn.>.>.B.B.D.DEEEHMMMM;MMMMr   c                   K   t           j        j                            |          }t	          t           j                                      t           j        t           j        j        t           j        j        k              	                    t           j        j        
                    d                    	                    t           j        j        |k              }|r4|                                }||d<   t          |t           j        |          }|                    |                              |dz            }|                     |           d{V }t#                      }	g }
|                                                                D ]:}|j        |	vr/|	                    |j                   |
                    |           ;|
d|         S )a  
    Perform semantic message search using pgvector in Postgres.

    Args:
        db: Database session
        workspace_name: Name of the workspace to search in
        embedding_query: Pre-computed embedding for the search query
        limit: Maximum number of results to return
        filters: Optional filters to apply to the message query

    Returns:
        list of messages ordered by semantic similarity
    Nworkspace_id   )r   MessageEmbedding	embeddingcosine_distancer	   r^   joinrX   rE   r_   isnotr;   copyr   order_byr%   ra   setrb   rc   addappend)rT   r;   r<   r%   r=   distance_exprrd   internal_filtersr9   seendedupedrY   s               r    _semantic_search_pgvectorrw      s     ( +5EEoVVM 	v~	#N$(?(JJ

 

 
v&066t<<	=	=	v&5G	H	H 	  D"<<>>+9(D&.2BCC
 ==''--eai88D::d########FUUD$&G~~##%%    =$$HHS]###NN36E6?r   r]   rC   c                 R  K   |sg S t          t                                        t          j        j        |k                                  t          j        j        |k              }|                     |           d{V }|                                }i }|D ]9}|j        }	|	|vrg ||	<   ||	         	                    |j
        |j        f           :g }
|D ]N}|j        |vr||j                 D ]4\  }}|j        |k    r$||j        |k    r|
	                    |            n5O|
S )a  
    Filter messages by peer perspective (temporal session membership).

    Only keeps messages from sessions where the peer was a member at the time
    the message was created (between joined_at and left_at).

    Args:
        db: Database session
        messages: List of messages to filter
        workspace_name: Name of the workspace
        peer_name: Name of the peer whose perspective to use

    Returns:
        Filtered list of messages
    N)r	   r   r_   cr;   rC   ra   rc   rA   rr   	joined_atleft_at
created_at)rT   r]   r;   rC   session_memberships_queryr9   membershipssession_windows
membershiprA   filtered_messagesrY   rz   r{   s                 r    _filter_by_peer_perspectiver      sn     *  	 	"##	"$3~E	F	F	"$.);	<	< 
 ::788888888F**,,K 9;O! Y Y
!...,.OL)%,,j.BJDV-WXXXX /1 
 
?22 #2#2B"C 	 	Iw~**3>W#<#<!((---r   rI   rd   c                   K   t          t          j        d|                    }t          |          }|rzt          j        j                            d| dt                    }|	                    |          
                    t          j        j                                                  }nG t          j        dt          j        j                                      d          t          j        d|                    }t#          |t          j        j                            d| dt                              }	|	                    |	          
                    t          j        t          j        t          j        dt          j        j                  t          j        d|                    d                                          t          j        j                                                  }|                    |          }|                     |           d{V }
t-          |
                                                                          S )a2  
    Perform full-text search using PostgreSQL FTS and ILIKE fallback.

    Args:
        db: Database session
        query: Search query
        stmt: Base SQL query conditions
        limit: Maximum number of results to return

    Returns:
        list of messages ordered by text search relevance
    z&[~`!@#$%^&*()_+=\[\]{};\':"\\|,.<>/?-]%)escapeenglishz@@r   N)boolresearchr   r   r^   contentiliker   r_   ro   r|   descr   to_tsvectoropplainto_tsqueryr   coalescets_rankr%   ra   rL   rb   rc   )rT   rI   rd   r%   has_special_charsescaped_querysearch_conditionfulltext_queryfts_conditioncombined_conditionr9   s              r    _fulltext_searchr      s     & 
	;UCC 
 )//M  
!>177    ): 8 
 
 $455>>N%**,,
 

 U(FN4JKKNNtTT E22
 

 !N"(($M$$$-> )  
 
 $677@@M$Y0FGG(E::    dffN%**,,

 

 $))%00N::n--------F  $$&&'''r   
   )r=   r%   c                   	
K   t          t          j                  d	rMdv rHd         	d                                 D             rdvrdvrt	          d                              t          t          t          j        j        t          j	        j        k    t          j        j
        t          j	        j
        k    t          j        j        t          j	        j        k    t          t          j	        j                            d          t          j        j        t          j	        j        k                                                      t          j	        j        	k              t%          t          j                  drA                    d          p                    d          }t)          |t*                    r|	rdz  ndz  d
dt,          j        rt)          t*                    r	 t1          t2          j        j        d	
          5  t9          j                    d{V 
ddd           n# 1 swxY w Y   n5# t<          $ r(}t	          dt,          j        j          d          |d}~ww xY wtC                      stE          
           d{V dtF          dtH          t          j                 f	 
f	d}tK          d          4 d{V } ||           d{V }|D ]}|&                    |           |cddd          d{V  S # 1 d{V swxY w Y   dS )a  
    Search across message content using a hybrid approach with Reciprocal Rank Fusion (RRF).

    This function combines semantic search and full-text search results using RRF when both
    are available, providing better search results than either method alone.

    Args:
        query: Search query to match against message content
        filters: Optional filters to scope search (must include workspace_id for semantic search).
            Special filter 'peer_perspective' will search across all messages from sessions that the peer is/was a member of,
            filtered by the time window when they were actually in the session.
        limit: Maximum number of results to return

    Returns:
        list of messages that match the search query, ordered by RRF relevance or individual search relevance

    Raises:
        ValidationException: If query exceeds maximum token limit for embeddings
    Npeer_perspectivec                 &    i | ]\  }}|d k    ||S )r   r   )r.   r#   vs      r    rZ   zsearch.<locals>.<dictcomp>[  s)    OOODAqq<N7N7N1a7N7N7Nr   rg   r;   zMpeer_perspective requires a workspace scope (workspace_id or workspace_name).   rh   api)r;   parent_categoryz%Query exceeds maximum token limit of .)r;   r<   r%   r=   	active_dbr   c                    	K   g }t           j        rt          t                    rtrt	                      rt          | 	           d {V }nt          | 
pg            d {V }rt          | |           d {V }|                    |           t          | dz             d {V }|                    |           t          |          dk    rt          |diS t          |          dk    r|d         d          S g S )N)rT   r;   r<   r%   r=   )rT   rU   r=   rh   )rT   rI   rd   r%   r'   r%   r   )r   EMBED_MESSAGES
isinstancestrr!   rw   re   r   rr   r   lenr:   )r   search_resultssemantic_resultsfulltext_resultsr=   r%   peer_perspective_namerI   query_embeddingsemantic_limitsemantic_message_idsrd   r;   s       r    _run_searchzsearch.<locals>._run_search  s     57 #	4>3//	4  +,.. )B #1$3(#* * * $ $ $ $ $ $   *?  4 :#* * * $ $ $ $ $ $  % )D$")	* * $ $ $ $ $ $  !!"2333!1!)	"
 "
 "
 
 
 
 
 
 
 	.///~"")>GGGG~!##!!$VeV,,	r   zsearch.messages)'r	   r   r^   r4   r   rl   r   r   rA   ry   r;   r|   rz   r   r{   is_r_   rC   r   rK   r   r   r   r   r   r   SEARCH_MESSAGESvaluer   embed
ValueError	EMBEDDINGMAX_INPUT_TOKENSr!   rS   r
   rL   r   expunge)rI   r=   r%   workspace_valueer   
managed_dbcombined_resultsr?   r   r   r   r   rd   r;   s   ```      @@@@@@r    r   r   :  s     4 &.!!D )- J%00 '(: ;OOGMMOOOOO 	')).>g.M.M%_   yy+/B/D/QQ-1D1F1UU)-@-B-LL')155d;;N-1D1F1NN 	 
 
 %#%/3HH
I
I 	 fng66D!%N -!++n55VEU9V9Vos++ 	-,N"7FUQYYUQYN*.O-1 :nc#B#B 
	'$4:- %   F F
 )9(>u(E(E"E"E"E"E"E"EF F F F F F F F F F F F F F F  	 	 	%^8J8[^^^ 	
 -.. 	)J- /$	* * * $ $ $ $ $ $ -\ -d6>6J - - - - - - - - - - - - - -^ +,,              
!,Z!8!8888888' 	( 	(Gw''''	                                                           sN   	!I *II II II 
J'#J

J:-L::
MM)N)5__doc__r   typingr   r   
sqlalchemyr   r   r   r   r	   sqlalchemy.ext.asyncior
   srcr   
src.configr   src.dependenciesr   src.embedding_clientr   src.exceptionsr   
src.modelsr   src.telemetry.eventsr   src.utils.filterr   src.utils.formattingr   r   src.utils.typesr   src.vector_storer   r   r   r!   rL   intr:   r   floatdictrS   r^   re   rw   r   tupler   r   r   r   r    <module>r      s    
			         6 6 6 6 6 6 6 6 6 6 6 6 6 6 / / / / / /             ' ' ' ' ' ' 1 1 1 1 1 1 . . . . . . * * * * * * 5 5 5 5 5 5 ) ) ) ) ) ) H H H H H H H H 2 2 2 2 2 2 6 6 6 6 6 6GCLLt     =? ' ' '$q' 'c 's 'tTUw ' ' ' '\ &*	') ')')%[') ') #s(^d"	')
 
#Y') ') ') ')Z &*N NNcN #s(^d"N 
&.	N N N N. &*1 111 %[1 	1
 #s(^d"1 
&.1 1 1 1h776>"7 7 	7
 
&.7 7 7 7t?(?(?( v~&
'?( 	?(
 
&.?( ?( ?( ?(J &*	M  M  M M  #s(^d"M  	M 
 
&.M  M  M  M  M  M r   