
    yj~F                    l   d Z ddlmZ ddlZddlmZmZ ddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/  ej0        e1          Z2 e
de          Z3eddddddddddddddddddddddSdD            Z4eddddddddddddddddddddddEdTdH            Z4edddddddddddIdddddddddddEdUdM            Z4 edNO          ddddddddddddddddddddddEdVdQ            Z4dRgZ5dS )Wut  Public LLM entrypoint: `honcho_llm_call`.

Orchestrates:
- Runtime config resolution from ConfiguredModelSettings → ModelConfig.
- Per-attempt planning (primary vs fallback selection).
- Retry with exponential backoff via tenacity.
- Tool-loop delegation when tools are supplied.
- Single-call delegation to the executor otherwise.
- Reasoning-trace telemetry emission.
    )annotationsN)AsyncIteratorCallable)AnyLiteralTypeVarcastoverload)	BaseModel)ai_track)retrystop_after_attemptwait_exponential)ConfiguredModelSettingsModelConfig)ValidationException)conditional_observe)log_reasoning_trace   )honcho_llm_call_inner)AttemptPlancurrent_attempteffective_temperatureplan_attemptresolve_runtime_model_config#update_current_langfuse_observation)execute_tool_loop)HonchoLLMCallResponseHonchoLLMCallStreamChunkIterationCallbackLLMTelemetryContextReasoningEffortTypeStreamingResponseWithMetadataM)boundFT   
   )
track_name	json_modetemperature	stop_seqsreasoning_effort	verbositythinking_budget_tokensenable_retryretry_attemptsstreamstream_final_onlytoolstool_choicetool_executormax_tool_iterationsmessagesmax_input_tokens
trace_nameiteration_callback	telemetrymodel_config%ModelConfig | ConfiguredModelSettingspromptstr
max_tokensintr(   
str | Noneresponse_modeltype[M]r)   boolr*   float | Noner+   list[str] | Noner,   r"   r-   'Literal['low', 'medium', 'high'] | Noner.   
int | Noner/   r0   r1   Literal[False]r2   r3   list[dict[str, Any]] | Noner4   str | dict[str, Any] | Noner5   +Callable[[str, dict[str, Any]], Any] | Noner6   r7   r8   r9   r:   IterationCallback | Noner;   LLMTelemetryContext | NonereturnHonchoLLMCallResponse[M]c                
   K   d S N r<   r>   r@   r(   rC   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   s                           3/DATA/AppData/hermes/projects/honcho/src/llm/api.pyhoncho_llm_callrW   3   s      6  #s    )r(   rC   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   NoneHonchoLLMCallResponse[str]c                
   K   d S rS   rT   rU   s                           rV   rW   rW   Q   s      6 "%rX   .type[BaseModel] | NoneLiteral[True]GAsyncIterator[HonchoLLMCallStreamChunk] | StreamingResponseWithMetadatac                
   K   d S rS   rT   rU   s                           rV   rW   rW   o   s      6 ORcrX   zLLM CallnamedHonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] | StreamingResponseWithMetadatac                6  	
&'(K   t          |           (rr|st          d          t          j        d           d2(
fd&d3&	fd}|}r t	                    |          }d4(fd}|r8 t          t                    t          ddd          |          |          }d5(
fd}d6(fd}d7(fd}r|s&d}|}|+ddlm	} m
}! |pddg}" | |"          |k    } |!|"|          }|{|'d3&'	fd}#|#}$r t	                    |$          }$|r8 t          t                    t          ddd          |          |$          }$ |$             d{V }%n |             d{V }%|rt          |%t                    rd|%_        |rFt          |%t                    r1t          |(|% |             |             |            |
  
         |%S t          d8i ddd |d!d"d#|d$|d%d&d'd(d)	d*|d+d,|d-&d.|d/|d0|d1 d{V }%|rFt          |%t                    r1t          |(|% |             |             |            |
  
         |%S )9ay  Make an LLM call with retry, optional backup failover, and optional tool loop.

    Backup provider/model (if configured on the primary ModelConfig's
    `fallback`) is used on the final retry attempt, which is 3 by default.

    Raises:
        ValidationException: If streaming and tool calling are combined
                             without `stream_final_only=True`.
    zStreaming is not supported with tool calling. Set stream=False when using tools, or use stream_final_only=True to stream only the final response after tool calls.r   rP   r   c                     t          t          j                              } t          | j        | j                   | S )N)runtime_model_configattemptr0   call_thinking_budget_tokenscall_reasoning_effortr`   )r   r   getr   providermodel)planr,   r0   re   r.   r(   s    rV   _get_attempt_planz*honcho_llm_call.<locals>._get_attempt_plan   s\    !5#')))(>"2
 
 
 	,MJ	
 	
 	
 	

 rX   DHonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk]c                 h  K                } rRt          | j        | j        t          	          | j        | j        d| j        
| j        |            d{V S t          | j        | j        t          	          | j        | j        d| j        
| j        |            d{V S )zSelect provider/model based on current attempt, then call once.

        This closure is what tenacity wraps, so selection re-runs per attempt
        (and the fallback kicks in on the final attempt automatically).
        T)r1   client_overrider3   r4   selected_configrl   r;   NFr   rj   rk   r   r,   r.   clientrq   )rl   rm   r)   r@   r>   rC   r+   r1   r;   r*   r4   r3   r-   s    rV   _call_with_provider_selectionz6honcho_llm_call.<locals>._call_with_provider_selection   s%      ! "" 	.
%k22%+ $' $ 4#%         ( +MJ!+..!' K# 0%
 
 
 
 
 
 
 
 
 	
rX   retry_stater   rY   c                F   | j         dz   }t          j        |           | j        r| j                                        nd}|r]t
                              d| j          d dj         dj         d| z              t
          	                    d| d            dS dS )zUpdate attempt counter before each retry + log transient failures.

        tenacity's before_sleep fires AFTER an attempt fails, BEFORE sleeping,
        so we increment to the next attempt number here.
        r   NzError on attempt /z with z: zWill retry with attempt )
attempt_numberr   setoutcome	exceptionloggerwarning	transportrk   info)ru   next_attemptexcr0   re   s      rV   before_retry_callbackz.honcho_llm_call.<locals>.before_retry_callback  s     #1A5L)))1<1DNk!++---$ 	TNNWK$>WWWWW)3YY6J6PYYTWYYZ   KKR<RR.RRSSSSS	T 	TrX      r'   )
multiplierminmax)stopwaitbefore_sleeprI   c                     n j         S rS   )r.   )re   r.   s   rV   _trace_thinking_budgetz/honcho_llm_call.<locals>._trace_thinking_budget(  s    
 &1 #"%<	
rX   r"   c                 L    S j         } | rt          t          |           nd S rS   )thinking_effortr	   r"   )config_effortr,   re   s    rV   _trace_reasoning_effortz0honcho_llm_call.<locals>._trace_reasoning_effort1  s2    '##,<;HRt'777dRrX   rG   c                     n j         S rS   )stop_sequences)re   r+   s   rV   _trace_stop_seqsz)honcho_llm_call.<locals>._trace_stop_seqs7  s    ".II4H4W	
rX   FN)count_message_tokenstruncate_messages_to_fituser)rolecontentc                 \  K                } rOt          | j        | j        ft          
          | j        | j        d| j        | j        | 	d d {V S t          | j        | j        ft          
          | j        | j        d| j        | j        | 	d d {V S )NT)rC   r)   r*   r+   r,   r-   r.   r1   rp   r3   r4   rq   rl   r;   r7   Frr   )rl   rm   captured_messagesr)   r@   r>   rC   r+   r1   r;   r*   r4   r3   r-   s    rV   _toolless_callz'honcho_llm_call.<locals>._toolless_call\  s;      )(**  !6
"	"
 (6"+$9+$F$F"+)-)>"+/3/J#(,#$/(,(<!"+!2'" " "       * 3MJ	
 $2' 5k B B'%)%:'+/+F $(K +$($8'.'         rX   T)
	task_typer<   r>   responser@   r.   r,   r)   r+   r7   r>   r@   r7   r3   r4   r5   r6   rC   r)   r*   r+   r-   r/   r0   r8   get_attempt_planr   stream_finalr:   r;   )rP   r   )rP   rn   )ru   r   rP   rY   )rP   rI   )rP   r"   )rP   rG   rT   )r   r   r   ry   r   r   r   r   conversationr   r   
isinstancer   hit_input_token_capr   r   ))r<   r>   r@   r(   rC   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   rt   	decoratedr   r   r   r   toolless_hit_input_token_captoolless_messagesr   r   base_messagesr   wrappedresultrm   r   re   s)    `````````` `` ``      `              @@@rV   rW   rW      s     R 8EE  
% 
 1 
!D
 
 	
          2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
 2
h .I 4(HZ((33	T T T T T T T   
E#N33!QA2>>>.
 
 
 	 	
 
 
 
 
 
 
S S S S S S S
 
 
 
 
 
 
  q q (-$$'TTTTTTTT$M&V)L)L(MM$$]336FF ) !9 8/! !
 ( 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1f %G 8.(:..w77 %+N;;)QA2FFF!6   	  gii F %9;;&&&&&&F' 	.Jv?T,U,U 	.)-F& 	*V-BCC 	$1%'='='?'?!8!8!:!:#**,,!     %   v:  e	
  K $m 0/ &~ )  K ) ) "\ &~ *)  +*!" 43#$ '&%& .-'( ))      F,  
j)>?? 
 -!#9#9#;#;4466&&((	
 	
 	
 	
 MrX   rW   )2r<   r=   r>   r?   r@   rA   r(   rB   rC   rD   r)   rE   r*   rF   r+   rG   r,   r"   r-   rH   r.   rI   r/   rE   r0   rA   r1   rJ   r2   rE   r3   rK   r4   rL   r5   rM   r6   rA   r7   rK   r8   rI   r9   rB   r:   rN   r;   rO   rP   rQ   )2r<   r=   r>   r?   r@   rA   r(   rB   rC   rY   r)   rE   r*   rF   r+   rG   r,   r"   r-   rH   r.   rI   r/   rE   r0   rA   r1   rJ   r2   rE   r3   rK   r4   rL   r5   rM   r6   rA   r7   rK   r8   rI   r9   rB   r:   rN   r;   rO   rP   rZ   )2r<   r=   r>   r?   r@   rA   r(   rB   rC   r\   r)   rE   r*   rF   r+   rG   r,   r"   r-   rH   r.   rI   r/   rE   r0   rA   r1   r]   r2   rE   r3   rK   r4   rL   r5   rM   r6   rA   r7   rK   r8   rI   r9   rB   r:   rN   r;   rO   rP   r^   )2r<   r=   r>   r?   r@   rA   r(   rB   rC   r\   r)   rE   r*   rF   r+   rG   r,   r"   r-   rH   r.   rI   r/   rE   r0   rA   r1   rE   r2   rE   r3   rK   r4   rL   r5   rM   r6   rA   r7   rK   r8   rI   r9   rB   r:   rN   r;   rO   rP   rb   )6__doc__
__future__r   loggingcollections.abcr   r   typingr   r   r   r	   r
   pydanticr   sentry_sdk.ai.monitoringr   tenacityr   r   r   
src.configr   r   src.exceptionsr   src.telemetry.loggingr   src.telemetry.reasoning_tracesr   executorr   runtimer   r   r   r   r   r   	tool_loopr   typesr   r   r    r!   r"   r#   	getLogger__name__r|   r$   rW   __all__rT   rX   rV   <module>r      s  	 	 # " " " " "  3 3 3 3 3 3 3 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8       - - - - - - @ @ @ @ @ @ @ @ @ @ ; ; ; ; ; ; ; ; . . . . . . 5 5 5 5 5 5 > > > > > > + + + + + +                ) ( ( ( ( (                
	8	$	$GCy!!! 
 " $"&,09=)-"#)-/3AE!,0#'!37,03# # # # # 
#: 
 " $"&,09=)-"#)-/3AE!,0#'!37,03% % % % % 
%: 
 "-1 $"&,09=)-#)-/3AE!,0#'!37,03R R R R R 
R: *%%% "-1 $"&,09=)-#)-/3AE!,0#'!37,03G G G G G &%GT
 
rX   