
    yj                    H   U d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZmZ dd	lmZmZ dd
lmZmZ  ej        e          Z edd          Zded<   ddd8dZ ed           G d d                      Zd9dZd:d#Zd;d)Zd<d2Z d=d3Z!d>d6Z"g d7Z#dS )?u  Runtime config planning and retry/fallback selection.

Owns:
- Resolution of ConfiguredModelSettings → ModelConfig.
- Per-attempt planning (AttemptPlan) including primary/fallback selection and
  reasoning-effort/thinking-budget resolution.
- Per-call effective config construction (applying caller kwarg overrides onto
  the selected ModelConfig).
- Retry attempt tracking via a ContextVar, plus the temperature-bump heuristic.
    )annotationsN)
ContextVar)	dataclass)Any)ConfiguredModelSettingsModelConfigModelTransportresolve_model_configsettings   )backend_for_providerclient_for_model_config)ProviderClientReasoningEffortTypecurrent_attempt)defaultzContextVar[int])nameproviderr	   modelstrr   
str | NonereturnNonec                   t           j        sdS 	 ddlm} dt           j        | |di}|||d<     |            j        di | dS # t          $ r&}t                              d|           Y d}~dS d}~ww xY w)	zEBest-effort annotation of the current Langfuse span with LLM routing.Nr   )
get_clientmetadata)	namespacer   r   r   z+Failed to update Langfuse span metadata: %s )	r   LANGFUSE_PUBLIC_KEYlangfuser   	NAMESPACEupdate_current_span	Exceptionloggerdebug)r   r   r   r   update_kwargsexcs         7/DATA/AppData/hermes/projects/honcho/src/llm/runtime.py#update_current_langfuse_observationr)   $   s     ' I'''''' %/$ )
 $(M&!(

(99=99999 I I IBCHHHHHHHHHIs   4A 
A6A11A6T)frozenc                  n    e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   dS )AttemptPlana	  Per-attempt plan produced by `plan_attempt`.

    Replaces the old loose tuple-of-six (`ProviderSelection`) with a single
    dataclass. Carries everything the executor / tool loop needs to make one
    backend call without re-resolving configuration mid-call.
    r	   r   r   r   r   client
int | Nonethinking_budget_tokensr   reasoning_effortr   selected_configintattemptretry_attemptsboolis_fallbackN)__name__
__module____qualname____doc____annotations__r       r(   r,   r,   ?   s           JJJ&&&&))))    LLLr<   r,   model_config%ModelConfig | ConfiguredModelSettingsr   c                N    t          | t                    r| S t          |           S )zHReturn a runtime ModelConfig, resolving settings-shape inputs if needed.)
isinstancer   r
   )r=   s    r(   resolve_runtime_model_configrA   S   s*     ,,, ---r<   r3   r2   r4   c               H   ||k    s| j         | S | j         }t          di d|j        d|j        ddd|j        d|j        d|j        d|j        d	|j        d
|j	        d|j
        d|j        d|j        d|j        d|j        d|j        d|j        d|j        S )zPick the effective config for this attempt.

    Primary config on all attempts except the last, which swaps to the
    resolved fallback (if any).
    Nr   	transportfallbackapi_keybase_urltemperaturetop_ptop_kfrequency_penaltypresence_penaltyseedthinking_effortr/   provider_paramsmax_output_tokensstop_sequencescache_policyr   )rD   r   r   rC   rE   rF   rG   rH   rI   rJ   rK   rL   rM   r/   rN   rO   rP   rQ   )r=   r3   r4   fbs       r(   select_model_config_for_attemptrS   \   s2    .  L$9$A		B   hh,,  

	
  NN hh hh .. ,, WW **  "88 ** ..  ((!" __# r<   runtime_model_configcall_thinking_budget_tokensr.   call_reasoning_effortr   c                b   t          | ||          }|j        }t          ||          }|| u }|r|n|j        }	|r|n|j        }
||k    rI| j        Bt                              d| d| d| j         d| j         dz   d| d|j         z              t          ||j        ||	|
|||| 	  	        S )	zBuild the AttemptPlan for `attempt`.

    Reasoning params are drawn from the caller when we're still on the
    primary config, and from the fallback config otherwise, so cross-transport
    fallbacks use provider-appropriate params.
    )r3   r4   NzFinal retry attempt /z: switching from z to zbackup )	r   r   r-   r/   r0   r1   r3   r4   r6   )
rS   rC   r   r/   rM   rD   r$   warningr   r,   )rT   r3   r4   rU   rV   selectedr   r-   
is_primaryattempt_thinking_budgetattempt_reasoning_efforts              r(   plan_attemptr^      s(    /%  H
 !H$Xx88F11J'1V##x7V  ",I1I  .  %9%B%NN7NN^NNN%/RR2F2LRRRS3338>334	
 	
 	
 n61 %"N
 
 
 
r<   r1   ModelConfig | NonerG   float | None	stop_seqslist[str] | Noner/   r0   c                    | t          ||||||          S ddi}|||d<   |||d<   |||d<   |||d<   |                     |          S )	u  Build the ModelConfig passed to the executor / request_builder.

    Per-call kwargs (temperature, stop_seqs, thinking_*) win when set; otherwise
    the selected_config's values are used. When selected_config is None
    (test-only callers passing provider+model directly) a minimal ModelConfig
    is synthesized.

    max_output_tokens is forced to None so the per-call max_tokens kwarg is
    authoritative — matching historical honcho_llm_call_inner behavior.
    N)r   rC   rG   rP   r/   rM   rO   rG   rP   r/   rM   )update)r   
model_copy)r1   r   r   rG   ra   r/   r0   updatess           r(   effective_config_for_callrg      s    ( #$#9,
 
 
 	
  3D9G!,$- !),B()#%5!"%%W%555r<   c                    | dk    r9t                                           dk    rt                              d           dS | S )u@   Bump temperature from 0.0 → 0.2 on retry attempts for variety.g        r   z,Bumping temperature from 0.0 to 0.2 on retryg?)r   getr$   r%   )rG   s    r(   effective_temperaturerj      sA    co1133a77CDDDsr<   planr   c                6    t          | j        | j                  S )u;   Convenience helper: plan → ready-to-call ProviderBackend.)r   r   r-   )rk   s    r(   resolve_backend_for_planrm      s    t{;;;r<   )	r,   r   rg   rj   r^   rm   rA   rS   r)   )r   r	   r   r   r   r   r   r   )r=   r>   r   r   )r=   r   r3   r2   r4   r2   r   r   )rT   r   r3   r2   r4   r2   rU   r.   rV   r   r   r,   )r1   r_   r   r	   r   r   rG   r`   ra   rb   r/   r.   r0   r   r   r   )rG   r`   r   r`   )rk   r,   r   r   )$r:   
__future__r   loggingcontextvarsr   dataclassesr   typingr   
src.configr   r   r	   r
   r   registryr   r   typesr   r   	getLoggerr7   r$   r   r;   r)   r,   rA   rS   r^   rg   rj   rm   __all__r   r<   r(   <module>rx      s  	 	 	 # " " " " "  " " " " " " ! ! ! ! ! !                    D C C C C C C C 6 6 6 6 6 6 6 6		8	$	$ $.:.?#K#K#K K K K K 	I I I I I I6 $       &. . . .! ! ! !H/ / / /d&6 &6 &6 &6R   < < < <

 
 
r<   