
    yj6+                        d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ  G d d	ee          Z G d
 de          Z G d dee          Z G d de          Zg dZdS )aT  LLM-call events for Honcho telemetry.

These events fire once per provider hit (each iteration of an agentic tool loop,
each deriver/summarizer LLM call, etc.) and carry the full cost-attribution
context: model/provider/transport, token counts with cache breakdown, finish
reason, outcome (success/error), retry/fallback state, and run correlation.

Unlike the existing aggregate `*Completed` events (representation, dialectic,
dream), this event is high-volume. It participates in the
`settings.TELEMETRY.HIGH_VOLUME_SAMPLE_RATE` sampler so per-iteration emission
can be tuned against a budget.
    )annotations)Enum)ClassVarLiteral)Field)ModelTransport)	BaseEventc                  *    e Zd ZdZdZdZdZdZdZdZ	dS )	CallPurposeu   Closed taxonomy for LLM call purposes.

    The schema lint enforces that all `LLMCallCompletedEvent` emissions use a
    value from this enum. Adding a new call site requires adding a value here
    first — keeps the analytics taxonomy stable.
    zderiver.representationzdialectic.answerzdream.deductionzdream.inductionzsummary.shortzsummary.longN)
__name__
__module____qualname____doc__DERIVER_REPRESENTATIONDIALECTIC_ANSWERDREAM_DEDUCTIONDREAM_INDUCTIONSUMMARY_SHORTSUMMARY_LONG     @/DATA/AppData/hermes/projects/honcho/src/telemetry/events/llm.pyr   r      s:          6)'O'O#M!LLLr   r   c                     e Zd ZU dZdZded<   dZded<   dZded	<   d
Zded<    e	dd          Z
ded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	dd           Zd!ed"<    e	d#d$          Zd!ed%<    e	d#d&          Zd!ed'<    e	d#d(          Zd!ed)<    e	d#d*          Zd!ed+<    e	dd,          Zded-<    e	dd.          Zd/ed0<    e	dd1          Zd2ed3<    e	dd4          Zded5<    e	dd6          Zd!ed7<    e	dd8          Zd!ed9<    e	dd:          Zd2ed;<    e	dd<          Zd=ed><    e	d?d@          Zd2edA<    e	d#dB          Zd!edC<    e	d?dD          Zd2edE<    e	ddF          Z dedG<    e	ddH          Z!dIedJ<   dMdLZ"dS )NLLMCallCompletedEventu  Emitted once per provider hit by `honcho_llm_call_inner`.

    Covers success, failure, and cancellation via `outcome`. The last attempt
    of a tenacity retry chain is flagged with `is_final_attempt=True` regardless
    of outcome — calibration queries for "exhausted" use
    `outcome='error' AND is_final_attempt`. Cancellations (typically client
    disconnect mid-stream or server shutdown) are distinct from errors and
    should not feed error-rate alerting.

    Streaming note: when `was_stream=True`, the token counts are placeholders
    (0) because token totals aren't knowable until the stream drains. Use the
    aggregate envelopes (`DialecticCompletedEvent` etc.) for streamed-call
    accuracy until streaming completion is wired through.
    zllm.call.completedClassVar[str]_event_type   ClassVar[int]_schema_versionllm	_categoryhigh_volume_volume_classNWorkspace namedefaultdescription
str | Noneworkspace_namez@Closed enum identifying the call site (deriver, dialectic, etc.)zCallPurpose | Nonecall_purposezYParent category for analytics joins: 'representation' | 'dialectic' | 'dream' | 'summary'parent_category.z0SDK transport: 'anthropic' | 'openai' | 'gemini'r'   r   	transportzBest-effort vendor inference for relay setups (e.g. 'anthropic' when an OpenRouter base_url + 'anthropic/claude-...' model is used); None when not reliably inferableprovider_labelz(Model identifier as sent to the providerstrmodelz#max_tokens value used for this callinteffective_max_output_tokensr   zProvider input_tokensprovider_input_tokenszProvider output_tokensprovider_output_tokenszTokens read from prompt cachecache_read_tokenszTokens written to prompt cachecache_creation_tokensz5First finish reason from the response (None on error)finish_reasonz'success' when the provider returned a result, 'error' when it raised, 'cancelled' when the awaitable was cancelled (client disconnect, server shutdown). Cancellations should be excluded from error-rate alerting.(Literal['success', 'error', 'cancelled']outcomezTrue when this is the last allowed attempt (attempt == retry_attempts). Combine with outcome='error' to identify retry-exhausted calls. Cancellations are not retried so this reflects the attempt at cancellation time.boolis_final_attemptzSException class name when outcome is 'error' or 'cancelled' (e.g. 'CancelledError')error_classz!1-indexed tenacity attempt numberattemptz Total attempts allowed by callerretry_attemptsz4True when this attempt used the fallback ModelConfigwas_fallback(Wall-clock duration of the provider callfloatduration_msFzTrue if tools were provided	has_toolsz(Number of tool calls the model requestedtool_call_countua   True for the stream_final_response path. Token counts are 0 placeholders — see class docstring.
was_streamz-Agent run id (ULID when widened in follow-up)run_idu   1-indexed iteration within an agentic tool loop. Passed explicitly via LLMTelemetryContext — NOT read from set_current_iteration (that fires after the LLM call)z
int | None	iterationreturnc           	     r    | j         pd}| j        | j        nd}| d| d| j         d| j         d| j         	S )zResource id includes run_id + iteration + attempt + transport/model
        so multi-attempt retries within one iteration get distinct ids.noneNr   :)rF   rG   r=   r-   r0   )selfrunrG   s      r   get_resource_idz%LLMCallCompletedEvent.get_resource_id   sT     k#V&*n&@DNNa	PP	PPDLPP4>PPDJPPPr   rH   r/   )#r   r   r   r   r   __annotations__r   r!   r#   r   r)   r*   r+   r-   r.   r0   r2   r3   r4   r5   r6   r7   r9   r;   r<   r=   r>   r?   rB   rC   rD   rE   rF   rG   rN   r   r   r   r   r   )   s          "6K5555%&O&&&&$I$$$$#0M0000 "'tAQ!R!R!RNRRRR',uV( ( (L     #(%o# # #O     !&K! ! !I     "' |" " "N     s(RSSSESSSS',u>( ( (    
 "'q>U!V!V!VVVVV"'%?W"X"X"XXXXX"U>       "'?" " "    
 !&K! ! !M     9> k9 9 9G     #U o       $ei  K     5*MNNNGNNNN%1STTTNTTTTO  L    
 C  K    
 eE7TUUUIUUUU 5I  O     uw  J     C  F     "E y  I    
Q Q Q Q Q Qr   r   c                  :    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdS )EmbeddingCallPurposeu  Closed taxonomy for embedding call purposes.

    Mirrors `CallPurpose` for LLM calls. Adding a new embedding call site
    requires adding a value here first — keeps the analytics taxonomy stable
    and prevents free-form `track_name` drift from leaking into queries.
    search_memorysearch_messagescreate_observationsvector_syncsummarymessage_createdialectic_prefetchsession_context_searchpreference_extractiongeneric_document_searchN)r   r   r   r   SEARCH_MEMORYSEARCH_MESSAGESCREATE_OBSERVATIONSVECTOR_SYNCSUMMARYMESSAGE_CREATEDIALECTIC_PREFETCHSESSION_CONTEXT_SEARCHPREFERENCE_EXTRACTIONGENERIC_DOCUMENT_SEARCHr   r   r   rR   rR      sU          $M'O/KG%N .537r   rR   c                     e Zd ZU dZdZded<   dZded<   dZded	<   d
Zded<    e	dd          Z
ded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	dd          Zded<    e	d d!          Zded"<    e	dd#          Zd$ed%<    e	dd&          Zd'ed(<    e	d)d*          Zd+ed,<    e	dd-          Zded.<    e	dd/          Zded0<   d3d2ZdS )4EmbeddingCallCompletedEventaM  Emitted once per embedding-provider call.

    Embedding calls are real provider spend (per-token like LLM calls).
    Search tools, observation creation, the message-embedding sync, and
    the deriver/summarizer paths all hit the embedding API; this event
    captures cost-attribution context for all of them.

    Volume note: this event is high-volume. Interactive paths
    (`search_memory` / `search_messages`) emit one event per query, so under
    a search-heavy dialectic load this can match or exceed the LLM call
    rate. The shared `HIGH_VOLUME_SAMPLE_RATE` covers both.
    zembedding.call.completedr   r   r   r   r   r    r!   r"   r#   Nr$   r%   r(   r)   zClosed enum identifying the call site. Set by callers via the `embedding_call_purpose` ContextVar; None when the call originated outside an instrumented path.zEmbeddingCallPurpose | Noner*   zHParent category for analytics joins (e.g. 'dialectic', 'representation')r+   .z'openai' | 'gemini'r,   r/   providerzModel identifierr0   z2Number of texts embedded in this call (batch size)r1   input_countr   u3  tiktoken-based size proxy for the embedded text. ESTIMATE only — the embedding client uses encoding_for_model() with a cl100k_base fallback (see embedding_client.py:68-71), which is exact for older OpenAI models, an approximation for newer ones, and a rough proxy for Gemini (which has its own tokenizer).input_tokens_estimater@   rA   rB   z'success' when the provider returned a result, 'error' when it raised, 'cancelled' when the awaitable was cancelled. Cancellations should be excluded from error-rate alerting.r8   r9   FzTrue on the last retry attempt. Mirrors LLMCallCompletedEvent's convention: combine with outcome='error' to identify exhausted embedding calls. Cancellations are not retried.r:   r;   z;Exception class name when outcome is 'error' or 'cancelled'r<   zGAgent run id when called from an agentic loop; None for sync/CRUD pathsrF   rH   c           	     |    | j         pd}| j        r| j        j        nd}| d| d| j         d| j         d| j         	S )zResource id includes timestamp-derived components implicitly via
        generate_id(); we just stake out a non-empty identifier scope.rJ   unknownrK   )rF   r*   valueri   r0   rj   )rL   rM   purposes      r   rN   z+EmbeddingCallCompletedEvent.get_resource_id   s[     k#V-1->M$#))IQQQQ$-QQ$*QQt?OQQQr   rO   )r   r   r   r   r   rP   r   r!   r#   r   r)   r*   r+   ri   r0   rj   rk   rB   r9   r;   r<   rF   rN   r   r   r   rh   rh      s          "<K;;;;%&O&&&&$I$$$$#0M0000!&tAQ!R!R!RNRRRR05,1 1 1L     #(%^# # #O    
 E#+@AAAHAAAAs(:;;;E;;;;uM  K     "'D	" 	" 	" 	 	 	 	 C  K     9> F9 9 9G     #U>       $eQ  K    
 ]  F    
R R R R R Rr   rh   )r   rh   rR   r   N)r   
__future__r   enumr   typingr   r   pydanticr   
src.configr   src.telemetry.events.baser	   r/   r   r   rR   rh   __all__r   r   r   <module>rw      sf    # " " " " "       $ $ $ $ $ $ $ $       % % % % % % / / / / / /" " " " "#t " " " nQ nQ nQ nQ nQI nQ nQ nQb8 8 8 8 83 8 8 8.OR OR OR OR OR) OR OR ORd  r   