
     j]              	       Z   d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZ ddlZddlZddlmZ d	d
lmZm Z m!Z!m"Z"m#Z#m$Z$ d	dl%m&Z& d	dl'm(Z( d	dl)m*Z*m+Z+m,Z,m-Z- d	dlm.Z.m/Z/  ej`                   ejb                               Z2de3de4deee3e3f   e3f   fdZ5dee6e3f   de6fdZ7de6fdZ8de3de4de4fdZ9de3de4fdZ:de3de4de4fdZ;dee6e3f   de4dee3ddf   fdZ<d e&d!ee6e3f   de6fd"Z=de6fd#Z>d$e6d%e6d&e6de6fd'Z? G d( d)      Z@y)*zCommunicate with the service. Only the Communicate class should be used by
end-users. The other classes and functions are for internal use only.    N)nullcontext)TextIOWrapper)Queue)AsyncGeneratorContextManagerDict	GeneratorListOptionalTupleUnion)escapeunescape)Literal   )DEFAULT_VOICEMP3_BITRATE_BPSSEC_MS_GEC_VERSIONTICKS_PER_SECONDWSS_HEADERSWSS_URL)	TTSConfig)DRM)NoAudioReceivedUnexpectedResponseUnknownResponseWebSocketError)CommunicateStateTTSChunk)cafiledataheader_lengthreturnc                     t        | t              st        d      i }| d| j                  d      D ]  }|j                  dd      \  }}|||<    || |dz   d fS )z
    Returns the headers and data from the given data.

    Args:
        data (bytes): The data to be parsed.
        header_length (int): The length of the header.

    Returns:
        tuple: The headers and data to be used in the request.
    zdata must be bytesNs   
   :r      )
isinstancebytes	TypeErrorsplit)r!   r"   headerslinekeyvalues         A/DATA/.local/lib/python3.12/site-packages/edge_tts/communicate.pyget_headers_and_datar0   2   sr     dE",--G^m$**73ZZa(
U 4 D*,---    stringc                 J   t        | t              r| j                  d      } t        | t              st	        d      t        |       }t        |      D ]>  \  }}t        |      }d|cxk  rdk  sn d|cxk  rdk  sn d|cxk  rdk  s7n :d	||<   @ d
j                  |      S )aS  
    The service does not support a couple character ranges.
    Most important being the vertical tab character which is
    commonly present in OCR-ed PDFs. Not doing this will
    result in an error from the service.

    Args:
        string (str or bytes): The string to be cleaned.

    Returns:
        str: The cleaned string.
    utf-8zstring must be str or bytesr                    )	r'   r(   decodestrr)   list	enumerateordjoin)r2   charsidxcharcodes        r/   remove_incompatible_charactersrF   J   s     &% w'fc"566F|Eu%	TINNd 0b 0bD6FB6FE#J &
 775>r1   c                  >    t        j                         j                  S )zZ
    Returns a UUID without dashes.

    Returns:
        str: A UUID without dashes.
    )uuiduuid4hex r1   r/   
connect_idrL   f   s     ::<r1   textlimitc                 \    | j                  dd|      }|dk  r| j                  dd|      }|S )a  
    Finds the index of the rightmost preferred split character (newline or space)
    within the initial `limit` bytes of the text.

    This helps find a natural word or sentence boundary for splitting, prioritizing
    newlines over spaces.

    Args:
        text (bytes): The byte string to search within.
        limit (int): The maximum index (exclusive) to search up to.

    Returns:
        int: The index of the last found newline or space within the limit,
             or -1 if neither is found in that range.
       
r       )rfind)rM   rN   split_ats      r/   (_find_last_newline_or_space_within_limitrT   p   s4    " zz%E*H!|::dAu-Or1   text_segmentc                     t        |       }|dkD  r	 | d| j                  d       |S |S # t        $ r |dz  }Y nw xY w|dkD  r3)a  
    Finds the rightmost possible byte index such that the
    segment `text_segment[:index]` is a valid UTF-8 sequence.

    This prevents splitting in the middle of a multi-byte UTF-8 character.

    Args:
        text_segment (bytes): The byte segment being considered for splitting.

    Returns:
        int: The index of the safe split point. Returns 0 if no valid split
             point is found (e.g., if the first byte is part of a multi-byte
             sequence longer than the limit allows).
    r   Nr4   r   )lenr<   UnicodeDecodeError)rU   rS   s     r/   _find_safe_utf8_split_pointrY      s_     < H
Q,	(#**73O
 O	 " 	MH	 Q,s   * ;;rS   c                     |dkD  rBd| d| v r;| j                  dd|      }| j                  d||      dk7  r	 |S |}|dkD  rd| d| v r;|S )a  
    Adjusts a proposed split point backward to prevent splitting inside an XML entity.

    For example, if `text` is `b"this &amp; that"` and `split_at` falls between
    `&` and `;`, this function moves `split_at` to the index before `&`.

    Args:
        text (bytes): The text segment being considered.
        split_at (int): The proposed split point index, determined by whitespace
                        or UTF-8 safety.

    Returns:
        int: The adjusted split point index. It will be moved to the '&'
             if an unterminated entity is detected right before the original `split_at`.
             Otherwise, the original `split_at` is returned.
    r      &N   ;)rindexfind)rM   rS   ampersand_indexs      r/   "_adjust_split_point_for_xml_entityra      sm    " Q,44	?2++dAx899T?H5;
 O # Q,44	?2 Or1   byte_lengthc              #     K   t        | t              r| j                  d      } t        | t              st	        d      |dk  rt        d      t        |       |kD  rlt        | |      }|dk  rt        |       }t        | |      }|dk  rt        d      | d| j                         }|r| | |dkD  r|ndd } t        |       |kD  rl| j                         }|r| yyw)a  
    Splits text into chunks, each not exceeding a maximum byte length.

    This function prioritizes splitting at natural boundaries (newlines, spaces)
    while ensuring that:
    1. No chunk exceeds `byte_length` bytes.
    2. Chunks do not end with an incomplete UTF-8 multi-byte character.
    3. Chunks do not split XML entities (like `&amp;`) in the middle.

    Args:
        text (str or bytes): The input text. If str, it's encoded to UTF-8.
        byte_length (int): The maximum allowed byte length for any yielded chunk.
                           Must be positive.

    Yields:
        bytes: Text chunks (UTF-8 encoded, stripped of leading/trailing whitespace)
               that conform to the byte length and integrity constraints.

    Raises:
        TypeError: If `text` is not str or bytes.
        ValueError: If `byte_length` is not positive, or if a split point
                    cannot be determined (e.g., due to extremely small byte_length
                    relative to character/entity sizes).
    r4   ztext must be str or bytesr   z"byte_length must be greater than 0zTMaximum byte length is too small or invalid text structure near '&' or invalid UTF-8Nr   )r'   r=   encoder(   r)   
ValueErrorrW   rT   rY   ra   strip)rM   rb   rS   chunkremaining_chunks        r/   split_text_by_byte_lengthri      s     6 ${{7#dE"344a=>>
d)k
!;D+Na<248H 6dHEa< C  Yh%%'K 1H!565 d)k
!: jjlO s   CC!	C!tcescaped_textc                     t        |t              r|j                  d      }d| j                   d| j                   d| j
                   d| j                   d| dS )z
    Creates a SSML string from the given parameters.

    Args:
        tc (TTSConfig): The TTS configuration.
        escaped_text (str or bytes): The escaped text. If bytes, it must be UTF-8 encoded.

    Returns:
        str: The SSML string.
    r4   z_<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='z'><prosody pitch='z' rate='z
' volume='z'>z</prosody></voice></speak>)r'   r(   r<   voicepitchratevolume)rj   rk   s     r/   mkssmlrq     sd     ,&#**73	z "88*HRWWIZ		{".			r1   c                  R    t        j                  dt        j                               S )zg
    Return Javascript-style date string.

    Returns:
        str: Javascript-style date string.
    z:%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time))timestrftimegmtimerK   r1   r/   date_to_stringrv      s      ==Ddkkm r1   
request_id	timestampssmlc                     d|  d| d| S )z
    Returns the headers and data to be used in the request.

    Returns:
        str: The headers and data to be used in the request.
    zX-RequestId:z1
Content-Type:application/ssml+xml
X-Timestamp:zZ
Path:ssml

rK   )rw   rx   ry   s      r/   ssml_headers_plus_datar{   0  s)     zl # k "&		r1   c                   J   e Zd ZdZefdddddddddd	ed
ededededed   deej                     dee   dee
   dee
   fdZdedefdZd dZdeedf   fdZdeedf   fdZ	 d!deeef   deeeef      ddfdZdeeddf   fdZ	 d!deeef   deeeef      ddfdZy)"Communicatez'
    Communicate with the service.
    z+0%z+0HzSentenceBoundaryN
   <   )ro   rp   rn   boundary	connectorproxyconnect_timeoutreceive_timeoutrM   rm   ro   rp   rn   r   WordBoundaryr~   r   r   r   r   c                   t        |||||      | _        t        |t              st	        d      t        t        t        |            d      | _        |t        |t              st	        d      || _	        t        |	t              st	        d      t        |
t              st	        d      t        j                  d d |	|
      | _        |%t        |t        j                        st	        d      || _        dd	d	d
d	d	d| _        y )Nztext must be stri   zproxy must be strzconnect_timeout must be intzreceive_timeout must be int)totalconnectsock_connect	sock_readz'connector must be aiohttp.BaseConnectorr1   r   F)partial_textoffset_compensationlast_duration_offsetstream_was_calledchunk_audio_bytescumulative_audio_bytes)r   
tts_configr'   r=   r)   ri   r   rF   textsr   intaiohttpClientTimeoutsession_timeoutBaseConnectorr   state)selfrM   rm   ro   rp   rn   r   r   r   r   r   s              r/   __init__zCommunicate.__init__G  s    $E4I $$.// /1$78

 Zs%;/00$)
 /3/9::/3/9::&44(%	 
  Iw?T?T)UEFF:C  #$$%!&!"&'(

r1   r!   r#   c                     t        j                  |      d   D ]X  }|d   }|dv r;|d   d   | j                  d   z   }|d   d   }|||t        |d   d   d	         d
c S |dv rLt	        d|        t        d      )NMetadataTyper   DataOffsetr   DurationrM   Text)typeoffsetdurationrM   )
SessionEndzUnknown metadata type: zNo WordBoundary metadata found)jsonloadsr   r   r   r   )r   r!   meta_obj	meta_typecurrent_offsetcurrent_durations         r/   __parse_metadatazCommunicate.__parse_metadata  s    

4(4H (I@@V$X.<Q1RR  $,F#3J#? %, 0$Xf%5f%=f%EF	  O+!$;I;"GHH 5  !!ABBr1   c                     | j                   dxx   | j                   d   z  cc<   | j                   d   dz  t        z  t        z  | j                   d<   d| j                   d<   y)a  Update inter-chunk offset_compensation from cumulative CBR audio bytes.

        The output format is audio-24khz-48kbitrate-mono-mp3 (48 kbps CBR).
        For any CBR stream the byte-to-tick conversion is exact integer
        arithmetic:  ticks = total_bytes * 8 * 10_000_000 // 48_000.

        This replaces the previous metadata-based accumulation which drifted
        on long texts due to variable AI silence and Microsoft's integer
        overflow in reported offsets.
        r   r   r5   r   r   N)r   r   r   )r   s    r/   __compensate_offsetzCommunicate.__compensate_offset  sj     	

+,

;N0OO,JJ/0  	

() +,

&'r1   c           
     z   K   d& fd}d& fd}d}t        j                   j                  d j                        4 d {   }|j	                  t
         dt                dt        j                          dt         d	 j                  t        j                  t              t        
      4 d {    |        d {     |        d {    2 3 d {   }|j                  t         j                  j                   k(  r|j"                  j%                  d      }t'        ||j)                  d            \  }}|j+                  dd       }	|	dk(  r/ j-                  |      }
|
 |
d   |
d   z    j.                  d<   |	dk(  r j1                           n|	dvst3        d      |j                  t         j                  j4                  k(  r$t7        |j"                        dk  rt9        d      t:        j=                  |j"                  d d d      }|t7        |j"                        kD  rt9        d      t'        |j"                  |      \  }}|j+                  d      dk7  rt9        d      |j+                  dd       }|dvrt9        d      |t7        |      dk(  rt9        d      t7        |      dk(  rt9        d       d} j.                  d!xx   t7        |      z  cc<   d"|d# #|j                  t         j                  j>                  k(  sLtA        |j"                  r|j"                        d$      |stC        d%      d d d       d {    d d d       d {    y 7 77 7 7 7 6 ?7 &# 1 d {  7  sw Y   6xY w7 -# 1 d {  7  sw Y   y xY ww)'Nc            	         K   j                   j                  dk(  } | rdnd}| sdnd}j                  dt                d| d| d       d{    y7 w)	z)Sends the command request to the service.r   truefalsezX-Timestamp:z
Content-Type:application/json; charset=utf-8
Path:speech.config

{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"z","wordBoundaryEnabled":"z9"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}
N)r   r   send_strrv   )word_boundarywdsqr   	websockets      r/   send_command_requestz2Communicate.__stream.<locals>.send_command_request  sn      OO44FM(gB,'B$$~/0 1. /1T1J2$ O		 	 	s   AAAAc                     K   j                  t        t               t               t	         j
                   j                  d                      d{    y7 w)z&Sends the SSML request to the service.r   N)r   r{   rL   rv   rq   r   r   )r   r   s   r/   send_ssml_requestz/Communicate.__stream.<locals>.send_ssml_request  sM     $$&L"$

>2	 	 	s   AAAAFT)r   	trust_envtimeoutz&ConnectionId=z&Sec-MS-GEC=z&Sec-MS-GEC-Version=   )compressr   r+   sslr4   s   

s   Paths   audio.metadatar   r   r   s   turn.end)s   responses
   turn.startzUnknown path receivedr&   zBWe received a binary message, but it is missing the header length.bigz9The header length is greater than the length of the data.s   audioz3Received binary message, but the path is not audio.s   Content-Type)s
   audio/mpegNz=Received binary message, but with an unexpected Content-Type.r   z<Received binary message with no Content-Type, but with data.z:Received binary message, but it is missing the audio data.r   audio)r   r!   zUnknown errorzFNo audio was received. Please verify that your parameters are correct.r#   N)"r   ClientSessionr   r   
ws_connectr   rL   r   generate_sec_ms_gecr   r   headers_with_muidr   _SSL_CTXr   	WSMsgTypeTEXTr!   rd   r0   r_   get_Communicate__parse_metadatar   _Communicate__compensate_offsetr   BINARYrW   r   r   
from_bytesERRORr   r   )r   r   r   audio_was_receivedsessionreceivedencoded_data
parametersr!   pathparsed_metadatar"   content_typer   s   `            @r/   __streamzCommunicate.__stream  s    	 	  # ((nn((
 
 g((i~jl^32245"#5"68 **))+6 ) 
 
 &(((#%%%"+ Qh==G$5$5$:$::*2--*>*>w*GL';$l&7&7&D($J &>>'48D00*.*?*?*E-- ,H5
8SS 

#9: , 002%AA-.EFF]]g&7&7&>&>>8==)A-0` 
 %(NN8==!3De$LM$s8=='990W 
 (< }($J
 "~~g.(:0Q  $.>>/4#HL#+@@0[ 
 $+t9>$ 1Z 
 4yA~0X 
 *.&JJ23s4y@3#*D99]]g&7&7&=&==()1 <K  &%\ C
 
	
 
 

 )%Q)
 
 
 
	
 
 
 
s   AN;M<N;	A-N&6M?7N&:NNNNNNN
 N#CN'E>N'4NN&&N'N&+N;6N$7N;?N&NNNNN&N!	NN!	N&$N;&N8,N/-N84N;c                  K   | j                   d   rt        d      d| j                   d<   | j                  D ]=  | j                   d<   d| j                   d<   	 | j                         2 3 d{   }|  y7 6 E# t        j
                  $ r_}|j                  dk7  r t        j                  |       d| j                   d<   | j                         2 3 d{  7  }| 6 Y d}~d}~ww xY ww)	au  
        Streams audio and metadata from the service.

        Raises:
            NoAudioReceived: If no audio is received from the service.
            UnexpectedResponse: If the response from the service is unexpected.
            UnknownResponse: If the response from the service is unknown.
            WebSocketError: If there is an error with the websocket.
        r   zstream can only be called once.Tr   r   r   Ni  )	r   RuntimeErrorr   _Communicate__streamr   ClientResponseErrorstatusr   handle_client_response_error)r   messagees      r/   streamzCommunicate.stream6  s      ::)*@AA*.

&' +/**DJJ~&./DJJ*+
"%)]]_ "'!M	 +5"_.. "88s?00323

./%)]]_ " "'!M &5"ss   AC2A=(A;,A9-A;0A=7C29A;;A=<C2=C/AC*C$C
C$C*%C2*C//C2audio_fnamemetadata_fnamec                   K   |t        |dd      n	t               }|5  t        |d      5 }| j                         2 3 d{   }|d   dk(  r|j                  |d          &t	        |t
              s7|d   d	v s?t        j                  ||       |j                  d
       g7 b6 	 ddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY ww)zE
        Save the audio and metadata to the specified files.
        Nwr4   )encodingwbr   r   r!   r   
)openr   r   writer'   r   r   dump)r   r   r   metadatar   r   s         r/   savezCommunicate.saveW  s      ) w7 	
 tK.%!% )g6?g-KK0-8WV_ Q > IIgx0NN4() /..XXXsb   CCB1B&B$
B&.B14B1<(B1$B&&B1(	C1B:	6C=	CCCc              #       K   dt         ddf fd}t               }t        j                  j                         5 }|j	                  ||       	 |j                         }|n| 	 ddd       y# 1 sw Y   yxY ww)z-Synchronous interface for async stream methodqueuer#   Nc                      d fd}t        j                         }t        j                  |       |j                   |              |j	                          y )Nc                     K   j                         2 3 d {   } j                  |        7 6 j                  d        y wN)r   put)itemr   r   s    r/   	get_itemszECommunicate.stream_sync.<locals>.fetch_async_items.<locals>.get_itemss  s5     "&++- $$IIdO$-		$s   A1/1A1Ar   )asyncionew_event_loopset_event_looprun_until_completeclose)r   r   loopr   s   `  r/   fetch_async_itemsz2Communicate.stream_sync.<locals>.fetch_async_itemsr  s?     
 ))+D""4(##IK0JJLr1   )r   
concurrentfuturesThreadPoolExecutorsubmitr   )r   r   r   executorr   s   `    r/   stream_synczCommunicate.stream_synco  sv     		U 		t 		 w224OO-u5yy{<
	   544s   7A<,A0'	A<0A95A<c                     t         j                  j                         5 }|j                  t        j
                  | j                  ||            }|j                          ddd       y# 1 sw Y   yxY w)z,Synchronous interface for async save method.N)r   r   r  r  r   runr   result)r   r   r   r  futures        r/   	save_synczCommunicate.save_sync  sQ     224__TYY{NCF MMO	 544s   AA))A2r   r   )__name__
__module____qualname____doc__r   r=   r   r   r   r   r   r   r(   r   r   r   r   r   r   r   r   r	   r  r	  rK   r1   r/   r}   r}   A  s    #9

 @R59#)+)+9
9
 9

 9
 9
 9
 <=9
 G1129
 }9
 "#9
 "#9
vCU Cx C&,(Kx~ > KZ"	$	'"H 7;)3:&) !sEz!23) 
	)0Yxt';< 8 7;
3:&
 !sEz!23
 
	
r1   r}   )Ar  r   concurrent.futuresr   r   r   rs   rH   
contextlibr   ior   r   r   typingr   r   r   r	   r
   r   r   r   xml.sax.saxutilsr   r   r   certifityping_extensionsr   	constantsr   r   r   r   r   r   data_classesr   drmr   
exceptionsr   r   r   r   r   r   create_default_contextwherer   r(   r   r0   r=   rF   rL   rT   rY   ra   ri   rq   rv   r{   r}   rK   r1   r/   <module>r     s  I    
   "  	 	 	 .   %  $   /%3%%]W]]_=.
. #.
4uu$%.05e+<  8C 5   0e  8U c c <B
U

B*-BudD !BJy c5j(9 c 2  s s # # "Q Qr1   