
    3j;                    ~   d Z ddlmZ g dZddlZddlZddlZddlZddlZddl	Z	ddl
Z
 ej                  d      Z ej                  d      ZddZdddZddd	Zddd
ZdddZdddZ	 	 ddZddZddZd dZej0                   G d d             Z	 	 d!	 	 	 	 	 	 	 d"dZedk(  rddlZ ej:                          yy)#z 
Tools for working with strings
    )annotations)whitespaceEqualgetNumFromStrhyphenToCamelCasecamelCaseToHyphenspaceCamelCasegetMd5	formatStrstripAccentsnormalizeFilenameremovePunctuationparenthesesMatchParenthesesMatchNz\s+z
+c                    t         j                  d|       } t         j                  d|      }t        j                  d|       } t        j                  d|      }| |k(  ryy)a  
    returns True if a and b are equal except for whitespace differences

    >>> a = '    hello \n there '
    >>> b = 'hello there'
    >>> c = ' bye there '
    >>> common.whitespaceEqual(a, b)
    True
    >>> common.whitespaceEqual(a, c)
    False
     TF)
WHITESPACEsubLINEFEED)abs     G/DATA/.local/lib/python3.12/site-packages/music21/common/stringTools.pyr   r   ,   sQ     	r1Ar1ARARAAv    c                    g }g }| D ])  }||v r|j                  |       |j                  |       + dj                  |      dj                  |      fS )a$  
    Given a string, extract any numbers.
    Return two strings, the numbers (as strings) and the remaining characters.

    >>> common.getNumFromStr('23a')
    ('23', 'a')
    >>> common.getNumFromStr('23a954Hello')
    ('23954', 'aHello')
    >>> common.getNumFromStr('')
    ('', '')
    r   )appendjoin)usrStrnumbersfoundremainchars        r   r   r   C   sT     EF7?LLMM$	  775>2776?**r   c                    d}t        | j                  |            D ]   \  }}|dk(  r|}||j                         z  }" |S )a  
    Given a hyphen-connected-string, change it to
    a camelCaseConnectedString.

    The replacement can be specified to be something besides a hyphen.

    >>> common.hyphenToCamelCase('movement-name')
    'movementName'

    >>> common.hyphenToCamelCase('movement_name', replacement='_')
    'movementName'

    Safe to call on a string lacking the replacement character:

    >>> common.hyphenToCamelCase('voice')
    'voice'

    And on "words" beginning with numbers:

    >>> common.hyphenToCamelCase('music-21')
    'music21'
    r   r   )	enumeratesplit
capitalize)r   replacementpostiwords        r   r   r   Z   sI    . DV\\+6746DDOO%%D	 8
 Kr   c                    t        |      dk7  rt        d      |j                         |k7  rt        d      t        j                  dd|z   dz   |       }t        j                  dd|z   dz   |      j                         S )aF  
    Given a camel-cased string, or a mixture of numbers and characters,
    create a space separated string.

    The replacement can be specified to be something besides a hyphen, but only
    a single character and not (for internal reasons) an uppercase character.

    code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    >>> common.camelCaseToHyphen('movementName')
    'movement-name'

    First letter can be uppercase as well:

    >>> common.camelCaseToHyphen('MovementName')
    'movement-name'

    >>> common.camelCaseToHyphen('movementNameName')
    'movement-name-name'

    >>> common.camelCaseToHyphen('fileName', replacement='_')
    'file_name'

    Some things you cannot do:

    >>> common.camelCaseToHyphen('fileName', replacement='NotFound')
    Traceback (most recent call last):
    ValueError: Replacement must be a single character.

    >>> common.camelCaseToHyphen('fileName', replacement='A')
    Traceback (most recent call last):
    ValueError: Replacement cannot be an uppercase character.
       z'Replacement must be a single character.z-Replacement cannot be an uppercase character.z(.)([A-Z][a-z]+)z\1z\2z([a-z0-9])([A-Z]))len
ValueErrorlowerrer   )r   r%   s1s      r   r   r   z   s    F ;1BCCk)HII	"EK$7%$?	HB66%u{':U'BBGMMOOr   c                   d}d}d}d}d}g }|d}	n|}	| D ]r  }
|
|v rd}nd}|r|s|sd}nd}|s|s|rd}nd}|r7|
j                         s|s|r|j                  d       |j                  |
       n|j                  |
       |rd}qd}t dj                  |      }|	D ]%  }dj                  |      }|j                  ||      }' |r|j                  dd      }|S )a  
    Given a camel-cased string, or a mixture of numbers and characters,
    create a space separated string.

    If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

    >>> common.spaceCamelCase('thisIsATest')
    'this Is A Test'
    >>> common.spaceCamelCase('ThisIsATest')
    'This Is A Test'
    >>> common.spaceCamelCase('movement3')
    'movement 3'
    >>> common.spaceCamelCase('opus41no1')
    'opus 41 no 1'
    >>> common.spaceCamelCase('opus23402no219235')
    'opus 23402 no 219235'
    >>> common.spaceCamelCase('opus23402no219235').title()
    'Opus 23402 No 219235'

    There is a small list called fixMeList that can fix mistakes.

    >>> common.spaceCamelCase('PMFC22')
    'PMFC 22'

    >>> common.spaceCamelCase('hello_myke')
    'hello myke'
    >>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
    'hello_myke'
    z0123456789.F)PMFCT r   _)isupperr   r   replace)r   replaceUnderscore	fixMeListr   firstNum	firstCharisNumber	lastIsNumr&   	fixupListr    postStrfixMefixMeSpaceds                 r   r   r      s   < GHIHID 		7?HHHYHH 	iII||~YC KKKKII7 8 ggdmGhhuo//+u5  //#s+Nr   c                d   | ;t        t        j                               t        t        j                               z   } t        j                         }	 |j                  |        |j                         S # t        $ r2 |j                  | j                  d             Y |j                         S w xY w)z
    Return an md5 hash from a string.  If no value is given then
    the current time plus a random number is encoded.

    >>> common.getMd5('test')
    '098f6bcd4621d373cade4e832627b4f6'
    UTF-8)	strtimerandomhashlibmd5update	TypeErrorencode	hexdigest)valuems     r   r	   r	      s     }DIIK 3v}}#77A(	 ;;=  (	g&';;=(s   A4 4)B/.B/c                j   | g|} t        t        |             D ]K  }| |   }t        |t              r|j	                  d      | |<   t        |t
              r=	 t        |      | |<   M dj                  |       dz   S # t        $ r- 	 |j	                  d      | |<   n# t        $ r d| |<   Y nw xY wY w xY w)a  
    DEPRECATED: do not use.  May be removed at any time.

    Format one or more data elements into string suitable for printing
    straight to stderr or other outputs

    >>> a = common.formatStr('test', '1', 2, 3)
    >>> print(a)
    test 1 2 3
    <BLANKLINE>
    zutf-8r   r2   
)
ranger+   
isinstancebytesdecoderB   reprrH   AttributeErrorr   )msgrest_of_messagekeywordsr'   xs        r   r
   r
   
  s     
!
!C3s8_FaXXg&CF!S! aA  88C=4    XXg.CF%  CF  s6   A<<	B2BB2B,)B2+B,,B21B2c                B   t        j                  d|       j                  dd      j                  dd      j                  dd      j                  dd      j                  d	d      }d
j                  |D cg c]  }t        j                  |      r| c}      S c c}w )u  
    removes accents from unicode strings.

    >>> s = 'trés vite'
    >>> 'é' in s
    True
    >>> common.stripAccents(s)
    'tres vite'

    Also handles the German Eszett and smart quotes

    >>> common.stripAccents('Muß')
    'Muss'
    >>> common.stripAccents('Süss, “êtré”')
    'Suss, "etre"'

    Note -- it is still possible to have non-Ascii characters after this,
    like in this Japanese expression for music:

    >>> common.stripAccents('音楽')
    '音楽'
    NFKD   ßssu   “"u   ”u   ‘'u   ’r   )unicodedata	normalizer5   r   	combining)inputString	nfkd_formcs      r   r   r   (  s    0 	fk2	t									  77yIy!0E0Ea0HAyIJJIs   6BBc                   d}t        |       }|dkD  r!| d   dk(  rt        | |dz
  d       }| d|dz
   } t        |       } | j                  dd      j	                  d      } t        j                  d	d
|       j                         } || |z  } | S )u  
    take a name that might contain unicode characters, punctuation,
    or spaces and
    normalize it so that it is POSIX compliant (except for the limit
    on length).

    Takes in a string or unicode string and returns a string (unicode in Py3)
    without any accented characters.

    >>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
    '03-Niccolo_all_lessandra_not_really.xml'
    N   .   asciiignorerA   z[^\w-]r3   )r+   rB   r   rI   rR   r.   r   strip)name	extensionlenNames      r   r   r   J  s     I$iG{tBx3Wq[\*+	LWq[!D;;w)009D66)S$'--/D	Kr   c                r    t         j                  ddt        j                        }| j	                  |      }|S )z
    Remove all punctuation from a string.

    >>> common.removePunctuation('This, is! my (face).')
    'This is my face'
    r   )rB   	maketransstringpunctuation	translate)srq   outs      r   r   r   f  s/     b"f&8&89I
++i
 CJr   c                  6    e Zd ZU ded<   ded<   ded<   ded<   y)	r   intstartendrB   textlist[ParenthesesMatch]nestedN)__name__
__module____qualname____annotations__ r   r   r   r   q  s    J	H
I""r   r   c           	        |r|st        d      t        dddg       }|g}d}d}|t        |       k  r|sN| ||t        |      z    |k(  r:t        |t        |      z   ddg       }|j                  |       |t        |      z  }_|s| ||t        |      z    |k(  r{t        |      dk  rt        d|d|d	| d
      |j	                         }||_        | |j                  | |_        |d   j                  j                  |       |t        |      z  }| |   dk(  r| }nd}|dz  }|t        |       k  rt        |      dkD  r"t        d|d	|d   j                  dz
   d      |j                  S )ap  
    Utility tool to return a list of parentheses matches for a string using a dataclass
    called `ParenthesesMatch` which has indices of the `start` and `end`
    of the match, and the `text` of the match, and a set of `nested`
    ParenthesesMatch objects (which may have their own nested objects).

    >>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
    >>> common.stringTools.parenthesesMatch(st)
    [ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                      nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                              ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
     ParenthesesMatch(start=47, end=49, text='on', nested=[])]

    Other brackets can be used:

    >>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
    >>> common.stringTools.parenthesesMatch(st, open='[', close=']')
    [ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
     ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
     ParenthesesMatch(start=30, end=44, text='not [mix] very',
                      nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
     ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

    The `open` and `close` parameters can be multiple characters:

    >>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
    >>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
    [ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
     ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                      nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

    They cannot, however, be empty:

    >>> common.stringTools.parenthesesMatch(st, open='', close='')
    Traceback (most recent call last):
    ValueError: Neither open nor close can be empty.

    Unmatched opening or closing parentheses will raise a ValueError:

    >>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
    Traceback (most recent call last):
    ValueError:  Opening '(' at index 3 was never closed

    >>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
    Traceback (most recent call last):
    ValueError: Closing '>' without '<' at index 23.

    Note that using multiple characters like a prefix can have unintended consequences:

    >>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
    >>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
    Traceback (most recent call last):
    ValueError: Closing '")' without 'Pitch("' at index 59.

    So to do something like this, you might need to get creative:

    >>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
    >>> out
    [ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
     ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
     ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
     ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                      nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
    >>> extractedPitches = []
    >>> for match in out:
    ...     if st[match.start - 7:match.start] == 'Pitch("':
    ...          extractedPitches.append(match.text)
    >>> extractedPitches
    ['C4', 'D5', 'E6', 'Pity("Z9")']

    * New in v9.3.
    z$Neither open nor close can be empty.r   Fr   r*   zClosing z	 without z
 at index rh   \zOpening z was never closed)	r,   r   r+   r   poprz   ry   r{   r}   )ru   openclose	mainMatchstacklastCharWasBackslashr'   curPMs           r   r   r   x  s   Z u?@@ RR0I%.KE 	A
c!f*$aCI&$.$QT]BB?ELLTNA&Aa#e*n%.5zQ 8E9IdXZPQsRS!TUUIIKEEI5;;q)EJ"I##E*UOAQ44<';#; #( 	Q- c!f*0 5zA~8D8:eAhnnq6H5IIZ[\\r   __main__)r   rB   r   rB   returnbool)
0123456789)r   rB   r   rB   r   ztuple[str, str])-)r   rB   r%   rB   r   rB   )TN)r   rB   r   rB   )N)r   rB   )rb   rB   r   rB   )rm   rB   r   rB   )ru   rB   r   rB   )())ru   rB   r   rB   r   rB   r   r|   )__doc__
__future__r   __all__dataclassesrE   rD   r.   rC   rr   r_   compiler   r   r   r   r   r   r   r	   r
   r   r   r   	dataclassr   r   r~   music21mainTestr   r   r   <module>r      s   #    	    RZZ
2::e.+.@(PVNb(   <KD8	 # # # q
q
q q 	qj zG r   