
    b                         d Z ddlmZ ddlZdZ G d de      Z G d de      Z G d	 d
e      Z G d de      Z	 G d de      Z
 G d de      Z G d de      Zd Zd Zy)a  
Parser for parsing a regular expression.
Take a string representing a regular expression and return the root node of its
parse tree.

usage::

    root_node = parse_regex('(hello|world)')

Remarks:
- The regex parser processes multiline, it ignores all whitespace and supports
  multiple named groups with the same name and #-style comments.

Limitations:
- Lookahead is not supported.
    )unicode_literalsN)RepeatVariableRegex	Lookaheadtokenize_regexparse_regexc                       e Zd ZdZd Zd Zy)NodezT
    Base class for all the grammar nodes.
    (You don't initialize this one.)
    c                     t        | |g      S N)Sequenceself
other_nodes     Hlib/third_party/prompt_toolkit/contrib/regular_languages/regex_parser.py__add__zNode.__add__$   s    z*++    c                     t        | |g      S r   )Anyr   s     r   __or__zNode.__or__'   s    D*%&&r   N)__name__
__module____qualname____doc__r   r    r   r   r   r      s    ,'r   r   c                   "    e Zd ZdZd Zd Zd Zy)r   z
    Union operation (OR operation) between several grammars. You don't
    initialize this yourself, but it's a result of a "Grammar1 | Grammar2"
    operation.
    c                     || _         y r   childrenr   r    s     r   __init__zAny.__init__1   	     r   c                 4    t        | j                  |gz         S r   )r   r    r   s     r   r   z
Any.__or__4   s    4==J</00r   c                 N    | j                   j                  d| j                  dS N()	__class__r   r    r   s    r   __repr__zAny.__repr__7       >>22DMMBBr   N)r   r   r   r   r"   r   r,   r   r   r   r   r   +   s    
!1Cr   r   c                   "    e Zd ZdZd Zd Zd Zy)r   z
    Concatenation operation of several grammars. You don't initialize this
    yourself, but it's a result of a "Grammar1 + Grammar2" operation.
    c                     || _         y r   r   r!   s     r   r"   zSequence.__init__@   r#   r   c                 4    t        | j                  |gz         S r   )r   r    r   s     r   r   zSequence.__add__C   s    455r   c                 N    | j                   j                  d| j                  dS r&   r)   r+   s    r   r,   zSequence.__repr__F   r-   r   N)r   r   r   r   r"   r   r,   r   r   r   r   r   ;   s    !6Cr   r   c                       e Zd ZdZd Zd Zy)r   z
    Regular expression.
    c                 <    t        j                  |       || _        y r   )recompileregex)r   r6   s     r   r"   zRegex.__init__N   s    


5
r   c                 N    | j                   j                  d| j                  dS )Nz(/z/))r*   r   r6   r+   s    r   r,   zRegex.__repr__S   s    !^^44djjAAr   Nr   r   r   r   r"   r,   r   r   r   r   r   J   s    
Br   r   c                       e Zd ZdZddZd Zy)r   z
    Lookahead expression.
    c                      || _         || _        y r   )	childnodenegative)r   r;   r<   s      r   r"   zLookahead.__init__[   s    " r   c                 N    | j                   j                  d| j                  dS r&   r*   r   r;   r+   s    r   r,   zLookahead.__repr___   s    >>22DNNCCr   N)Fr8   r   r   r   r   r   W   s    !Dr   r   c                       e Zd ZdZddZd Zy)r   a  
    Mark a variable in the regular grammar. This will be translated into a
    named group. Each variable can have his own completer, validator, etc..

    :param childnode: The grammar which is wrapped inside this variable.
    :param varname: String.
    Nc                      || _         || _        y r   )r;   varname)r   r;   rA   s      r   r"   zVariable.__init__k   s    "r   c                 h    | j                   j                  d| j                  d| j                  dS )N(childnode=z
, varname=r(   )r*   r   r;   rA   r+   s    r   r,   zVariable.__repr__o   s'    NN##T^^T\\C 	Cr   r   r8   r   r   r   r   r   c   s    Cr   r   c                       e Zd ZddZd Zy)r   Nc                 <    || _         || _        || _        || _        y r   )r;   
min_repeat
max_repeatgreedy)r   r;   rF   rG   rH   s        r   r"   zRepeat.__init__u   s    "$$r   c                 N    | j                   j                  d| j                  dS )NrC   r(   r>   r+   s    r   r,   zRepeat.__repr__{   s    %)^^%<%<dnnMMr   )r   NT)r   r   r   r"   r,   r   r   r   r   r   t   s    Nr   r   c                 (   t        j                  dt         j                        }g }| ri|j                  |       }|rH| d|j	                          | |j	                         d } }|j                         s|j                  |       nt        d      | ri|S )z
    Takes a string, representing a regular expression as input, and tokenizes
    it.

    :param input: string, representing a regular expression.
    :returns: List of tokens.
    a  ^(
        \(\?P\<[a-zA-Z0-9_-]+\>  | # Start of named group.
        \(\?#[^)]*\)             | # Comment
        \(\?=                    | # Start of lookahead assertion
        \(\?!                    | # Start of negative lookahead assertion
        \(\?<=                   | # If preceded by.
        \(\?<                    | # If not preceded by.
        \(?:                     | # Start of group. (non capturing.)
        \(                       | # Start of group.
        \(?[iLmsux]              | # Flags.
        \(?P=[a-zA-Z]+\)         | # Back reference to named group
        \)                       | # End of group.
        \{[^{}]*\}               | # Repetition
        \*\? | \+\? | \?\?\      | # Non greedy repetition.
        \* | \+ | \?             | # Repetition
        \#.*\n                   | # Comment
        \\. |

        # Character group.
        \[
            ( [^\]\\]  |  \\.)*
        \]                  |

        [^(){}]             |
        .
    )NzCould not tokenize input regex.)r4   r5   VERBOSEmatchendisspaceappend	Exception)inputptokensmtokens        r   r   r      s     	

 	2 **3	A6 F
GGEN !%%'?E!%%'(O5E==?e$=>>  Mr   c                 x    dg| ddd   z   d fd        }t              dk7  rt        d      |S )zN
    Takes a list of tokens from the tokenizer, and returns a parse tree.
    r(   Nc                 >    t        |       dk(  r| d   S t        |       S )z9 Turn list into sequence when it contains several items.    r   )lenr   )lsts    r   wrapzparse_regex.<locals>.wrap   s     s8q=q6MC= r   c                     g g fd} rωj                         }|j                  d      r(t                |dd       }j                  |       n|dv r|dk(  }t	        d   |      d<   nd|d	v r|d
k(  }t	        d   d|      d<   nE|dv r8g k(  rt        dt              z         |dk(  }t	        d   dd|      d<   n	|dk(  rj                         g n|dv rj                                 n|dk(  r"j                  t                d             n|dk(  r"j                  t                d             n|dk(  r |        S |j                  d      rni|j                  d      rt        d|z        |j                  d      rt        d|z        |j                         rnj                  t        |             rt        d      ) Nc                      g k(  r       S j                         t        D  cg c]
  }  |        c}       S c c} w r   )rO   r   )ior_listresultr\   s    r   wrapped_resultz3parse_regex.<locals>._parse.<locals>.wrapped_result   sA    "}F|#v&W5WDGW5665s   ?z(?P<   rW   )rA   )*z*?rd   )rH   )+z+?re   rY   )rF   rH   )?z??zNothing to repeat.rf   r   )rF   rG   rH   |)r'   z(?:z(?!T)r<   z(?=Fr(   #{z%{}-style repitition not yet supportedz(?z%r not supportedzExpecting ')' token)
pop
startswithr   rO   r   rP   reprr   rN   r   )	rb   tvariablerH   r`   ra   _parserS   r\   s	       @@r   ro   zparse_regex.<locals>._parse   s   	7 

A||F##FHa"g>h'k!s(#F2Jv>r
k!s(#F2J1VLr
k!R<#$84<$GHH3hF!'r
qQW]!^F2Jcv&l"fh'ei4@Aei5ABc%''c"c" G! KLLd# 2Q 677eAh'g j -..r   r   zUnmatched parantheses.)rZ   rP   )regex_tokensra   ro   rS   r\   s     @@@r   r	   r	      sL    
 U\$B$''F!@/D XF
6{a011r   )r   
__future__r   r4   __all__objectr   r   r   r   r   r   r   r   r	   r   r   r   <module>rt      s     ( 		'6 	'C$ C Ct C
BD 
B	D 	DCt C"NT N/dUr   