
                              d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ  G d
 d      Zd Zy)zBProvides split file preprocessing for adding splits to a database.    )absolute_import)division)unicode_literalsN)extra_types)apis)
exceptions)filesc                   .    e Zd ZdZd Zd Zd Zd Zd Zy)SplitFileParseraa  Parses a split file into a list of split points.

  The split file is expected to be in the format of:
  <ObjectType>[space]<ObjectName>[space](<Split Value>)
  <ObjectType>[space]<ObjectName>[space](<Split Value>)
  ...
  where ObjectType can be TABLE or INDEX.
  Each split point must be in a new line.
  Split value is expected to be a comma separated list of key parts.
    Split values should be surrounded by parenthesis like ()
    String values should be supplied in single quotes:'splitKeyPart'
    Boolean values should be one of: true/false
    INT64 and NUMERIC spanner datatype values should be supplied within
    single quotes values like string format: '123',
    '999999999999999999999999999.99'
    Other number values should be supplied without quotes: 1.287
    Timestamp values should be provided in the following format in single quote
    values: '2020-06-18T17:24:53Z'
    If the split value needs to have a comma, then that should be escaped by
    backslash.

    Examples:
    TABLE Singers ('c32ca57a-786c-2268-09d4-95182a9930be')
    INDEX Order (4.2)
    TABLE TableD  (0,'7ef9db22-d0e5-6041-8937-4bc6a7ef9db2')
    INDEX IndexXYZ ('8762203435012030000',NULL,NULL)
    INDEX IndexABC  (0, '2020-06-18T17:24:53Z') TableKey (123,'ab\,c')
    -- note that the above split value has a delimieter (comma) in it,
        hence escaped by a backslash.
  c                 $   || _         || _        t        j                  d      | _        t        j                  d      | _        t        j                  d      | _        t        j                  d      | _        t        j                  d      | _        y )Nz(\S+)\s+(\S+)\s+(.+)z"\((.*?)\) TABLE (\S+)\s+\((.*?)\)$z"\((.*?)\) INDEX (\S+)\s+\((.*?)\)$z\((.*?)\) TableKey \((.*?)\)$z
\((.*?)\)$)	splits_filesplit_expiration_daterecompilesplit_line_pattern&incorrect_split_with_table_key_pattern&incorrect_split_with_index_key_patternindex_full_key_patternsingle_key_pattern)selfr   r   s      ;lib/googlecloudsdk/command_lib/spanner/split_file_parser.py__init__zSplitFileParser.__init__?   ss    "D!6D jj)@AD24**-3D/ 35**-3D/ #%**-M"ND jj7D    c                    t        j                  dd      }g }t        j                  | j                        5 }|j                         j                         D ]  }| j                  |      }|r$|d   r|d   r|d   r|d   j                         dvr%t        j                  ddj                  |            |j                         }|d   j                         d	k(  r|d   |_        n |d   j                         d
k(  r
|d   |_        |d   r| j                  |d         |_        | j"                  r| j"                  |_        |j'                  |        	 ddd       |S # 1 sw Y   |S xY w)*Gets the split points from the input file.spannerv1
SplitValue
ObjectName
ObjectType)TABLEINDEX--splits-fileInvalid split point string: {}. Each split point must be in the format of <ObjectType> <ObjectName> (<Split Value>) where ObjectType can be TABLE or INDEXr!   r"   N)r   GetMessagesModuler	   
FileReaderr   read
splitlinesParseSplitPointStringupperc_exceptionsInvalidArgumentExceptionformatSplitPointstableindexParseSplitValuekeysr   
expireTimeappend)r   msgssplit_points_listfilesingle_split_stringsingle_splitsplits          r   ProcesszSplitFileParser.ProcessL   sf   !!)T2D			$**	+t!%!7!7!9
112EF---L)//19KK55228&9L2M	    "%++-8$\2%+,'--/7:$\2%+%++L,FG%*%%!77%
  '5 ": 
,8 9 
,8 s   DE  E*c                     | j                   j                  |      }|r3|j                  d      |j                  d      |j                  d      dS t        j                  ddj                  |            )a[  Parses a string in the format "<ObjectType> <ObjectName> (<Split Value>)".

    and returns a dictionary with the extracted information.

    Args:
      input_string: The string to parse.

    Returns:
      A dictionary with keys "ObjectType", "ObjectName", and "SplitValue",
      or None if the input string is not in the expected format.
             )r    r   r   r#   r$   )r   matchgroupr+   r,   r-   )r   input_stringr@   s      r   r)   z%SplitFileParser.ParseSplitPointStringn   so     ##)),7EAAA  11
..4f\.B	 r   c                    t        j                  dd      }g }|j                         }| j                  j	                  |      s| j
                  j	                  |      r%t        j                  ddj                  |            g }| j                  j	                  |      }|rA|j                  |j                  d             |j                  |j                  d             nT| j                  j	                  |      }|r!|j                  |j                  d             nt        j                  dd      |D ]  }|j                         }|j                  d      }|j                         }| j                  |      D ]  }t        j                          }	|d	k(  rd
|	_        nn|dk(  s|dk(  s
|dk(  s|dk(  rt%        |j'                               |	_        n;|j+                  d      dk(  rt-        |      |	_        n|j                  d      |	_        |j2                  j                  |	        |j                  |        |S )zParses a string in the format "(CommaSeparatedKeyParts) TableKey (CommaSeparatedKeyParts)".

    and returns a dictionary with the extracted information.

    Args:
      input_string: The string to parse.

    Returns:
      A split point key.
    r   r   r#   zaInvalid split point string: {}. Each line must contain a single split point for a table or index.r=   r>   z2The split value must be surrounded by parenthesis.z()NULLTtruefalseTRUEFALSE')r   r%   stripr   r@   r   r+   r,   r-   r   r4   rA   r   KeyTokenizeWithCsvr   	JsonValueis_nullboollowerboolean_valuefindfloatdouble_valuestring_valuekeyParts)
r   rB   r5   keys_allall_keys_stringsr@   input_string_per_key
single_keysplit_token	key_partss
             r   r1   zSplitFileParser.ParseSplitValue   s    !!)T2DH%%'L2288		4	4	:	:<	H11
//5vl/C  ''--l;Eekk!n-ekk!n-%%++L9e	A/33@
 	
 !11779177=88:j--.BC+))+	& ")
 V#'&'&*;+<+<+>&?I#%+',['9i$'2'8'8'>i$""9-# D$ ooj!- !1. Or   c                     t        j                  t        j                  |      ddt         j                  d      }t        |      S )zTokenizes text using commas as delimiters, ignoring commas within single quotes.

    Args:
      text: The text to tokenize.

    Returns:
      A list of tokens.
    rI   T\)	quotecharskipinitialspacequoting
escapechar)csvreaderioStringIO
QUOTE_NONEnext)r   textre   s      r   rM   zSplitFileParser.TokenizeWithCsv   s9     ZZ
Ds~~	F <r   N)	__name__
__module____qualname____doc__r   r;   r)   r1   rM    r   r   r   r      s"    >8 D8>@r   r   c                 ^    t        | j                  | j                        j                         S )r   )r   r   r   r;   )argss    r   ParseSplitPointsrr      s$    	))4+E+E	F	N	N	PPr   )rn   
__future__r   r   r   rd   rf   r   apitools.base.pyr   googlecloudsdk.api_lib.utilr   googlecloudsdk.callioper   r+   googlecloudsdk.core.utilr	   r   rr   ro   r   r   <module>rx      s<    I &  ' 
 	 	 ( , > *z zzQr   