
                         x    d Z ddlZddlZddlZ G d de      Z G d de      Z G d de      Z G d	 d
e      Z	y)zDataflow-related utilities.
    Nc                       e Zd ZdZy)DecodeErrorzBase decode error.N)__name__
__module____qualname____doc__     1lib/third_party/ml_sdk/cloud/ml/util/_decoders.pyr   r      s    r
   r   c                       e Zd Zd Zy)PassthroughDecoderc                     |S Nr	   selfxs     r   decodezPassthroughDecoder.decode   s    Hr
   N)r   r   r   r   r	   r
   r   r   r      s    r
   r   c                       e Zd ZdZd Zy)JsonDecoderz"A decoder for JSON formatted data.c                 ,    t        j                  |      S r   )jsonloadsr   s     r   r   zJsonDecoder.decode$   s    ::a=r
   N)r   r   r   r   r   r	   r
   r   r   r   !   s
    *r
   r   c                   T    e Zd ZdZ G d de      Z G d de      Zd Zd Zd Z	d	 Z
y
)
CsvDecoderz$A decoder for CSV formatted data.
  c                   (    e Zd ZdZd Zd Zd Zd Zy)CsvDecoder._LineGeneratorzCA csv line generator that allows feeding lines to a csv.DictReader.c                     g | _         y r   )_linesr   s    r   __init__z"CsvDecoder._LineGenerator.__init__1   s	    dkr
   c                 V    | j                   rJ | j                   j                  |       y r   )r   append)r   lines     r   	push_linez#CsvDecoder._LineGenerator.push_line4   s     _
kkr
   c                     | S r   r	   r   s    r   __iter__z"CsvDecoder._LineGenerator.__iter__9   s    kr
   c                     t        | j                        }|dk(  rt        d      |dk(  s
J d|z         | j                  j                         S )Nr   z@Columns do not match specified csv headers: empty line was found   zUnexpected number of lines %s)lenr   r   pop)r   line_lengths     r   nextzCsvDecoder._LineGenerator.next<   sX     $k		NP 	PAL>LL [[__r
   N)r   r   r   r   r    r$   r&   r,   r	   r
   r   _LineGeneratorr   .   s    M
r
   r-   c                   (    e Zd ZdZd Zd Zd Zd Zy)CsvDecoder._ReaderWrapperz?A wrapper for csv.reader / csv.DictReader to make it picklable.c                     |||||f| _         || _        |r(t        j                  ||t	        |      |      | _        y t        j                  |t	        |      |      | _        y )N)	delimiterskipinitialspace)_state_line_generatorcsv
DictReaderstr_readerreader)r   line_generatorcolumn_namesr1   decode_to_dictskip_initial_spaces         r   r    z"CsvDecoder._ReaderWrapper.__init__M   s^    #\9n')dk+d	~~LC	N/1 zz.C	N3EGr
   c                 l    | j                   j                  |       | j                  j                         S r   )r4   r$   r8   r,   r   s     r   read_recordz%CsvDecoder._ReaderWrapper.read_recordZ   s)    
$$Q'\\  r
   c                     | j                   S r   )r3   r   s    r   __getstate__z&CsvDecoder._ReaderWrapper.__getstate__^   s    [[r
   c                 "     | j                   |  y r   )r    )r   states     r   __setstate__z&CsvDecoder._ReaderWrapper.__setstate__a   s    dmmUr
   N)r   r   r   r   r    r?   rA   rD   r	   r
   r   _ReaderWrapperr/   J   s    IG!r
   rE   c                     || _         t        |      | _        | j                  | j	                         ||||      | _        || _        || _        y)a  Initializer.

    Args:
      column_names: Tuple of strings. Order must match the order in the file.
      numeric_column_names: Tuple of strings. Contains column names that are
          numeric. Every name in numeric_column_names must also be in
          column_names.
      delimiter:  String used to separate fields.
      decode_to_dict: Boolean indicating whether the docoder should generate a
          dictionary instead of a raw sequence. True by default.
      fail_on_error: Whether to fail if a corrupt row is found.
      skip_initial_space: When True, whitespace immediately following the
          delimiter is ignored.
    N)_column_namesset_numeric_column_namesrE   r-   r8   _decode_to_dict_fail_on_error)r   r;   numeric_column_namesr1   r<   fail_on_errorr=   s          r   r    zCsvDecoder.__init__d   sS    " &D!$%9!:D&&|YDL *D'Dr
   c                 ^    | j                   rt        |      t        j                  d|       y)aj  Handle corrupt rows.

    Depending on whether the decoder is configured to fail on error it will
    raise a DecodeError or return None.

    Args:
      message: String, the error message to raise.
    Returns:
      None, when the decoder is not configured to fail on error.
    Raises:
      DecodeError: when the decoder is configured to fail on error.
    zDiscarding invalid row: %sN)rK   r   loggingwarning)r   messages     r   _handle_corrupt_rowzCsvDecoder._handle_corrupt_row}   s,        oo2G<r
   c                 ^    |r|j                         sy || j                  v rt        |      S |S r   )striprI   float)r   column_namevalues      r   
_get_valuezCsvDecoder._get_value   s/     d0005\Lr
   c                 d   	 | j                   j                  |      }t	        |      t	        | j
                        k7  r!| j                  d| j
                  d|      S | j                  rb|| j
                  d      !| j                  d| j
                  d|      S |j                         D ]  \  }}| j                  ||      ||<    |S t        | j
                        D ]  \  }}||   }| j                  ||      ||<   ! |S # t        $ r }| j                  |d|      cY d}~S d}~ww xY w)ax  Decodes the given string.

    Args:
      record: String to be decoded.

    Returns:
      Serialized object corresponding to decoded string. Or None if there's an
      error and the decoder is configured not to fail on error.

    Raises:
      DecodeError: If columns do not match specified csv headers.
      ValueError: If some numeric column has non-numeric data.
    z: Nz,Columns do not match specified csv headers: z -> )
r8   r?   	ExceptionrR   r)   rG   rJ   	iteritemsrX   	enumerate)r   recordenamerW   indexs         r   r   zCsvDecoder.decode   s8   >||''/f 6{c$,,--%%  &*+ +  
""2&	'	/''""F,- 	-  ))++$tU3t , M #4#5#56+%ue4u 7 M-  >%%!V&<==>s   D 	D/D*$D/*D/N)r   r   r   r   objectr-   rE   r    rR   rX   r   r	   r
   r   r   r   (   s3    
v 8v 4(2(&r
   r   )
r   r5   r   rO   r[   r   rb   r   r   r   r	   r
   r   <module>rc      sG      ) 
 & [ [r
   