
    w                        d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlZddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlZddlmZ  G d de      Z G d de      Z G d de      Z	 	 	 	 	 	 d&dZ G d de      Z  G d de      Z! G d de      Z" G d de      Z# ejH                  ddd g      Z% ejH                  d!d"d#g      Z& G d$ d%e
jN                        Z(y)'aB  Name expansion iterator and result classes.

Name expansion support for the various ways gsutil lets users refer to
collections of data (via explicit wildcarding as well as directory,
bucket, and bucket subdir implicit wildcarding). This class encapsulates
the various rules for determining how these expansions are done.
    )absolute_import)print_function)division)unicode_literalsN)encoding)CommandException)NO_URLS_MATCHED_GENERIC)NO_URLS_MATCHED_TARGET)PluralityCheckableIterator)SeekAheadResult)storage_v1_messages)StorageUrlFromStringc                       e Zd ZdZd Zd Zy)NameExpansionResulta*  Holds one fully expanded result from iterating over NameExpansionIterator.

  The member data in this class need to be pickleable because
  NameExpansionResult instances are passed through Multiprocessing.Queue. In
  particular, don't include any boto state like StorageUri, since that pulls
  in a big tree of objects, some of which aren't pickleable (and even if
  they were, pickling/unpickling such a large object tree would result in
  significant overhead).

  The state held in this object is needed for handling the various naming cases
  (e.g., copying from a single source URL to a directory generates different
  dest URL names than copying multiple URLs to a directory, to be consistent
  with naming rules used by the Unix cp command). For more details see comments
  in _NameExpansionIterator.
  c                     || _         || _        || _        || _        || _        |rt        j                  |      | _        yd| _        y)a  Instantiates a result from name expansion.

    Args:
      source_storage_url: StorageUrl that was being expanded.
      is_multi_source_request: bool indicator whether multiple input URLs or
          src_url_str expanded to more than one BucketListingRef.
      is_multi_top_level_source_request: same as is_multi_source_request but
          measured before recursion.
      names_container: Bool indicator whether src_url names a container.
      expanded_storage_url: StorageUrl that was expanded.
      expanded_result: cloud object metadata in MessageToJson form (for
          pickleability), if any was iterated; None otherwise.
          Consumers must call JsonToMessage to get an apitools Object.
    N)source_storage_urlis_multi_source_request!is_multi_top_level_source_requestnames_containerexpanded_storage_urlr   MessageToJsonexpanded_result)selfr   r   r   r   r   r   s          'platform/gsutil/gslib/name_expansion.py__init__zNameExpansionResult.__init__@   sR    " 1D#:D -ND**D 4D+ $11D15 	    c                      d| j                   z  S )Nz%s)r   r   s    r   __repr__zNameExpansionResult.__repr__Y   s    $++++r   N)__name__
__module____qualname____doc__r   r    r   r   r   r   /   s     62,r   r   c                   0    e Zd ZdZ	 	 	 	 	 	 ddZd Zd Zy)_NameExpansionIteratorzlClass that iterates over all source URLs passed to the iterator.

  See details in __iter__ function doc.
  Nc                 .   || _         || _        || _        || _        || _        || _        || _        | j                  j                         | j                  _        || _	        |	| _
        |
| _        || _        |st        dg      n|| _        ddd| _        y)a	  Creates a NameExpansionIterator.

    Args:
      command_name: name of command being run.
      debug: Debug level to pass to underlying iterators (range 0..3).
      logger: logging.Logger object.
      gsutil_api: Cloud storage interface.  Settable for testing/mocking.
      url_strs: PluralityCheckableIterator of URL strings needing expansion.
      recursion_requested: True if -r specified on command-line.  If so,
          listings will be flattened so mapped-to results contain objects
          spanning subdirectories.
      all_versions: Bool indicating whether to iterate over all object versions.
      cmd_supports_recursion: Bool indicating whether this command supports a
          '-r' flag. Useful for printing helpful error messages.
      project_id: Project id to use for bucket retrieval.
      ignore_symlinks: If True, ignore symlinks during iteration.
      continue_on_error: If true, yield no-match exceptions encountered during
                         iteration instead of raising them.
      bucket_listing_fields: Iterable fields to include in expanded results.
          Ex. ['name', 'acl']. Underyling iterator is responsible for converting
          these to list-style format ['items/name', 'items/acl']. If this is
          None, only the object name is included in the result.

    Examples of _NameExpansionIterator with recursion_requested=True:
      - Calling with one of the url_strs being 'gs://bucket' will enumerate all
        top-level objects, as will 'gs://bucket/' and 'gs://bucket/*'.
      - 'gs://bucket/**' will enumerate all objects in the bucket.
      - 'gs://bucket/abc' will enumerate either the single object abc or, if
         abc is a subdirectory, all objects under abc and any of its
         subdirectories.
      - 'gs://bucket/abc/**' will enumerate all objects under abc or any of its
        subdirectories.
      - 'file:///tmp' will enumerate all files under /tmp, as will
        'file:///tmp/*'
      - 'file:///tmp/**' will enumerate all files under /tmp or any of its
        subdirectories.

    Example if recursion_requested=False:
      calling with gs://bucket/abc/* lists matching objects
      or subdirs, but not sub-subdirs or objects beneath subdirs.

    Note: In step-by-step comments below we give examples assuming there's a
    gs://bucket with object paths:
      abcd/o1.txt
      abcd/o2.txt
      xyz/o1.txt
      xyz/o2.txt
    and a directory file://dir with file paths:
      dir/a.txt
      dir/b.txt
      dir/c/
    namez***)TFN)command_namedebuglogger
gsutil_apiurl_strsrecursion_requestedall_versionsHasPluralityhas_pluralitycmd_supports_recursion
project_idignore_symlinkscontinue_on_errorsetbucket_listing_fields_flatness_wildcard)r   r*   r+   r,   r-   r.   r/   r0   r3   r4   r5   r6   r8   s                r   r   z_NameExpansionIterator.__init__c   s    B %DDJDK DODM2D$D #'--"<"<">DMM"8D DO*D.D7L#vh-'< 	
 &*#6Dr   c              #   z  K   | j                   D ]u  }t        |      }|j                         rU|j                         s|j	                         r5| j                   j
                  rt        d      t        |ddd|d       td}|j                         rH|j                         r8| j                  s,t        | j                  |      j                  dg            }nWt        | j                  |      j                  | j                  d            }|j                         r|j                         rd}|j!                         }| j                   j
                  xs |}| j"                  | j                     }| j                  rt%        | ||| j                        }nt'        |      }t        |      }|j)                         r1| j*                  r	 t        t,        |z        t        t,        |z        t        t3        || j                  | j4                  | j6                  | j8                              }
|
j!                         }| j                   j
                  xs |}|
D ]"  \  }}|xs |}|j;                         r(t        |||||j<                  |j>                         Et        |j@                        }|j                         r|tB        jD                  |}n|jG                  |
      }t        | j                  |      jI                  | j                              }|xs |j!                         }| j                   j
                  xs |}|D ])  }t        |||d|j<                  |j>                         + % x y# t        $ r&}	|	t/        j0                         d	   f Y d}	~	d}	~	ww xY ww)a9  Iterates over all source URLs passed to the iterator.

    For each src url, expands wildcards, object-less bucket names,
    subdir bucket names, and directory names, and generates a flat listing of
    all the matching objects/files.

    You should instantiate this object using the static factory function
    NameExpansionIterator, because consumers of this iterator need the
    PluralityCheckableIterator wrapper built by that function.

    Yields:
      gslib.name_expansion.NameExpansionResult.

    Raises:
      CommandException: if errors encountered.
    zPMultiple URL strings are not supported with streaming ("-") URLs or named pipes.FN)r   r   r   r   r   r   id)bucket_fieldsT)r8   expand_top_level_buckets   wildcard_suffixr8   )%r.   r   	IsFileUrlIsStreamIsFifor2   r   r   
IsCloudUrlIsBucketr/   r   WildcardIteratorIterBucketsIterAllr8   r1   r9   _ImplicitBucketSubdirIterator_NonContainerTuplifyIteratorIsEmptyr6   r
   sysexc_info_OmitNonRecursiveIteratorr*   r3   r,   IsObjectstorage_urlroot_object
url_stringossepCreatePrefixUrlIterObjects)r   url_strrQ   src_names_bucketpost_step1_itersrc_url_expands_to_multir   subdir_exp_wildcardpost_step2_iterepost_step3_iterr   r   blrsrc_names_containerexpanded_urlurl_to_iteratewc_iters                     r   __iter__z_NameExpansionIterator.__iter__   s    " ==(1k



!![%7%7%9==&&  "M N N![:?DI277B268 	8 	 

 
 
"{';';'=&&
 5!!'*66dV6LN 5!!'*22&*&@&@)- 3 /0 !!#(<(<(>!
!0!=!=!?+/==+F+F ,D+C ( !33D4L4LM		!	!7/#6&&( 7G2?Co 
	 	 	"!!)"#9G#CDD !!7'!AB
B 3
#OT5M5M$($5$5$($?$?NOo
 "1!=!=!?!%!<!< ":!9  %4
 ?C.A/<<>#!,&=/1#&??!oo/ / .cnn=,##%),bff6IJN *99 3 : 5N /##N3??(,(B(B @ DE' '? '=&-&:&:&< #%)]]%@%@ &>%= " c%#.(?5 $%(__ #1 1 I %4s !D " ) cllnQ'((()s1   GN;N	!F(N;		N8N3-N;3N88N;c                     t         j                  j                  || j                  | j                  | j
                  | j                  | j                        S )a2  Helper to instantiate gslib.WildcardIterator.

    Args are same as gslib.WildcardIterator interface, but this method fills
    in most of the values from instance state.

    Args:
      url_string: URL string naming wildcard objects to iterate.

    Returns:
      Wildcard iterator over URL string.
    )r0   r4   r5   r,   )gslibwildcard_iteratorCreateWildcardIteratorr-   r0   r4   r5   r,   )r   rS   s     r   rG   z'_NameExpansionIterator.WildcardIteratorQ  sL     ""99&&??,,{{ :  r   FTNFFN)r    r!   r"   r#   r   re   rG   r$   r   r   r&   r&   ]   s.     "&*$!&%)T7lV1pr   r&   c                   (    e Zd ZdZ	 	 	 	 	 ddZd Zy)SeekAheadNameExpansionIteratorzCreates and wraps a _NameExpansionIterator and yields SeekAheadResults.

  Unlike the NameExpansionIterator, which can make API calls upon __init__
  to check for plurality, this iterator does no work until the first iteration
  occurs.
  Nc                     |dv xr |
 | _         | j                   rdgnd}t        ||t        j                  d      |t	        |      |||||	d|      | _        y)z5Initializes a _NameExpansionIterator with the inputs.)cpmvrewritesizeNdummyTr0   r3   r4   r5   r6   r8   )count_data_bytesr&   logging	getLoggerr   name_expansion_iterator)r   r*   r+   r-   r.   r/   r0   r3   r4   r5   file_size_will_changer8   s               r   r   z'SeekAheadNameExpansionIterator.__init__n  su      *-DD 7!66 	
 )-(=(=VH4#9'""8,!5'3$5D r   c              #     K   | j                   D ]s  }| j                  rY|j                  rMt        j                  t
        j                  |j                        }|j                  xs d}t        |       ht                u y w)Nr   )
data_bytes)	rw   rt   r   r   JsonToMessageapitools_messagesObjectrq   r   )r   name_expansion_resultiterated_metadataiterated_sizes       r   re   z'SeekAheadNameExpansionIterator.__iter__  sq     !%!=!=			#8#H#H$22$$&;&K&KM)..3!77 ">s   BB)FTNFFr    r!   r"   r#   r   re   r$   r   r   rl   rl   f  s#     "&*$%*#5J r   rl   c                     t        |      }t        | |||||||||	|
|      }t        |      }|j                         rt        t              |S )ar  Static factory function for instantiating _NameExpansionIterator.

  This wraps the resulting iterator in a PluralityCheckableIterator and checks
  that it is non-empty. Also, allows url_strs to be either an array or an
  iterator.

  Args:
    command_name: name of command being run.
    debug: Debug level to pass to underlying iterators (range 0..3).
    logger: logging.Logger object.
    gsutil_api: Cloud storage interface.  Settable for testing/mocking.
    url_strs: Iterable URL strings needing expansion.
    recursion_requested: True if -r specified on command-line.  If so,
        listings will be flattened so mapped-to results contain objects
        spanning subdirectories.
    all_versions: Bool indicating whether to iterate over all object versions.
    cmd_supports_recursion: Bool indicating whether this command supports a '-r'
        flag. Useful for printing helpful error messages.
    project_id: Project id to use for the current command.
    ignore_symlinks: If True, ignore symlinks during iteration.
    continue_on_error: If true, yield no-match exceptions encountered during
                       iteration instead of raising them.
    bucket_listing_fields: Iterable fields to include in expanded results.
        Ex. ['name', 'acl']. Underyling iterator is responsible for converting
        these to list-style format ['items/name', 'items/acl']. If this is
        None, only the object name is included in the result.

  Raises:
    CommandException if underlying iterator is empty.

  Returns:
    Name expansion iterator instance.

  For example semantics, see comments in NameExpansionIterator.__init__.
  rs   )r   r&   rL   r   r	   )r*   r+   r,   r-   r.   r/   r0   r3   r4   r5   r6   r8   rw   s                r   NameExpansionIteratorr     sj    ^ (1(23%)13 77NO$$&
2
33	  r   c                       e Zd ZdZd Zd Zy)rK   zIterator that produces the tuple (False, blr) for each iterated value.

  Used for cases where blr_iter iterates over a set of
  BucketListingRefs known not to name containers.
  c                     || _         y)zTInstantiates iterator.

    Args:
      blr_iter: iterator of BucketListingRef.
    Nblr_iter)r   r   s     r   r   z%_NonContainerTuplifyIterator.__init__  s     DMr   c              #   :   K   | j                   D ]  }d|f 
 y w)NFr   )r   r`   s     r   re   z%_NonContainerTuplifyIterator.__iter__  s     }}CL s   Nr   r$   r   r   rK   rK     s    r   rK   c                       e Zd ZdZd Zd Zy)rO   a  Iterator wrapper for that omits certain values for non-recursive requests.

  This iterates over tuples of (names_container, BucketListingReference) and
  omits directories, prefixes, and buckets from non-recurisve requests
  so that we can properly calculate whether the source URL expands to multiple
  URLs.

  For example, if we have a bucket containing two objects: bucket/foo and
  bucket/foo/bar and we do a non-recursive iteration, only bucket/foo will be
  yielded.
  c                 J    || _         || _        || _        || _        || _        y)a  Instanties the iterator.

    Args:
      tuple_iter: Iterator over names_container, BucketListingReference
                  from step 2 in the NameExpansionIterator
      recursion_requested: If false, omit buckets, dirs, and subdirs
      command_name: Command name for user messages
      cmd_supports_recursion: Command recursion support for user messages
      logger: Log object for user messages
    N)
tuple_iterr/   r*   r3   r,   )r   r   r/   r*   r3   r,   s         r   r   z"_OmitNonRecursiveIterator.__init__  s*     !DO2D$D"8DDKr   c              #     K   | j                   D ]  \  }}| j                  s|j                         st        |j                        }|j                         rd}n|j                  }| j                  r3| j                  j                  d||j                  | j                         | j                  j                  d||j                         ||f  y w)N	directoryz-Omitting %s "%s". (Did you mean to do %s -r?)zOmitting %s "%s".)r   r/   rP   r   rS   rB   	type_namer3   r,   infor*   )r   r   r`   rb   descs        r   re   z"_OmitNonRecursiveIterator.__iter__  s     "&//#%%clln ,CNN;!!#$$&&
++

J1B1BD ++

.cnn
E$$ #2s   CCNr   r$   r   r   rO   rO     s    
$%r   rO   c                       e Zd ZdZd Zd Zy)rJ   a  Iterator wrapper that performs implicit bucket subdir expansion.

  Each iteration yields tuple (names_container, expanded BucketListingRefs)
    where names_container is true if URL names a directory, bucket,
    or bucket subdir.

  For example, iterating over [BucketListingRef("gs://abc")] would expand to:
    [BucketListingRef("gs://abc/o1"), BucketListingRef("gs://abc/o2")]
  if those subdir objects exist, and [BucketListingRef("gs://abc") otherwise.
  c                 <    || _         || _        || _        || _        y)a  Instantiates the iterator.

    Args:
      name_exp_instance: calling instance of NameExpansion class.
      blr_iter: iterator over BucketListingRef prefixes and objects.
      subdir_exp_wildcard: wildcard for expanding subdirectories;
          expected values are ** if the mapped-to results should contain
          objects spanning subdirectories, or * if only one level should
          be listed.
      bucket_listing_fields: Fields requested in enumerated results.
    N)r   name_exp_instancer\   r8   )r   r   r   r\   r8   s        r   r   z&_ImplicitBucketSubdirIterator.__init__3  s#     DM.D2D!6Dr   c              #     K   | j                   D ]  }|j                         rt        |j                        j	                  | j
                        }t        | j                  j                  |      j                  | j                              }|j                         s|D ]  }d|f 
 d|f |j                         rd|f t        d|z         y w)Nr?   rA   TFz7_ImplicitBucketSubdirIterator got a bucket reference %s)r   IsPrefixr   rS   rV   r\   r   r   rG   rI   r8   rL   rP   r   )r   r`   
prefix_urlimplicit_subdir_iteratorexp_blrs        r   re   z&_ImplicitBucketSubdirIterator.__iter__E  s     }}	)#..9II 44 J 6
#=""33J?GG&*&@&@ H B$C  (//11g/! 2
 
<<>clEKM 	M% s   CCNr   r$   r   r   rJ   rJ   '  s    	7$Mr   rJ   c                       e Zd ZdZd Zy)CopyObjectInfozARepresents the information needed for copying a single object.
  c                     |j                   | _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        || _        || _        y)aE  Instantiates the object info from name expansion result and destination.

    Args:
      name_expansion_result: StorageUrl that was being expanded.
      exp_dst_url: StorageUrl of the destination.
      have_existing_dst_container: Whether exp_url names an existing directory,
          bucket, or bucket subdirectory.
    N)r   r   r   r   r   r   exp_dst_urlhave_existing_dst_container)r   r~   r   r   s       r   r   zCopyObjectInfo.__init__`  sk     4FFD#8#P#PD ?? 	*0@@D 5 J JD0@@D"D'BD$r   N)r    r!   r"   r#   r   r$   r   r   r   r   \  s    Cr   r   DestinationInfor   r   %NameExpansionIteratorDestinationTuplename_expansion_iterdestinationc                   "    e Zd ZdZd Zd Zd Zy)CopyObjectsIteratora  Iterator wrapper for copying objects and keeping track of source URL types.

  This is used in the cp command for copying from multiple source to multiple
  destinations. It takes a list of NameExpansionIteratorDestinationTuple. It
  wraps them and return CopyObjectInfo objects that wraps NameExpansionResult
  with the destination. It's used also for collecting analytics
  PerformanceSummary info, because there may be multiple source URLs and we want
  to know if any of them are file URLs, if any of them are cloud URLs, if any of
  them require daisy chain operations, and if any use different providers. The
  source URL type information will be aggregated at the end of _SequentialApply
  or _ParallelApply.
  c                     || _         d| _        d| _        g | _        || _        t        | j                        }|j                  | _        |j                  | _	        y)a  Instantiates the iterator.

    Args:
      name_expansion_dest_iter: NameExpansionIteratorDestinationTuple iterator.
      is_daisy_chain: The -D option in cp might have already been specified, in
          which case we do not need to check again for daisy chain operations.
    FN)
is_daisy_chainhas_file_srchas_cloud_srcprovider_typesname_expansion_dest_iternextr   current_expansion_iterr   current_destination)r   r   r   name_expansion_dest_tuples       r   r   zCopyObjectsIterator.__init__  sY     )DDDD$<D! $T%B%B C";"O"OD8DDDr   c                     | S )Nr$   r   s    r   re   zCopyObjectsIterator.__iter__  s    Kr   c                    	 t        | j                        }t        || j                  j                  | j                  j                        }| j                  s!|j                  j                         rd| _        | j                  s!|j                  j                         rd| _        | j                  j                  j                         r!| j                  j                  j                   }nd}| j"                  s<|:|j                  j                         r |j                  j                   |k7  rd| _        |j                  j                   | j$                  vr/| j$                  j'                  |j                  j                          |S # t        $ rJ t        | j                        }|j                  | _        |j
                  | _        | j                         cY S w xY w)z@Keeps track of URL types as the command iterates over arguments.TN)r   r   StopIterationr   r   r   r   __next__r   r   r   r   r   rB   r   rE   schemer   r   append)r   r~   r   eltdst_url_schemes        r   r   zCopyObjectsIterator.__next__  s   "4#>#>? .11==11MMOC
 !7!7!A!A!Cd#"8"8"C"C"Ed ++668//;;BBnnN$>))+%%7 d
$$D,?,??
  !7!7!>!>?JA  "&t'D'D"E
#
7
7 !!:!F!Fd]]_s   F AGGN)r    r!   r"   r#   r   re   r   r$   r   r   r   r     s    E$$r   r   rj   ))r#   
__future__r   r   r   r   collectionsru   rT   rM   sixapitools.base.pyr   rg   gslib.exceptionr   r	   r
   "gslib.plurality_checkable_iteratorr   gslib.seek_ahead_threadr   "gslib.third_party.storage_apitoolsr   r|   gslib.wildcard_iteratorr   objectr   r&   rl   r   rK   rO   rJ   r   
namedtupler   r   Iteratorr   r$   r   r   <module>r      s   ' %  '   	 
 
 %  , 3 2 I 3 W  8+,& +,\FV FR5 V 5 | (-15%)*/,104@!F6 (/% /%d2MF 2MjCV C6 )+(( 	 	& )?(>(>+.) %G#,, Gr   