
    @                     0   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	Z	ddl
mZ ddl
mZ dd	lmZ dd
lmZ  ej                   d      Z ej                   d      Z ej                   d      Z ej                   d      Z ej                   d      Z ej                   d      Z ej                   d      Z eddg      Z G d de      Z G d de      Z G d de      Zd Zd Zd Z d Z!d Z"d Z#d  Z$d(d!Z%d" Z&d# Z'd$ Z(d% Z)d& Z*d' Z+y))z*File and Cloud URL representation classes.    )absolute_import)print_function)division)unicode_literalsN)CommandException)InvalidUrlError)system_util)	text_utilz(?P<provider>[^:]*)://$z.(?P<provider>[^:]*)://(?P<bucket>[^/]*)/{0,1}$z6(?P<provider>[^:]*)://(?P<bucket>[^/]*)/(?P<object>.*)z&(?P<object>.+)#(?P<generation>[0-9]+)$z"(?P<object>.+)#(?P<version_id>.+)$z([^:]*://)(?P<filepath>.*)z[*?\[\]]...c                   h    e Zd ZdZd Zd Zd Zd Zd ZddZ	d	 Z
ed
        Zed        Zd Zd Zy)
StorageUrlz4Abstract base class for file and Cloud Storage URLs.c                     t        d      )NzClone not overriddenNotImplementedErrorselfs    $platform/gsutil/gslib/storage_url.pyClonezStorageUrl.Clone6   s    
4
55    c                     t        d      )NzIsFileUrl not overriddenr   r   s    r   	IsFileUrlzStorageUrl.IsFileUrl9   s    
8
99r   c                     t        d      )NzIsCloudUrl not overriddenr   r   s    r   
IsCloudUrlzStorageUrl.IsCloudUrl<   s    
9
::r   c                      t        d      )NzIsStream not overriddenr    r   r   IsStreamzStorageUrl.IsStream?   s    
7
88r   c                     t        d      )NzIsFifo not overriddenr   r   s    r   IsFifozStorageUrl.IsFifoB   s    
5
66r   Nc                     t        d      )ai  Returns a prefix of this URL that can be used for iterating.

    Args:
      wildcard_suffix: If supplied, this wildcard suffix will be appended to the
                       prefix with a trailing slash before being returned.

    Returns:
      A prefix of this URL that can be used for iterating.

    If this URL contains a trailing slash, it will be stripped to create the
    prefix. This helps avoid infinite looping when prefixes are iterated, but
    preserves other slashes so that objects with '/' in the name are handled
    properly.

    For example, when recursively listing a bucket with the following contents:
      gs://bucket// <-- object named slash
      gs://bucket//one-dir-deep
    a top-level expansion with '/' as a delimiter will result in the following
    URL strings:
      'gs://bucket//' : OBJECT
      'gs://bucket//' : PREFIX
    If we right-strip all slashes from the prefix entry and add a wildcard
    suffix, we will get 'gs://bucket/*' which will produce identical results
    (and infinitely recurse).

    Example return values:
      ('gs://bucket/subdir/', '*') becomes 'gs://bucket/subdir/*'
      ('gs://bucket/', '*') becomes 'gs://bucket/*'
      ('gs://bucket/', None) becomes 'gs://bucket'
      ('gs://bucket/subdir//', '*') becomes 'gs://bucket/subdir//*'
      ('gs://bucket/subdir///', '**') becomes 'gs://bucket/subdir///**'
      ('gs://bucket/subdir/', '*') where 'subdir/' is an object becomes
           'gs://bucket/subdir/*', but iterating on this will return 'subdir/'
           as a BucketListingObject, so we will not recurse on it as a subdir
           during listing.
    zCreatePrefixUrl not overriddenr   r   wildcard_suffixs     r   CreatePrefixUrlzStorageUrl.CreatePrefixUrlE   s    J >
??r   c                 d   | j                   sy| j                  | j                   z   | j                  z   }|j                  dj                  | j                              }dj	                  |      }d|v r=t
        j                  j                  dj                  t        j                               yy)z-Warn if ** use may lead to undefined results.Nz{delim}**{delim})delim z**zw** behavior is undefined if directly preceeded or followed by with characters other than / in the cloud and {} locally.)
object_namer%   splitformatjoinsysstderrwriteossep)r   delimiter_bounded_url	split_url+removed_correct_double_wildcards_url_strings       r    _WarnIfUnsupportedDoubleWildcardz+StorageUrl._WarnIfUnsupportedDoubleWildcardl   s      JJ)9)99DJJF%++!!

!35I24'')2D/:: 
jjGGMvffH	 ;r   c                     t        d      )Nzurl_string not overriddenr   r   s    r   
url_stringzStorageUrl.url_string   s    
9
::r   c                     t        d      )Nz%versionless_url_string not overriddenr   r   s    r   versionless_url_stringz!StorageUrl.versionless_url_string   s    
E
FFr   c                 X    t        |t              xr | j                  |j                  k(  S N)
isinstancer   r5   )r   others     r   __eq__zStorageUrl.__eq__   s#    eZ(PT__@P@P-PPr   c                 ,    t        | j                        S r9   )hashr5   r   s    r   __hash__zStorageUrl.__hash__         r   r9   )__name__
__module____qualname____doc__r   r   r   r   r   r#   r3   propertyr5   r7   r<   r?   r   r   r   r   r   3   s_    <6:;97%@N, ; ; G GQ!r   r   c                   j    e Zd ZdZddZd Zd Zd Zd Zd Z	d Z
dd
Zed        Zed        Zd Zy	)_FileUrla  File URL class providing parsing and convenience methods.

    This class assists with usage and manipulation of an
    (optionally wildcarded) file URL string.  Depending on the string
    contents, this class represents one or more directories or files.

    For File URLs, scheme is always file, bucket_name is always blank,
    and object_name contains the file/directory path.
  c                 ~   d| _         t        j                  | _        d| _        t
        j                  |      }|r&|j                  dk(  r|j                  d      | _	        n|| _	        t        j                  r!| j                  j                  dd      | _	        d | _        || _        || _        | j!                          y )Nfiler&      /\)schemer.   r/   r%   bucket_nameFILE_OBJECT_REGEXmatch	lastindexgroupr'   r	   
IS_WINDOWSreplace
generation	is_streamis_fifor3   )r   r5   rV   rW   rP   s        r   __init__z_FileUrl.__init__   s    DKDJD ##J/EA%Qd#d ))11#t<dDODNDL))+r   c                 ,    t        | j                        S r9   )rG   r5   r   s    r   r   z_FileUrl.Clone   s    DOO$$r   c                      yNTr   r   s    r   r   z_FileUrl.IsFileUrl       r   c                      yNFr   r   s    r   r   z_FileUrl.IsCloudUrl       r   c                     | j                   S r9   )rV   r   s    r   r   z_FileUrl.IsStream   s    >>r   c                     | j                   S r9   )rW   r   s    r   r   z_FileUrl.IsFifo   s    <<r   c                     | j                          xr< | j                          xr) t        j                  j	                  | j
                        S r9   )r   r   r.   pathisdirr'   r   s    r   IsDirectoryz_FileUrl.IsDirectory   s>     ,$5 ,GGMM$**+-r   Nc                     | j                   S r9   r5   r!   s     r   r#   z_FileUrl.CreatePrefixUrl       ??r   c                 8    | j                   d| j                  S )N://)rM   r'   r   s    r   r5   z_FileUrl.url_string   s    T%5%566r   c                     | j                   S r9   rg   r   s    r   r7   z_FileUrl.versionless_url_string   s    ??r   c                     | j                   S r9   rg   r   s    r   __str__z_FileUrl.__str__   rh   r   )FFr9   )rA   rB   rC   rD   rX   r   r   r   r   r   re   r#   rE   r5   r7   rm   r   r   r   rG   rG      s\    ,,%- 7 7  r   rG   c                       e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd ZddZed        Zed        Zed        Zd Zy)	_CloudUrlak  Cloud URL class providing parsing and convenience methods.

    This class assists with usage and manipulation of an
    (optionally wildcarded) cloud URL string.  Depending on the string
    contents, this class represents a provider, bucket(s), or object(s).

    This class operates only on strings.  No cloud storage API calls are
    made from this class.
  c                    d | _         d| _        d | _        d | _        d | _        t
        j                  |      }t        j                  |      }|r|j                  d      | _         n|r.|j                  d      | _         |j                  d      | _        nXt        j                  |      }|r2|j                  d      | _         |j                  d      | _        |j                  d      | _        | j                  dk(  s| j                  dk(  rt        d| j                  z        | j                   dk(  rNt        j                  | j                        }|r|j                  d      | _        |j                  d	      | _        nk| j                   d
k(  r\t        j                  | j                        }|r;|j                  d      | _        |j                  d      | _        nt        d|z        |t        | j                         t        d      z   d  j                  | j                        rt        dj                  |            | j!                          y )NrK   providerbucketobjectr   r   z'%s is an invalid root-level object namegsrU   s3
version_idz/CloudUrl: URL string %s did not match URL regexrj   zPCloud URL scheme should be followed by colon and two slashes: "://". Found: "{}")rM   r%   rN   r'   rU   PROVIDER_REGEXrP   BUCKET_REGEXrR   OBJECT_REGEXr   GS_GENERATION_REGEXS3_VERSION_REGEXlen
startswithr)   r3   )r   r5   provider_matchbucket_matchobject_matchgeneration_matchversion_matchs          r   rX   z_CloudUrl.__init__   s   DKDJDDDO#))*5N%%j1L"((4dk	 &&z2dk%++H5d!''
3l	"((4'--h7'--h7s"d&6&6$&> I $ 0 0!1 2 2;;$066t7G7GH
/55h?D.44\BDO[[D *001A1AB-,228<D+11,?DO=
JL 	L 3t{{#c%j023>>tzzJ
+- - 	))+r   c                 ,    t        | j                        S r9   )ro   r5   r   s    r   r   z_CloudUrl.Clone
  s    T__%%r   c                      yr^   r   r   s    r   r   z_CloudUrl.IsFileUrl  r_   r   c                      yr[   r   r   s    r   r   z_CloudUrl.IsCloudUrl  r\   r   c                     t        d      )Nz"IsStream not supported on CloudUrlr   r   s    r   r   z_CloudUrl.IsStream  s    
B
CCr   c                     t        d      )Nz IsFifo not supported on CloudUrlr   r   s    r   r   z_CloudUrl.IsFifo  s    
@
AAr   c                 J    t        | j                  xr | j                         S r9   boolrN   r'   r   s    r   IsBucketz_CloudUrl.IsBucket  s!      9)9)9%9::r   c                 H    t        | j                  xr | j                        S r9   r   r   s    r   IsObjectz_CloudUrl.IsObject  s      5T%5%566r   c                 ,    t        | j                        S r9   )r   rU   r   s    r   HasGenerationz_CloudUrl.HasGeneration  r@   r   c                 J    t        | j                  xr | j                         S r9   )r   rM   rN   r   s    r   
IsProviderz_CloudUrl.IsProvider"  s    4D$4$4 455r   Nc                 B    t        | j                        }|r|d|}|S )NrK   )StripOneSlashr7   )r   r"   prefixs      r   r#   z_CloudUrl.CreatePrefixUrl%  s$    4667F /2fMr   c                 :    | j                   d| j                  dS )Nrj   rK   )rM   rN   r   s    r   bucket_url_stringz_CloudUrl.bucket_url_string+  s    d&6&677r   c                 b    | j                   }| j                         r|d| j                  z  z  }|S )Nz#%s)r7   r   rU   )r   url_strs     r   r5   z_CloudUrl.url_string/  s1    ))G((gNr   c                     | j                         rd| j                  z  S | j                         r| j                  S | j                  d| j                  d| j
                  S )Nz%s://rj   rK   )r   rM   r   r   rN   r'   r   s    r   r7   z _CloudUrl.versionless_url_string6  sM    t{{""	###;;(8(8$:J:JKKr   c                     | j                   S r9   rg   r   s    r   rm   z_CloudUrl.__str__>  rh   r   r9   )rA   rB   rC   rD   rX   r   r   r   r   r   r   r   r   r   r#   rE   r   r5   r7   rm   r   r   r   ro   ro      s    ),V&DB;7!6 8 8   L Lr   ro   c                 V    | j                  d      }|dk(  ry| d| j                         S )z)Returns scheme component of a URL string.rj   rI   r   )findlowerr   end_scheme_idxs     r   GetSchemeFromUrlStringr   B  s3     <<&.r1^$**,,r   c                 
    | dv S )N)rI   ru   rt   r   )
scheme_strs    r   IsKnownUrlSchemer   M  s    	+	++r   c                 B    | j                  d      }|dk(  r| S | |dz   d S )z'Returns path component of a URL string.rj   r      N)r   r   s     r   _GetPathFromUrlStringr   Q  s2     <<&.rN>A%&''r   c                 >    t        t        j                  |             S )zChecks whether url_string contains a wildcard.

  Args:
    url_string: URL string to check.

  Returns:
    bool indicator.
  )r   WILDCARD_REGEXsearchrg   s    r   ContainsWildcardr   [  s     
n##J/	00r   c                 R    | j                   dk(  r|rt        j                  |      S |S )aD  Decodes a generation from a StorageURL and a generation string.

  This is used to represent gs and s3 versioning.

  Args:
    url: StorageUrl representing the object.
    generation: Long or string representing the object's generation or
                version.

  Returns:
    Valid generation string for use in URLs.
  ru   )rM   r
   DecodeLongAsString)urlrU   s     r   GenerationFromUrlAndStringr   g  s)     	ZZ4J''
33	r   c                 L    | D ]  }t        |      }|j                         s y y)zChecks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  TF)StorageUrlFromStringr   args_to_checkr   storage_urls      r   HaveFileUrlsr   y  s,     g&w/K  
r   c                 n    | D ]0  }t        |      }|j                         s|j                         s0 y y)zChecks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  TF)r   r   r   r   s      r   HaveProviderUrlsr     s7     g&w/KK$:$:$<  
r   c                     | j                         sy| j                  }|j                  d      ry|r'|j                         r|j                  j
                  }nd}|dk(  xr |j                  d      S )aX  Determines if a StorageUrl is a cloud subdir placeholder.

  This function is needed because GUI tools (like the GCS cloud console) allow
  users to create empty "folders" by creating a placeholder object; and parts
  of gsutil need to treat those placeholder objects specially. For example,
  gsutil rsync needs to avoid downloading those objects because they can cause
  conflicts (see comments in rsync command for details).

  We currently detect two cases:
    - Cloud objects whose name ends with '_$folder$'
    - Cloud objects whose name ends with '/'

  Args:
    url: (gslib.storage_url.StorageUrl) The URL to be checked.
    blr: (gslib.BucketListingRef or None) The blr to check, or None if not
        available. If `blr` is None, size won't be checked.

  Returns:
    (bool) True if the URL is a cloud subdir placeholder, otherwise False.
  Fz	_$folder$Tr   rK   )r   r5   endswithr   root_objectsize)r   blrr   r   s       r   IsCloudSubdirPlaceholderr     sd    * 
	NN'k"S\\^??DD		,w'',,r   c                     t        |       dk(  S )z'Returns whether a string is a file URL.rI   )r   r   s    r   IsFileUrlStringr     s     
 	(F	22r   c                 .   t        |       }t        |      st        d|z        |dk(  rSt        |       }|dk(  }d}	 t	        j
                  t        j                  |      j                        }t        | ||      S t        |       S # t        $ r Y $w xY w)z@Static factory function for creating a StorageUrl from a string.zUnrecognized scheme "%s"rI   -F)rV   rW   )r   r   r   r   statS_ISFIFOr.   st_modeOSErrorrG   ro   )r   rM   rc   rV   rW   s        r   r   r     s     "'*&	&	!
4v=
>>v )DIGbggdm334g Gy'BB	7	  
s   2B 	BBc                 6    | r| j                  d      r| d d S | S )NrK   r   )r   r   s    r   r   r     s#    !!#&3B<	.r   c                 v    d}d}| D ]-  }t        |      }|s|j                  }|j                  |k7  s- y |duS )aB  Tests whether the URLs are all for a single provider.

  Args:
    url_args: (Iterable[str]) Collection of strings to check.

  Returns:
    True if all URLs are for single provider; False if `url_args` was empty (as
    this would not result in a single unique provider) or URLs targeted multiple
    unique providers.
  NF)r   rM   )url_argsrq   r   r   s       r   UrlsAreForSingleProviderr     sJ     (#g
w
'Ch	x	  
	r   c                     t        d | D              r/t        t        d |             }t        |      xr t        |       S y)a6  Tests whether the URLs are a mix of buckets and objects.

  Args:
    url_args: (Iterable[gslib.storage_url.StorageUrl]) Collection of URLs to
    check.

  Returns:
    True if URLs are a mix of buckets and objects. False if URLs are all buckets
    or all objects. None if invalid Cloud URLs are included.
  c              3   <   K   | ]  }|j                           y wr9   )r   ).0r   s     r   	<genexpr>z0UrlsAreMixOfBucketsAndObjects.<locals>.<genexpr>  s     *Tc	Ts   c                 "    | j                         S r9   )r   )xs    r   <lambda>z/UrlsAreMixOfBucketsAndObjects.<locals>.<lambda>  s
    QZZ\r   N)alllistmapany)urlsare_bucketss     r   UrlsAreMixOfBucketsAndObjectsr     sA     	*T**s1489K{4C$4 44 +r   c                 6    t        |       r|st        d      yy)zBRaises error if mix of buckets and objects adjusted for recursion.z/Cannot operate on a mix of buckets and objects.N)r   r   )r   recursion_requesteds     r   )RaiseErrorIfUrlsAreMixOfBucketsAndObjectsr     s"    "4(1D
L
MM 2E(r   r9   ),rD   
__future__r   r   r   r   r.   rer   r+   gslib.exceptionr   r   gslib.utilsr	   r
   compilerw   rx   ry   rz   r{   rO   r   	frozensetRELATIVE_PATH_SYMBOLSrs   r   rG   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s1   1 & %  ' 	 	  
 , + # ! 67rzzKLrzz=? !bjj!JK 2::CD BJJ<= K(!3+. [! [!|@z @Fk
 k\-,(	1$  -B3&,5 Nr   