
    !h&l                       S r SSKJr  SSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
Jr  SSKJr  SSKJr  SSKJr  SSKJr  SS	KJrJrJrJrJrJrJr  SS
KJr  SSKJr  SSK J!r!J"r"  SSK#J$r$  SSK%J&r&J'r'J(r(  SSK)J*r*J+r+  SSK,J-r-  SSK.J/r/J0r0J1r1  SSK2J3r3J4r4  SSK5J6r6  SSK7J8r8  SSK9J:r:  SSK;J<r<  SSK=J>r>  SSK?J@r@JArAJBrB  SSKCJDrD  \(       a$  SSKEJFrF  SSKGJHrH  SSKIJJrJ  SSKKJLrL  SSKMJNrN  SSKOJPrP  \R                  " \R5      rSS4S  jrTS5S! jrU " S" S#\V5      rW " S$ S%\S&S'9rX " S( S)\5      rY " S* S+5      rZ " S, S-5      r[ " S. S/5      r\ " S0 S15      r] " S2 S3\15      r^g)6z@
Files Pipeline

See documentation in topics/media-pipeline.rst
    )annotationsN)defaultdict)suppress)FTP)BytesIO)Path)IOTYPE_CHECKINGAnyNoReturnProtocol	TypedDictcast)urlparse)ItemAdapter)DeferredmaybeDeferred)deferToThread)IgnoreRequestNotConfiguredScrapyDeprecationWarning)RequestResponse)NO_CALLBACK)FileInfoFileInfoOrErrorMediaPipeline)BaseSettingsSettings)is_botocore_available)CaseInsensitiveDict)method_is_overridden)ftp_store_file)failure_to_exc_info)get_func_argsglobal_object_nameto_bytes)referer_str)Callable)PathLike)Failure)Self)Spider)Crawlerc                    [        U 5      $ N)str)paths    ]/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/scrapy/pipelines/files.py
_to_stringr4   9   s    t9    c                    [         R                  " 5       n U R                  S5      nU(       d   UR	                  5       $ UR                  U5        M<  )zCalculate the md5 checksum of a file-like object without reading its
whole content in memory.

>>> from io import BytesIO
>>> _md5sum(BytesIO(b'file content to hash'))
'784406af91dd5a54fbb9c84c2236595a'
i  )hashlibmd5readupdate	hexdigest)filemds      r3   _md5sumr?   =   sC     	A
IIdO;;= 	
	 r5   c                      \ rS rSrSrSrg)FileExceptionN   zGeneral media error exception N)__name__
__module____qualname____firstlineno____doc____static_attributes__rC   r5   r3   rA   rA   N   s    'r5   rA   c                  *    \ rS rSr% S\S'   S\S'   Srg)StatInfoR   r1   checksumfloatlast_modifiedrC   N)rD   rE   rF   rG   __annotations__rI   rC   r5   r3   rK   rK   R   s    Mr5   rK   F)totalc                  \    \ rS rSrSS jr  S           S	S jjr      S
S jrSrg)FilesStoreProtocolW   c                    g r0   rC   selfbasedirs     r3   __init__FilesStoreProtocol.__init__X   s    cr5   Nc                    g r0   rC   rW   r2   bufinfometaheaderss         r3   persist_fileFilesStoreProtocol.persist_fileZ   s      #r5   c                    g r0   rC   )rW   r2   r^   s      r3   	stat_fileFilesStoreProtocol.stat_filec   s    (+r5   rC   )rX   r1   NN)r2   r1   r]   r   r^   MediaPipeline.SpiderInfor_   dict[str, Any] | Noner`   dict[str, str] | NonereturnzDeferred[Any] | None)r2   r1   r^   rg   rj   zStatInfo | Deferred[StatInfo])rD   rE   rF   rG   rY   ra   rd   rI   rC   r5   r3   rS   rS   W   si    ) '+)-## # '	#
 $# '# 
#,,7,	&,r5   rS   c                      \ rS rSrS	S jr  S
           SS jjr      SS jrSS jr S     SS jjrSr	g)FSFilesStoreh   c                    [        U5      nSU;   a  UR                  SS5      S   nXl        U R                  [	        U R                  5      5        [        [        5      U l        g )Nz://   )r4   splitrX   _mkdirr   r   setcreated_directoriesrV   s     r3   rY   FSFilesStore.__init__i   sS    W%GmmE1-a0G#D&' 	 r5   Nc                    U R                  U5      nU R                  UR                  U5        UR                  UR	                  5       5        g r0   )_get_filesystem_pathrq   parentwrite_bytesgetvalue)rW   r2   r]   r^   r_   r`   absolute_paths          r3   ra   FSFilesStore.persist_files   s<     11$7M(($/!!#,,.1r5   c                    U R                  U5      n UR                  5       R                  nUR	                  S5       n[        U5      nS S S 5        UWS.$ ! [         a    0 s $ f = f! , (       d  f       N%= f)NrbrO   rM   )rv   statst_mtimeOSErroropenr?   )rW   r2   r^   rz   rO   frM   s          r3   rd   FSFilesStore.stat_file   sy     11$7	)..099M %qzH & "/HEE  	I	 &%s   A A)A&%A&)
A7c                b    [        U5      R                  S5      n[        U R                  /UQ76 $ )N/)r4   rp   r   rX   )rW   r2   
path_compss      r3   rv   !FSFilesStore._get_filesystem_path   s+    %++C0
DLL.:..r5   c                    U(       a  U R                   U   O	[        5       n[        U5      U;  a?  UR                  5       (       d  UR	                  SS9  UR                  [        U5      5        g g )NT)parents)rs   rr   r1   existsmkdiradd)rW   dirnamedomainseens       r3   rq   FSFilesStore._mkdir   sW     >D11&9w<t#>>##d+HHS\" $r5   )rX   rs   )rX   str | PathLike[str]rf   )r2   r   r]   r   r^   rg   r_   rh   r`   ri   rj   None)r2   r   r^   rg   rj   rK   )r2   r   rj   r   r0   )r   r   r   MediaPipeline.SpiderInfo | Nonerj   r   )
rD   rE   rF   rG   rY   ra   rd   rv   rq   rI   rC   r5   r3   rl   rl   h   s    
 '+)-
2!
2 
2 '	
2
 $
2 '
2 

2F'F/GF	F/
 HL##%D#	# #r5   rl   c                      \ rS rSrSrSrSrSrSrSr	Sr
SrSS0rSS jr      SS jrSS jr  S           SS	 jjrSS
 jrSrg)S3FilesStore   NprivateCache-Controlmax-age=172800c                   [        5       (       d  [        S5      eSS KnUR                  R	                  5       nUR                  SU R                  U R                  U R                  U R                  U R                  U R                  U R                  S9U l        UR                  S5      (       d  [        SU S35      eUSS  R!                  S	S
5      u  U l        U l        g )Nzmissing botocore libraryr   s3)aws_access_key_idaws_secret_access_keyaws_session_tokenendpoint_urlregion_nameuse_sslverifyzs3://Incorrect URI scheme in z, expected 's3'   r   ro   )r    r   botocore.sessionsessionget_sessioncreate_clientAWS_ACCESS_KEY_IDAWS_SECRET_ACCESS_KEYAWS_SESSION_TOKENAWS_ENDPOINT_URLAWS_REGION_NAMEAWS_USE_SSL
AWS_VERIFY	s3_client
startswith
ValueErrorrp   bucketprefix)rW   uribotocorer   s       r3   rY   S3FilesStore.__init__   s    $&& :;;""..0 .."44"&"<"<"44..,,$$?? / 	
 ~~g&&7uOLMM#&qr7==a#8 T[r5   c                L    SS jnU R                  U5      R                  U5      $ )Nc                    U S   R                  S5      nU S   n[        R                  " UR                  5       5      nXS.$ )NETag"LastModifiedrM   rO   )striptimemktime	timetuple)boto_keyrM   rO   modified_stamps       r3   
_onsuccess*S3FilesStore.stat_file.<locals>._onsuccess   sA    '--c2H$^4M![[)@)@)BCN (JJr5   )r   dict[str, Any]rj   rK   )_get_boto_keyaddCallback)rW   r2   r^   r   s       r3   rd   S3FilesStore.stat_file   s&    	K !!$'33J??r5   c           	         U R                    U 3n[        S[        U R                  R                  U R
                  US95      $ )NDeferred[dict[str, Any]])BucketKey)r   r   r   r   head_objectr   )rW   r2   key_names      r3   r   S3FilesStore._get_boto_key   sC    kk]4&)&**{{
 	
r5   c                   U R                    U 3nUR                  S5        U R                  U R                  5      nU(       a   UR	                  U R                  U5      5        [        U R                  R                  4U R                  UUU=(       d    0 R                  5        VV	s0 s H  u  pU[        U	5      _M     sn	nU R                  S.UD6$ s  sn	nf )zUpload file to S3 storager   )r   r   BodyMetadataACL)r   seek_headers_to_botocore_kwargsHEADERSr:   r   r   
put_objectr   itemsr1   POLICY)
rW   r2   r]   r^   r_   r`   r   extrakvs
             r3   ra   S3FilesStore.persist_file   s     kk]4&)00>LL99'BCNN%%
;;-1ZR,>,>,@A,@DAaQi,@A
 
 	

 Bs   (Cc                   [        0 SS_SS_SS_SS_S	S
_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S S!_S"S#S$S%S&S'S(S)S*.E5      n0 nUR                  5        H  u  pE X$   nXSU'   M     U$ ! [         a    [        S+U S,35      ef = f)-z.Convert headers to botocore keyword arguments.Content-TypeContentTyper   CacheControlzContent-DispositionContentDispositionzContent-EncodingContentEncodingzContent-LanguageContentLanguagezContent-LengthContentLengthzContent-MD5
ContentMD5ExpireszX-Amz-Grant-Full-ControlGrantFullControlzX-Amz-Grant-Read	GrantReadzX-Amz-Grant-Read-ACPGrantReadACPzX-Amz-Grant-Write-ACPGrantWriteACPzX-Amz-Object-Lock-Legal-HoldObjectLockLegalHoldStatuszX-Amz-Object-Lock-ModeObjectLockModez#X-Amz-Object-Lock-Retain-Until-DateObjectLockRetainUntilDatezX-Amz-Request-PayerRequestPayerzX-Amz-Server-Side-EncryptionServerSideEncryptionSSEKMSKeyIdSSEKMSEncryptionContextSSECustomerAlgorithmSSECustomerKeySSECustomerKeyMD5StorageClassTaggingWebsiteRedirectLocation)z+X-Amz-Server-Side-Encryption-Aws-Kms-Key-Idz$X-Amz-Server-Side-Encryption-Contextz/X-Amz-Server-Side-Encryption-Customer-Algorithmz)X-Amz-Server-Side-Encryption-Customer-Keyz-X-Amz-Server-Side-Encryption-Customer-Key-Md5zX-Amz-Storage-ClasszX-Amz-TaggingzX-Amz-Website-Redirect-LocationzHeader "z" is not supported by botocore)r!   r   KeyError	TypeError)rW   r`   mappingr   keyvaluekwargs          r3   r   (S3FilesStore._headers_to_botocore_kwargs   sb    & &'; #$5	
 #$5 !/ | 9 +,> #K ' ( /0K )*: 67R  &~!" /0F#$ @M8QCY=MAT'5!*3L3
: !#!--/JCP !%L *   P(3%/M NOOPs   "A00B
)r   r   r   r   r1   r2   r1   r^   rg   rj   Deferred[StatInfo])r2   r1   rj   r   rf   r2   r1   r]   r   r^   rg   r_   rh   r`   ri   rj   zDeferred[Any])r`   r   rj   r   )rD   rE   rF   rG   r   r   r   r   r   r   r   r   r   rY   rd   r   ra   r   rI   rC   r5   r3   r   r      s     OKJF)G9(	@	@7	@		@	
  '+)-

 
 '	

 $
 '
 

0'r5   r   c                  |    \ rS rSrSrSrSrS
S jr      SS jrSS jr	SS jr
  S           SS jjrS	rg)GCSFilesStorei  Nr   c                \   SSK Jn  UR                  U R                  S9nUSS  R	                  SS5      u  pEUR                  U5      U l        XPl        U R
                  R                  SS/5      nSU;  a  [        R                  S	S
U05        SU;  a  [        R                  SS
U05        g g )Nr   )storage)projectr   r   ro   zstorage.objects.getzstorage.objects.createzNo 'storage.objects.get' permission for GSC bucket %(bucket)s. Checking if files are up to date will be impossible. Files will be downloaded every time.r   zbNo 'storage.objects.create' permission for GSC bucket %(bucket)s. Saving files will be impossible!)google.cloudr	  ClientGCS_PROJECT_IDrp   r   r   test_iam_permissionsloggerwarningerror)rW   r   r	  clientr   r   permissionss          r3   rY   GCSFilesStore.__init__  s    ((;(;<QRsA.mmF+!kk66"$<=
 !3NNl6"
 $;6LLt6" 7r5   c                    SS jnU R                  U5      n[        [        [           [	        U R
                  R                  U5      R                  U5      5      $ )Nc                    U (       a`  [         R                  " U R                  5      R                  5       n[        R
                  " U R                  R                  5       5      nXS.$ 0 $ )Nr   )base64	b64decodemd5_hashhexr   r   updatedr   )blobrM   rO   s      r3   r   +GCSFilesStore.stat_file.<locals>._onsuccess7  sK    !++DMM:>>@ $DLL,B,B,D E$,MMIr5   )rj   rK   )_get_blob_pathr   r   rK   r   r   get_blobr   )rW   r2   r^   r   	blob_paths        r3   rd   GCSFilesStore.stat_file4  sM    	 ''-	X$++..	:FFzR
 	
r5   c                (    U(       a  SU;   a  US   $ g)Nr   zapplication/octet-streamrC   )rW   r`   s     r3   _get_content_typeGCSFilesStore._get_content_typeD  s    ~0>**)r5   c                     U R                   U-   $ r0   )r   )rW   r2   s     r3   r  GCSFilesStore._get_blob_pathI  s    {{T!!r5   c                |   U R                  U5      nU R                  R                  U5      nU R                  Ul        U=(       d    0 R                  5        VV	s0 s H  u  pU[        U	5      _M     sn	nUl        [        UR                  UR                  5       U R                  U5      U R                  S9$ s  sn	nf )N)datacontent_typepredefined_acl)r  r   r  CACHE_CONTROLcache_controlr   r1   metadatar   upload_from_stringry   r#  r   )
rW   r2   r]   r^   r_   r`   r   r  r   r   s
             r3   ra   GCSFilesStore.persist_fileL  s     ''-	{{	*!//04
/A/A/CD/CtqCF/CD##//8;;	
 	
 Es   B8)r   r   r  r  )r`   ri   rj   r1   )r2   r1   rj   r1   rf   r  )rD   rE   rF   rG   r  r+  r   rY   rd   r#  r  ra   rI   rC   r5   r3   r  r    s    N$M F,

7
	
 *
" '+)-

 
 '	

 $
 '
 

 
r5   r  c                      \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SS jr  S           SS	 jjr	      SS
 jr
Srg)FTPFilesStorei`  Nz
str | NoneFTP_USERNAMEFTP_PASSWORDzbool | NoneUSE_ACTIVE_MODEc                V   UR                  S5      (       d  [        SU S35      e[        U5      nUR                  (       d   eUR                  (       d   eUR                  U l        UR                  U l        [        UR                  =(       d    S5      U l        U R                  (       d   eU R                  (       d   eUR                  =(       d    U R                  U l	        UR                  =(       d    U R                  U l
        UR                  R                  S5      U l        g )Nzftp://r   z, expected 'ftp'   r   )r   r   r   porthostnamehostintr2  r3  usernamepasswordr2   rstriprX   )rW   r   us      r3   rY   FTPFilesStore.__init__e  s    ~~h''7u<LMNNSMvvvzzz		"%	        ZZ<4+<+<ZZ<4+<+<FFMM#.r5   c                    U R                    SU 3n[        [        UUU R                  U R                  U R
                  U R                  U R                  S9$ )Nr   )r2   r<   r9  r7  r;  r<  use_active_mode)rX   r   r#   r9  r7  r;  r<  r4  r\   s         r3   ra   FTPFilesStore.persist_filet  sS     ,,q']]]] 00	
 		
r5   c                >   ^  SU 4S jjn[        S[        X15      5      $ )Nc                $  >  [        5       nUR                  TR                  TR                  5        UR	                  TR
                  TR                  5        TR                  (       a  UR                  S5        TR                   SU  3n[        UR                  SU 35      SS  R                  5       5      n[        R                  " 5       nUR                  SU 3UR                   5        X4R#                  5       S.$ ! [$         a    0 s $ f = f)NFr   zMDTM    zRETR r~   )r   connectr9  r7  loginr;  r<  r4  set_pasvrX   rN   voidcmdr   r7   r8   
retrbinaryr:   r;   	Exception)r2   ftp	file_pathrO   r=   rW   s        r3   
_stat_file+FTPFilesStore.stat_file.<locals>._stat_file  s    eDIItyy1		$--7''LL'#||nAdV4	 %ckkE)2E&Fqr&J&P&P&R SKKMyk2AHH=)6KKMRR 	s   C<D   DDr  )r2   r1   rj   rK   )r   r   )rW   r2   r^   rN  s   `   r3   rd   FTPFilesStore.stat_file  s    	  (-
*IJJr5   )rX   r9  r<  r7  r;  r  rf   r  r  )rD   rE   rF   rG   r2  rP   r3  r4  rY   ra   rd   rI   rC   r5   r3   r1  r1  `  s    #L*##L*##'O['/( '+)-

 
 '	

 $
 '
 

(KK7K	Kr5   r1  c                    ^  \ rS rSr% SrSrS\S'   SrS\S'   \\\	\
\S	.rS
\S'   SrS\S'   SrS\S'     S"SS.       S#U 4S jjjjr\S$S j5       r\S%S j5       r\S&S j5       r\S'S j5       rS(S jrSS.       S)S jjr        S*S jrSS.         S+S jjrS,S jr      S-S jrSS.         S.S jjr        S/S jr  S"SS.         S0S  jjjrS!rU =r$ )1FilesPipelinei  a\  Abstract pipeline that implement the file downloading

This pipeline tries to minimize network transfers and file processing,
doing stat of the files and determining if file is new, up-to-date or
expired.

``new`` files are those that pipeline never processed and needs to be
    downloaded from supplier site the first time.

``uptodate`` files are the ones that the pipeline processed and are still
    valid files.

``expired`` files are those that pipeline already processed but the last
    modification was made long time ago, so a reprocessing is recommended to
    refresh it in case of change.

r<   r1   
MEDIA_NAMEZ   r:  EXPIRES) r<   r   gsrL  z#dict[str, type[FilesStoreProtocol]]STORE_SCHEMES	file_urlsDEFAULT_FILES_URLS_FIELDfilesDEFAULT_FILES_RESULT_FIELDNcrawlerc                 > [        U5      nU(       d  [        eUbA  Ub1  [        R                  " S[	        U R
                  5       S3[        SS9  UR                  nO#[        U[        5      (       d  Uc  [        U5      nSnU R                  U5      U l        [        R                  " U R                  XSS9nUR!                  U" S5      U R"                  5      U l        ['        U S5      (       d  U R(                  U l        ['        U S	5      (       d  U R,                  U l        UR1                  U" S5      U R*                  5      U l        UR1                  U" S	5      U R.                  5      U l        [6        TU ]q  UU(       d  UOS US
9  g )NzbFilesPipeline.__init__() was called with a crawler instance and a settings instance when creating z. The settings instance will be ignored and crawler.settings will be used. The settings argument will be removed in a future Scrapy version.   category
stacklevelrR  )base_class_namesettingsFILES_EXPIRESFILES_URLS_FIELDFILES_RESULT_FIELD)download_funcre  r^  )r4   r   warningswarnr&   	__class__r   re  
isinstancedictr   
_get_storestore	functoolspartial_key_for_pipegetintrU  expireshasattrrZ  rg  r\  rh  getfiles_urls_fieldfiles_result_fieldsuperrY   )rW   	store_uriri  re  r^  cls_nameresolverl  s          r3   rY   FilesPipeline.__init__  sn    y)	#&&8&H%I J|} 6  ''H$''8+;)H")-)C
##
 %OOGO,DdllSt/00$($A$AD!t122&*&E&ED#%-\\&')>)>&
 (0||()4+B+B(
 	'%,X$ 	 	
r5   c                t    [         R                  " U R                   S3[        SS9  U R	                  US 5      $ )Nz;.from_settings() is deprecated, use from_crawler() instead.r`  ra  )rj  rk  rD   r   _from_settings)clsre  s     r3   from_settingsFilesPipeline.from_settings  s8    ||nWX-	

 !!(D11r5   c                   [        U [        S5      (       aS  [        R                  " [	        U 5       S3[
        S9  U R                  UR                  5      nUR                  U5        U$ U R                  UR                  U5      $ )Nr  z overrides FilesPipeline.from_settings(). This method is deprecated and won't be called in future Scrapy versions, please update your code so that it overrides from_crawler() instead.rb  )
r"   rR  rj  rk  r&   r   r  re  _finish_initr  )r  r^  os      r3   from_crawlerFilesPipeline.from_crawler  s{    ]ODDMM%c*+ ,X Y 2	 !!'"2"23ANN7#H!!'"2"2G<<r5   c                    U R                  U5        US   nS[        U R                  5      ;   a  U " X2S9nU$ U " X1S9nU(       a  UR                  U5        [        R
                  " [        U 5       S3[        S9  U$ )NFILES_STOREr^  r]  )re  z|.__init__() doesn't take a crawler argument. This is deprecated and the argument will be required in future Scrapy versions.r  )_update_storesr%   rY   r  rj  rk  r&   r   )r  re  r^  r{  r  s        r3   r  FilesPipeline._from_settings  s    8$]+	cll33I/A  I1Aw'MM%c*+ ,c c1
 r5   c                
   [        [        [           U R                  S   5      nUS   Ul        US   Ul        US   Ul        US   Ul        US   Ul        US   Ul	        US   Ul
        US	   Ul        [        [        [           U R                  S
   5      nUS   Ul        US   =(       d    S Ul        [        [        [           U R                  S   5      nUS   Ul        US   Ul        UR#                  S5      Ul        g )Nr   r   r   r   r   r   r   r   FILES_STORE_S3_ACLrW  r  FILES_STORE_GCS_ACLrL  FTP_USERr3  FEED_STORAGE_FTP_ACTIVE)r   typer   rX  r   r   r   r   r   r   r   r   r  r  r1  r2  r3  getboolr4  )r  re  s3store	gcs_store	ftp_stores        r3   r  FilesPipeline._update_stores  s   &*4+=s?P?PQU?V&W$,-@$A!(01H(I%$,-@$A!#+,>#? "*+<"=&}5%l3!"67)-!2!24!8*
	 $,,<#=	 #$9:Bd	)-!2!25!9*
	 "**!5	!).!9	$,$4$45N$O	!r5   c                    [        U5      R                  5       (       a  SO[        U5      R                  nU R                  U   nU" U5      $ )Nr<   )r   is_absoluter   schemerX  )rW   r   r  	store_clss       r3   ro  FilesPipeline._get_store-  s?    900228L8L&&v.	~r5   itemc                  ^ ^^^ SUUUU 4S jjnT R                  TTUS9m[        T R                  R                  TT5      nUR	                  U5      nUR                  S 5        UR                  UU 4S j5        U$ )Nc                  > U (       d  g U R                  SS 5      nU(       d  g [        R                  " 5       U-
  nUS-  S-  S-  nUT	R                  :  a  g [        T5      n[        R                  ST	R                  TUS.STR                  0S9  T	R                  TR                  S5        U R                  S	S 5      nTR                  TUSS
.$ )NrO   <      zTFile (uptodate): Downloaded %(medianame)s from %(request)s referred in <%(referer)s>)	medianamerequestrefererspiderr   uptodaterM   urlr2   rM   status)
rw  r   ru  r(   r  debugrS  r  	inc_statsr  )
resultrO   age_secondsage_daysr  rM   r^   r2   r  rW   s
         r3   r   3FilesPipeline.media_to_download.<locals>._onsuccess6  s    "JJ=M ))+5K"R'",r1H$,,&!'*GLL,"oo'gV-	   NN4;;
3zz*d3H{{$$	 r5   )r^   r  c                    g r0   rC   )_s    r3   <lambda>1FilesPipeline.media_to_download.<locals>.<lambda>X  s    $r5   c                   > [         R                  TR                  R                  S-   [	        U 5      STR
                  0S9$ )Nz.store.stat_filer  exc_infor   )r  r  rl  rD   r$   r  )r   r^   rW   s    r3   r  r  Z  s:    fll''*<<,Q/- # r5   )r  rK   rj   zFileInfo | None)rM  r   rp  rd   r   
addErrback)rW   r  r^   r  r   dfddfd2r2   s   ```    @r3   media_to_downloadFilesPipeline.media_to_download3  sm    	 	< ~~gDt~<"/

0D0DdD"Q*-//**E'	
 r5   c                    [        UR                  [        5      (       dE  [        U5      n[        R                  SU R                  UUUR                  S.SUR                  0S9  [        e)NzoFile (unknown-error): Error downloading %(medianame)s from %(request)s referred in <%(referer)s>: %(exception)s)r  r  r  	exceptionr  r  )	rm  r   r   r(   r  r  rS  r  rA   )rW   failurer  r^   r  s        r3   media_failedFilesPipeline.media_failedb  sg     '--77!'*GNNG "&&&!(	  -  
 r5   c          	     @   [        U5      nUR                  S:w  a9  [        R                  SUR                  X%S.SUR                  0S9  [        S5      eUR                  (       d.  [        R                  SX%S.SUR                  0S9  [        S	5      eS
UR                  ;   a  S
OSn[        R                  SXbUS.SUR                  0S9  U R                  UR                  U5         U R                  X!X4S9nU R                  XX4S9nUR                  UUUS.$ ! [
         a4  n	[        R                  SX%[        U	5      S.SUR                  0SS9  e S n	A	f[         a=  n	[        R                  SX%S.SSUR                  0S9  [        [        U	5      5      eS n	A	ff = f)N   zZFile (code: %(status)s): Error downloading file from %(request)s referred in <%(referer)s>)r  r  r  r  r  zdownload-errorzWFile (empty-content): Empty file from %(request)s referred in <%(referer)s>: no-content)r  r  zempty-contentcached
downloadedzMFile (%(status)s): Downloaded file from %(request)s referred in <%(referer)s>responser^   r  r  z\File (error): Error processing file from %(request)s referred in <%(referer)s>: %(errormsg)s)r  r  errormsgT)r   r  zVFile (unknown-error): Error processing file from %(request)s referred in <%(referer)s>r  r  )r(   r  r  r  r  rA   bodyflagsr  r  rM  file_downloadedr1   rK  r  r  )
rW   r  r  r^   r  r  r  r2   rM   excs
             r3   media_downloadedFilesPipeline.media_downloadedu  s    g&??c!NN8#??wS-	     011}}NN/#8-	    00%7\gFT[[)	 	 	
 	t{{F+	*>>'4>SD++Ht+OH* ;; 	
 	
'  	NN:#SXN-    	*LL,#8-    C))	*s$   ) D 
F$/EF 8FFc                    UR                   R                  (       d   eUR                   R                  R                  SUS9  UR                   R                  R                  SU 3US9  g )N
file_count)r  zfile_status_count/)r^  stats	inc_value)rW   r  r  s      r3   r  FilesPipeline.inc_stats  sW    ~~####&&|F&C&&);F8'DV&Tr5   c                    [        U5      R                  U R                  / 5      nU Vs/ s H  n[        U[        S9PM     sn$ s  snf )N)callback)r   rw  rx  r   r   )rW   r  r^   urlsr>  s        r3   get_media_requests FilesPipeline.get_media_requests  s@     4 $$T%:%:B?:>?$QK0$???s   Ac                   U R                  X!X4S9n[        UR                  5      n[        U5      nUR	                  S5        U R
                  R                  XVU5        U$ )Nr  r   )rM  r   r  r?   r   rp  ra   )rW   r  r  r^   r  r2   r]   rM   s           r3   r  FilesPipeline.file_downloaded  sQ     ~~gt~Ohmm$3<

40r5   c                    [        [        5         U VVs/ s H  u  pEU(       d  M  UPM     snn[        U5      U R                  '   S S S 5        U$ s  snnf ! , (       d  f       U$ = fr0   )r   r   r   ry  )rW   resultsr  r^   okxs         r3   item_completedFilesPipeline.item_completed  sS     hIP9WTV!9WKd556   :X  s    AAAAA
A&c                  [         R                  " [        UR                  5      5      R	                  5       n[        UR                  5      R                  nU[        R                  ;  aP  Sn[        R                  " UR                  5      S   nU(       a$  [        [        [        R                  " U5      5      nSU U 3$ )NrV  r   zfull/)r7   sha1r'   r  r;   r   suffix	mimetypes	types_map
guess_typer   r1   guess_extension)rW   r  r  r^   r  
media_guid	media_ext
media_types           r3   rM  FilesPipeline.file_path  s     \\(7;;"78BBD
%,,	 I///I"--gkk:1=J i&?&?
&KL	zl9+..r5   )rh  rg  ru  ry  rx  rp  rf   )r{  r   ri  z,Callable[[Request, Spider], Response] | Nonere  z Settings | dict[str, Any] | Noner^  Crawler | None)re  r   rj   r,   )r^  r.   rj   r,   )re  r   r^  r  rj   r,   )re  r   rj   r   )r   r1   rj   rS   )r  r   r^   rg   r  r   rj   zDeferred[FileInfo | None])r  r+   r  r   r^   rg   rj   r   )
r  r   r  r   r^   rg   r  r   rj   r   )r  r-   r  r1   rj   r   )r  r   r^   rg   rj   zlist[Request])
r  r   r  r   r^   rg   r  r   rj   r1   )r  zlist[FileInfoOrError]r  r   r^   rg   rj   r   )
r  r   r  zResponse | Noner^   r   r  r   rj   r1   ) rD   rE   rF   rG   rH   rS  rP   rU  rl   r   r  r1  rX  rZ  r\  rY   classmethodr  r  r  r  ro  r  r  r  r  r  r  r  rM  rI   __classcell__)rl  s   @r3   rR  rR    sV   $ JGS:M6  %0c/&--
 GK59	-
 #'-
&-
 D-
 3	-
  -
 -
^ 2 2 = =    P P0 PT--&>-IL-	"-^)08P	2 @
@
 @
 '	@
 @
 
@
DU@@7@	@   '	  
,47?W	 %)04	/ // "/ .	/ / 
/ /r5   rR  )r2   r   rj   r1   )r<   z	IO[bytes]rj   r1   )_rH   
__future__r   r  rq  r7   loggingr  r   rj  collectionsr   
contextlibr   ftplibr   ior   pathlibr   typingr	   r
   r   r   r   r   r   urllib.parser   itemadapterr   twisted.internet.deferr   r   twisted.internet.threadsr   scrapy.exceptionsr   r   r   scrapy.httpr   r   scrapy.http.requestr   scrapy.pipelines.mediar   r   r   scrapy.settingsr   r   scrapy.utils.botor    scrapy.utils.datatypesr!   scrapy.utils.deprecater"   scrapy.utils.ftpr#   scrapy.utils.logr$   scrapy.utils.pythonr%   r&   r'   scrapy.utils.requestr(   collections.abcr)   osr*   twisted.python.failurer+   typing_extensionsr,   scrapyr-   scrapy.crawlerr.   	getLoggerrD   r  r4   r?   rK  rA   rK   rS   rl   r   r  r1  rR  rC   r5   r3   <module>r     s   #        #     N N N ! # : 2 T T ) + K K 2 3 6 7 + 0 K K ,(. '& 
		8	$"(I (y 
, ,"0# 0#fw wtH
 H
V;K ;K|L/M L/r5   