
     h                     :    S SK r S SKrS SKrSS jr " S S5      rg)    Nc                    US:X  ae  U R                  S5      R                  5        Vs/ s H)  oDR                  5       (       d  M  UR                  5       PM+     nnSR                  U5      nOUS:X  a   U R                  S5      R	                  5       nOZUS:X  a  U R	                  5       nOCUS:X  a  U R
                  R	                  U5      nO!US:X  a  U R
                  R	                  S	5      nU(       a  UR                  W5      nW$ s  snf )
NTextz	.//text() Linkz.//@hrefHTML	AttributeImagesrc)xpathgetallstripjoingetattribformat)element	item_type	attribute	formatteritextscontents          ^/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/selectorlib/selectorlib.pyextract_fieldr      s    F$+MM+$>$E$E$GU$Gq779$GU((5/	f	--
+//1	f	++-	k	!..$$Y/	g	..$$U+""7+N Vs   DDc                   |    \ rS rSrSrSS jr\SS\4S jj5       r\SS\4S jj5       r	SS	\S
\4S jjr
S rS rSrg)	Extractor   zselector classNc                     Xl         U(       aW  U Vs/ s H'  n[        R                  " U5      (       a  U" 5       OUPM)     nnU Vs0 s H  o3R                  U_M     snU l        g 0 U l        g s  snf s  snf N)configinspectisclassname
formatters)selfr    r$   r   s       r   __init__Extractor.__init__   s_    DNOJq!3!3!#:JJO2<=*Qvvqy*=DO DO P=s   .A-A2yaml_stringc                 :    [         R                  " U5      nU " X2S9$ )zcreate `Extractor` object from yaml string

>>> yaml_string = '''
    title:
        css: "h1"
        type: Text
    '''
>>> extractor = Extractor.from_yaml_string(yaml_string)
r$   )yaml	safe_load)clsr(   r$   r    s       r   from_yaml_stringExtractor.from_yaml_string"   s     ,611    yaml_filenamec                     [        U5       n[        R                  " UR                  5       5      nSSS5        U " WUS9$ ! , (       d  f       N= f)zgcreate `Extractor` object from yaml file

>>> extractor = Extractor.from_yaml_string('selectors.yaml')
Nr*   )openr+   r,   read)r-   r1   r$   yaml_fileobjr    s        r   from_yaml_fileExtractor.from_yaml_file0   s@     - L^^L$5$5$78F !6j11 ! s   %A  
Ahtmlbase_urlc                     [         R                  " XS9nU(       a  UR                  R                  5         0 nU R                  R                  5        H  u  pVU R                  Xc5      XE'   M     U$ )a  
Args:
    html: html string
    base_url (str, optional): specifying the base_url will make all extracted Links absolute
Returns:
    dict: extracted data from given html string

>>> response = requests.get(url)
>>> extractor.extract(response.text, base_url=response.url)
)r9   )parselSelectorrootmake_links_absoluter    items_extract_selector)r%   r8   r9   selfields_dataselector_nameselector_configs          r   extractExtractor.extract:   s^     ood6HH((*.2kk.?.?.A*M)-)?)?)UK& /Br0   c                    UR                  S5      b  UR                  US   5      nO#US   nUS:X  a  U/nOUR                  US   5      nUR                  SS5      nU(       d  g / nU H  nSU;   a  U R                  X5      nOHSUR                  S5      0n	SU;   a  US   U	S'   SU;   a  U R                  US      U	S	'   [        Xu40 U	D6nUR                  S
5      SLa  Us  $ UR                  U5        M     U$ )Nr   css typer   childrenr   r   r   multipleT)r   r   rH   _get_child_itemr$   r   append)
r%   field_configparent_parserelementsrH   r   valuesr   valuekwargss
             r   r@   Extractor._extract_selectorM   s   G$0$**<+@AHu%Cby)?(,,\%-@A $$VV4	G\),,\C%|'7'7'DE,.*6{*CF;'|+*.//,x:P*QF;'%gCFC
+47e$    r0   c                 V    US   n0 nU H  nU R                  X5   U5      nXdU'   M     U$ )NrK   )r@   )r%   rO   r   children_config
child_itemfieldchild_values          r   rM   Extractor._get_child_itemm   s@    &z2
$E001GQK +u % r0   )r    r$   r   )__name__
__module____qualname____firstlineno____doc__r&   classmethodstrr.   r6   rE   r@   rM   __static_attributes__ r0   r   r   r      s^    ! 23 2 2 23 2 2C 3 &@r0   r   )NN)r;   r+   r!   r   r   rd   r0   r   <module>re      s      "[ [r0   