
     hZ                        S r SSKJr  SSKJr  SSKJrJrJrJ	r	J
r
JrJrJrJrJr  SSKJr  SSKJr  SSKJrJr  SSKJr  SS	KJr  SS
KJr  \(       a  SSKJr  SS jr " S S5      r g)z;
Item Loader

See documentation in docs/topics/loaders.rst
    )annotations)suppress)
TYPE_CHECKINGAnyCallableDictIterableListMutableMappingOptionalPatternUnion)ItemAdapter)Selector)extract_regexflatten)wrap_loader_context)Identity)arg_to_iter)Selfc                    [        [        5         SU R                  ;  a  U R                  sSSS5        $  SSS5        U $ ! , (       d  f       U $ = f)zz
Allow to use single-argument functions as input or output processors
(no need to define an unused first 'self' argument)
.N)r   AttributeError__qualname____func__)methods    [/root/1688_scrapy/alibaba-scraper/venv/lib/python3.13/site-packages/itemloaders/__init__.pyunbound_methodr   $   sH    
 
.	!f)))?? 
"	!) 
" M 
"	! Ms   A
Ac                  (   \ rS rSr% Sr\rS\S'   \" 5       r	S\S'   \" 5       r
S\S'      S)       S*S	 jjr\S+S
 j5       r\S,S j5       rS-S jrS.S jrSS.           S/S jjrSS.           S/S jjrS0S jrS0S jrSS.         S1S jjrS,S jrS2S jrS3S jrS4S jrS4S jr S5       S6S jjrS7S jrS8S jrSS.           S9S jjrSS.           S9S jjrSS.         S:S jjr       S;S jr!SS.           S<S  jjr"SS.           S<S! jjr#SS.         S=S" jjr$S>S# jr%SS.           S?S$ jjr&SS.           S@S% jjr'SS.         SAS& jjr(SBS' jr)S(r*g)C
ItemLoader/   a	  
Return a new Item Loader for populating the given item. If no item is
given, one is instantiated automatically using the class in
:attr:`default_item_class`.

When instantiated with a :param ``selector`` parameter the :class:`ItemLoader` class
provides convenient mechanisms for extracting data from web pages
using parsel_ selectors.

:param item: The item instance to populate using subsequent calls to
    :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
    :meth:`~ItemLoader.add_jmes` or :meth:`~ItemLoader.add_value`.
:type item: :class:`dict` object

:param selector: The selector to extract data from, when using the
    :meth:`add_xpath` (resp. :meth:`add_css`, :meth:`add_jmes`) or :meth:`replace_xpath`
    (resp. :meth:`replace_css`, :meth:`replace_jmes`) method.
:type selector: :class:`~parsel.selector.Selector` object

The item, selector and the remaining keyword arguments are
assigned to the Loader context (accessible through the :attr:`context` attribute).

.. attribute:: item

    The item object being parsed by this Item Loader.
    This is mostly used as a property so when attempting to override this
    value, you may want to check out :attr:`default_item_class` first.

.. attribute:: context

    The currently active :ref:`Context <loaders-context>` of this Item Loader.
    Refer to <loaders-context> for more information about the Loader Context.

.. attribute:: default_item_class

    An Item class (or factory), used to instantiate items when not given in
    the ``__init__`` method.

    .. warning:: Currently, this factory/class needs to be
        callable/instantiated without any arguments.
        If you are using ``dataclasses``, please consider the following
        alternative::

            from dataclasses import dataclass, field
            from typing import Optional

            @dataclass
            class Product:
                name: Optional[str] = field(default=None)
                price: Optional[float] = field(default=None)

.. attribute:: default_input_processor

    The default input processor to use for those fields which don't specify
    one.

.. attribute:: default_output_processor

    The default output processor to use for those fields which don't specify
    one.

.. attribute:: selector

    The :class:`~parsel.selector.Selector` object to extract data from.
    It's the selector given in the ``__init__`` method.
    This attribute is meant to be read-only.

.. _parsel: https://parsel.readthedocs.io/en/latest/
typedefault_item_classCallable[..., Any]default_input_processordefault_output_processorNc                J   X l         UR                  US9  Uc  U R                  5       nXl        XS'   X@l        X0l        0 U l        [        U5      R                  5        HA  u  pVU R                  R                  U/ 5        U R                  U==   [        U5      -  ss'   MC     g )Nselectoritem)r)   updater#   _local_itemcontextparent_local_valuesr   items_values
setdefaultr   )selfr*   r)   r.   r-   
field_namevalues          r   __init__ItemLoader.__init__z   s     -5)<**,D18,235!,T!2!8!8!:JLL##J3LL$E(::$ ";    c                `    U R                   b  U R                   R                  $ U R                  $ N)r.   r1   r/   r3   s    r   r1   ItemLoader._values   s(    ;;";;&&&%%%r8   c                `    U R                   b  U R                   R                  $ U R                  $ r:   )r.   r*   r,   r;   s    r   r*   ItemLoader.item   s(    ;;";;######r8   c                    U R                  5         U R                  c   eU R                  R                  U5      nUR                  US9  U R                  " SU R
                  U S.UD6nU$ )a=  
Create a nested loader with an xpath selector.
The supplied selector is applied relative to selector associated
with this :class:`ItemLoader`. The nested loader shares the item
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
r(   r*   r.    )_check_selector_methodr)   xpathr+   	__class__r*   )r3   rC   r-   r)   	subloaders        r   nested_xpathItemLoader.nested_xpath   sd     	##%}}(((==&&u-)NNJ		$J'J	r8   c                    U R                  5         U R                  c   eU R                  R                  U5      nUR                  US9  U R                  " SU R
                  U S.UD6nU$ )a:  
Create a nested loader with a css selector.
The supplied selector is applied relative to selector associated
with this :class:`ItemLoader`. The nested loader shares the item
with the parent :class:`ItemLoader` so calls to :meth:`add_xpath`,
:meth:`add_value`, :meth:`replace_value`, etc. will behave as expected.
r(   r@   rA   )rB   r)   cssr+   rD   r*   )r3   rI   r-   r)   rE   s        r   
nested_cssItemLoader.nested_css   sd     	##%}}(((==$$S))NNJ		$J'J	r8   )rec                   U R                   " U/UQ7SU0UD6nUc  U $ U(       d,  UR                  5        H  u  pgU R                  Xg5        M     U $ U R                  X5        U $ )a}  
Process and then add the given ``value`` for the given field.

The value is first passed through :meth:`get_value` by giving the
``processors`` and ``kwargs``, and then passed through the
:ref:`field input processor <processors>` and its result
appended to the data collected for that field. If the field already
contains collected data, the new data is added.

The given ``field_name`` can be ``None``, in which case values for
multiple fields may be added. And the processed value should be a dict
with field_name mapped to values.

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

Examples::

    loader.add_value('name', 'Color TV')
    loader.add_value('colours', ['white', 'blue'])
    loader.add_value('length', '100')
    loader.add_value('name', 'name: foo', TakeFirst(), re='name: (.+)')
    loader.add_value(None, {'name': 'foo', 'sex': 'male'})

rL   )	get_valuer0   
_add_valuer3   r4   r5   rL   
processorskwkvs           r   	add_valueItemLoader.add_value   sg    B u?z?b?B?=K% &  OOJ.r8   c                   U R                   " U/UQ7SU0UD6nUc  U $ U(       d,  UR                  5        H  u  pgU R                  Xg5        M     U $ U R                  X5        U $ )z
Similar to :meth:`add_value` but replaces the collected data with the
new value instead of adding it.

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
rL   )rN   r0   _replace_valuerP   s           r   replace_valueItemLoader.replace_value   sj     u?z?b?B?=K##A) &  
2r8   c                    [        U5      nU R                  X5      nU(       a=  U R                  R                  U/ 5        U R                  U==   [        U5      -  ss'   g g r:   )r   _process_input_valuer1   r2   )r3   r4   r5   processed_values       r   rO   ItemLoader._add_value   sQ    E"33JFLL##J3LL$O(DD$ r8   c                ^    U R                   R                  US 5        U R                  X5        g r:   )r1   poprO   )r3   r4   r5   s      r   rX   ItemLoader._replace_value  s"    T*
*r8   c               r  ^ T(       a   [        U5      n[        U4S jU 5       5      nU H)  nUc    U$ Un[        XPR                  5      n U" U5      nM+     U$ ! [         aR  n[        SUR                  R                  < SU< S[        U5      R                  < S[        U5      < S3	5      UeSnAff = f)a  
Process the given ``value`` by the given ``processors`` and keyword
arguments.

Available keyword arguments:

:param re: a regular expression to use for extracting data from the
    given value using :func:`~parsel.utils.extract_regex` method,
    applied before processors
:type re: str or typing.Pattern[str]

Examples:

>>> from itemloaders import ItemLoader
>>> from itemloaders.processors import TakeFirst
>>> loader = ItemLoader()
>>> loader.get_value('name: foo', TakeFirst(), str.upper, re='name: (.+)')
'FOO'
c              3  <   >#    U  H  n[        TU5      v   M     g 7fr:   )r   ).0xrL   s     r   	<genexpr>'ItemLoader.get_value.<locals>.<genexpr>&  s     @%QM"a00%s   NzError with processor  value= error=': ')
r   r   r   r-   	Exception
ValueErrorrD   __name__r"   str)r3   r5   rL   rQ   rR   proc_proces     `     r   rN   ItemLoader.get_value
  s    4 &E@%@@ED}  E&t\\:DU     //Q8H8H#a&R s   A
B6$AB11B6c                    [        U R                  5      n[        U R                  5       H  nU R	                  U5      nUc  M  X1U'   M     UR                  $ )z
Populate the item with the data collected so far, and return it. The
data collected is first passed through the :ref:`output processors
<processors>` to get the final value to assign to each item field.
)r   r*   tupler1   get_output_value)r3   adapterr4   r5   s       r   	load_itemItemLoader.load_item6  sO     dii(-J))*5E &+
# .
 ||r8   c                .   U R                  U5      n[        X R                  5      nU R                  R	                  U/ 5      n U" U5      $ ! [
         a>  n[        SU< SU< S[        U5      R                  < S[        U5      < S3	5      UeSnAff = f)z
Return the collected values parsed using the output processor, for the
given field. This method doesn't populate or modify the item at all.
z#Error with output processor: field=rh   ri   rj   rk   N)
get_output_processorr   r-   r1   getrl   rm   r"   rn   ro   )r3   r4   rp   r5   rr   s        r   rv   ItemLoader.get_output_valueD  s    
 ((4"46  R0	; 	ud1g&6&6A@ 	s   A 
B9BBc                :    U R                   R                  U/ 5      $ )z0Return the collected values for the given field.)r1   r|   )r3   r4   s     r   get_collected_valuesItemLoader.get_collected_valuesT  s    ||
B//r8   c                    [        U SU-  S 5      nU(       d  U R                  USU R                  5      n[        U5      $ )Nz%s_ininput_processor)getattr_get_item_field_attrr%   r   r3   r4   rp   s      r   get_input_processorItemLoader.get_input_processorX  sC    tWz148,,-t/K/KD d##r8   c                    [        U SU-  S 5      nU(       d  U R                  USU R                  5      n[        U5      $ )Nz%s_outoutput_processor)r   r   r&   r   r   s      r   r{   ItemLoader.get_output_processor`  sC    tX
2D9,,.0M0MD d##r8   c                l    [        U R                  5      R                  U5      nUR                  X#5      $ r:   )r   r*   get_field_metar|   )r3   r4   keydefault
field_metas        r   r   ItemLoader._get_item_field_attrh  s,     !+:::F
~~c++r8   c                *   U R                  U5      nUn[        X0R                  5      n U" U5      $ ! [         aV  n[	        SUR
                  R                  < SU< SU< S[        U5      R                  < S[        U5      < S35      UeS nAff = f)NzError with input processor z: field=rh   ri   rj   rk   )	r   r   r-   rl   rm   rD   rn   r"   ro   )r3   r4   r5   rp   rq   rr   s         r   r\   ItemLoader._process_input_valuen  s    ''
3"46	; 	 OO,,G$$F
 
	s   2 
BABBc                b    U R                   c"  [        SU R                  R                  -  5      eg )NzFTo use XPath or CSS selectors, %s must be instantiated with a selector)r)   RuntimeErrorrD   rn   r;   s    r   rB   !ItemLoader._check_selector_method  s6    == 79=9P9PQ  !r8   c               X    U R                   " U40 UD6nU R                  " X/UQ7SU0UD6$ )ax  
Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
value, which is used to extract a list of strings from the
selector associated with this :class:`ItemLoader`.

See :meth:`get_xpath` for ``kwargs``.

:param xpath: the XPath to extract data from
:type xpath: str

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

Examples::

    # HTML snippet: <p class="product-name">Color TV</p>
    loader.add_xpath('name', '//p[@class="product-name"]')
    # HTML snippet: <p id="price">the price is $1200</p>
    loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')

rL   )_get_xpathvaluesrU   r3   r4   rC   rL   rQ   rR   valuess          r   	add_xpathItemLoader.add_xpath  s7    : &&u33~~jK:K"KKKr8   c               X    U R                   " U40 UD6nU R                  " X/UQ7SU0UD6$ )z
Similar to :meth:`add_xpath` but replaces collected data instead of adding it.

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

rL   )r   rY   r   s          r   replace_xpathItemLoader.replace_xpath  s9     &&u33!!*OzObOBOOr8   c               X    U R                   " U40 UD6nU R                  " U/UQ7SU0UD6$ )a  
Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
value, which is used to extract a list of unicode strings from the
selector associated with this :class:`ItemLoader`.

:param xpath: the XPath to extract data from
:type xpath: str

:param re: a regular expression to use for extracting data from the
    selected XPath region
:type re: str or typing.Pattern[str]

Examples::

    # HTML snippet: <p class="product-name">Color TV</p>
    loader.get_xpath('//p[@class="product-name"]')
    # HTML snippet: <p id="price">the price is $1200</p>
    loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')

rL   )r   rN   )r3   rC   rL   rQ   rR   r   s         r   	get_xpathItemLoader.get_xpath  s7    6 &&u33~~f?z?b?B??r8   c                   ^ ^ T R                  5         T R                  c   e[        U5      n[        UU 4S jU 5       5      $ )Nc              3  z   >#    U  H0  nTR                   R                  " U40 TD6R                  5       v   M2     g 7fr:   )r)   rC   getall)rd   rC   rR   r3   s     r   rf   .ItemLoader._get_xpathvalues.<locals>.<genexpr>  s1     UfUt}}**57B7>>@@fs   8;rB   r)   r   r   )r3   xpathsrR   s   ` `r   r   ItemLoader._get_xpathvalues  s<     	##%}}(((V$UfUUUr8   c               T    U R                  U5      nU R                  " X/UQ7SU0UD6$ )an  
Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
instead of a value, which is used to extract a list of unicode strings
from the selector associated with this :class:`ItemLoader`.

See :meth:`get_css` for ``kwargs``.

:param css: the CSS selector to extract data from
:type css: str

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

Examples::

    # HTML snippet: <p class="product-name">Color TV</p>
    loader.add_css('name', 'p.product-name')
    # HTML snippet: <p id="price">the price is $1200</p>
    loader.add_css('price', 'p#price', re='the price is (.*)')

rL   )_get_cssvaluesrU   r3   r4   rI   rL   rQ   rR   r   s          r   add_cssItemLoader.add_css  s2    : $$S)~~jK:K"KKKr8   c               T    U R                  U5      nU R                  " X/UQ7SU0UD6$ )z
Similar to :meth:`add_css` but replaces collected data instead of adding it.

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

rL   )r   rY   r   s          r   replace_cssItemLoader.replace_css   s4     $$S)!!*OzObOBOOr8   c               T    U R                  U5      nU R                  " U/UQ7SU0UD6$ )as  
Similar to :meth:`ItemLoader.get_value` but receives a CSS selector
instead of a value, which is used to extract a list of unicode strings
from the selector associated with this :class:`ItemLoader`.

:param css: the CSS selector to extract data from
:type css: str

:param re: a regular expression to use for extracting data from the
    selected CSS region
:type re: str or typing.Pattern[str]

Examples::

    # HTML snippet: <p class="product-name">Color TV</p>
    loader.get_css('p.product-name')
    # HTML snippet: <p id="price">the price is $1200</p>
    loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
rL   )r   rN   )r3   rI   rL   rQ   rR   r   s         r   get_cssItemLoader.get_css  s2    4 $$S)~~f?z?b?B??r8   c                   ^  T R                  5         T R                  c   e[        U5      n[        U 4S jU 5       5      $ )Nc              3  v   >#    U  H.  nTR                   R                  U5      R                  5       v   M0     g 7fr:   )r)   rI   r   )rd   rI   r3   s     r   rf   ,ItemLoader._get_cssvalues.<locals>.<genexpr>3  s,     G$3t}}((-4466$   69r   )r3   cssss   ` r   r   ItemLoader._get_cssvalues/  s:    ##%}}(((4 G$GGGr8   c               T    U R                  U5      nU R                  " X/UQ7SU0UD6$ )aV  
Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector
instead of a value, which is used to extract a list of unicode strings
from the selector associated with this :class:`ItemLoader`.

See :meth:`get_jmes` for ``kwargs``.

:param jmes: the JMESPath selector to extract data from
:type jmes: str

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader

Examples::

    # HTML snippet: {"name": "Color TV"}
    loader.add_jmes('name')
    # HTML snippet: {"price": the price is $1200"}
    loader.add_jmes('price', TakeFirst(), re='the price is (.*)')
rL   )_get_jmesvaluesrU   r3   r4   jmesrL   rQ   rR   r   s          r   add_jmesItemLoader.add_jmes5  s2    8 %%d+~~jK:K"KKKr8   c               T    U R                  U5      nU R                  " X/UQ7SU0UD6$ )z
Similar to :meth:`add_jmes` but replaces collected data instead of adding it.

:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
rL   )r   rY   r   s          r   replace_jmesItemLoader.replace_jmesT  s4     %%d+!!*OzObOBOOr8   c               T    U R                  U5      nU R                  " U/UQ7SU0UD6$ )aX  
Similar to :meth:`ItemLoader.get_value` but receives a JMESPath selector
instead of a value, which is used to extract a list of unicode strings
from the selector associated with this :class:`ItemLoader`.

:param jmes: the JMESPath selector to extract data from
:type jmes: str

:param re: a regular expression to use for extracting data from the
    selected JMESPath
:type re: str or typing.Pattern

Examples::

    # HTML snippet: {"name": "Color TV"}
    loader.get_jmes('name')
    # HTML snippet: {"price": the price is $1200"}
    loader.get_jmes('price', TakeFirst(), re='the price is (.*)')
rL   )r   rN   )r3   r   rL   rQ   rR   r   s         r   get_jmesItemLoader.get_jmese  s2    4 %%d+~~f?z?b?B??r8   c                   ^  T R                  5         T R                  c   e[        U5      n[        T R                  S5      (       d  [	        S5      e[        U 4S jU 5       5      $ )Njmespathz6Please install parsel >= 1.8.1 to get jmespath supportc              3  v   >#    U  H.  nTR                   R                  U5      R                  5       v   M0     g 7fr:   )r)   r   r   )rd   r   r3   s     r   rf   -ItemLoader._get_jmesvalues.<locals>.<genexpr>  s,     Ot}}--d3::<<r   )rB   r)   r   hasattrr   r   )r3   jmesss   ` r   r   ItemLoader._get_jmesvalues  s]    ##%}}(((E"t}}j11 H  OOOOr8   )r,   r/   r-   r.   r)   )NNN)r*   r   r)   zOptional[Selector]r.   zOptional[ItemLoader]r-   r   )returnzDict[str, List[Any]])r   r   )rC   ro   r-   r   r   r   )rI   ro   r-   r   r   r   )r4   Optional[str]r5   r   rQ   r$   rL   Union[str, Pattern[str], None]rR   r   r   r   )r4   ro   r5   r   r   None)
r5   r   rQ   r$   rL   r   rR   r   r   r   )r4   ro   r   r   )r4   ro   r   	List[Any])r4   ro   r   r$   r:   )r4   ro   r   r   r   r   r   r   )r4   ro   r5   r   r   r   )r   r   )r4   r   rC   Union[str, Iterable[str]]rQ   r$   rL   r   rR   r   r   r   )
rC   r   rQ   r$   rL   r   rR   r   r   r   )r   r   rR   r   r   r   )r4   r   rI   r   rQ   r$   rL   r   rR   r   r   r   )
rI   r   rQ   r$   rL   r   rR   r   r   r   )r   r   r   r   )r4   r   r   ro   rQ   r$   rL   r   rR   r   r   r   )r4   r   r   r   rQ   r$   rL   r   rR   r   r   r   )
r   r   rQ   r$   rL   r   rR   r   r   r   )r   r   r   r   )+rn   
__module__r   __firstlineno____doc__dictr#   __annotations__r   r%   r&   r6   propertyr1   r*   rF   rJ   rU   rY   rO   rX   rN   rx   rv   r   r   r{   r   r\   rB   r   r   r   r   r   r   r   r   r   r   r   r   __static_attributes__rA   r8   r   r    r    /   sh   DL  $#2:*/<3;:0= '+'+	;; %; %	;
 ;* & & $ $( .2)!) ) (	)
 +) ) 
)` .2!  (	
 +  
2E+ .2	** (* +	*
 * 
*X 0$$ 9=,,$',25,	,& .2L!L )L (	L
 +L L 
LJ .2P!P )P (	P
 +P P 
P, .2	@(@ (@ +	@
 @ 
@<V/V7:V	V .2L!L 'L (	L
 +L L 
LJ .2P!P 'P (	P
 +P P 
P, .2	@&@ (@ +	@
 @ 
@:H .2L!L L (	L
 +L L 
LH .2P!P (P (	P
 +P P 
P* .2	@'@ (@ +	@
 @ 
@:Pr8   r    N)r   r$   r   r$   )!r   
__future__r   
contextlibr   typingr   r   r   r   r	   r
   r   r   r   r   itemadapterr   parselr   parsel.utilsr   r   itemloaders.commonr   itemloaders.processorsr   itemloaders.utilsr   typing_extensionsr   r   r    rA   r8   r   <module>r      sN    #    $  / 2 + )&[	P [	Pr8   