
    -hD                     f    S SK r S SKrS SKJrJrJrJr  S SKJr  S SK	J
r
  S SKJr   " S S\5      rg)    N)AnyIteratorLiteralOptional)
BaseLoader)Document)get_from_envc                       \ rS rSrSr SSSSSSS.S\\   S\\   S\\   S	\\   S
\S   S\\   4S jjjr	S\
\   4S jrSrg)FireCrawlLoader
   a	  
FireCrawlLoader document loader integration

Setup:
    Install ``firecrawl-py``,``langchain_community`` and set environment variable ``FIRECRAWL_API_KEY``.

    .. code-block:: bash

        pip install -U firecrawl-py langchain_community
        export FIRECRAWL_API_KEY="your-api-key"

Instantiate:
    .. code-block:: python

        from langchain_community.document_loaders import FireCrawlLoader

        loader = FireCrawlLoader(
            url = "https://firecrawl.dev",
            mode = "crawl"
            # other params = ...
        )

Lazy load:
    .. code-block:: python

        docs = []
        docs_lazy = loader.lazy_load()

        # async variant:
        # docs_lazy = await loader.alazy_load()

        for doc in docs_lazy:
            docs.append(doc)
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Introducing [Smart Crawl!](https://www.firecrawl.dev/smart-crawl)
         Join the waitlist to turn any web
        {'ogUrl': 'https://www.firecrawl.dev/', 'title': 'Home - Firecrawl', 'robots': 'follow, index', 'ogImage': 'https://www.firecrawl.dev/og.png?123', 'ogTitle': 'Firecrawl', 'sitemap': {'lastmod': '2024-08-12T00:28:16.681Z', 'changefreq': 'weekly'}, 'keywords': 'Firecrawl,Markdown,Data,Mendable,Langchain', 'sourceURL': 'https://www.firecrawl.dev/', 'ogSiteName': 'Firecrawl', 'description': 'Firecrawl crawls and converts any website into clean markdown.', 'ogDescription': 'Turn any website into LLM-ready data.', 'pageStatusCode': 200, 'ogLocaleAlternate': []}

Async load:
    .. code-block:: python

        docs = await loader.aload()
        print(docs[0].page_content[:100])
        print(docs[0].metadata)

    .. code-block:: python

        Introducing [Smart Crawl!](https://www.firecrawl.dev/smart-crawl)
         Join the waitlist to turn any web
        {'ogUrl': 'https://www.firecrawl.dev/', 'title': 'Home - Firecrawl', 'robots': 'follow, index', 'ogImage': 'https://www.firecrawl.dev/og.png?123', 'ogTitle': 'Firecrawl', 'sitemap': {'lastmod': '2024-08-12T00:28:16.681Z', 'changefreq': 'weekly'}, 'keywords': 'Firecrawl,Markdown,Data,Mendable,Langchain', 'sourceURL': 'https://www.firecrawl.dev/', 'ogSiteName': 'Firecrawl', 'description': 'Firecrawl crawls and converts any website into clean markdown.', 'ogDescription': 'Turn any website into LLM-ready data.', 'pageStatusCode': 200, 'ogLocaleAlternate': []}

Ncrawl)queryapi_keyapi_urlmodeparamsurlr   r   r   r   )r   scrapemapextractsearchr   c                    SSK Jn  US;  a  [        SU S35      eUS;   a  U(       d  [        S5      eUS	:X  a/  U(       d(  U(       a  UR	                  S
5      (       d  [        S5      eU=(       d    [        SS5      nU=(       d    [        R                  " S5      =(       d    SnU" X8S9U l         U=(       d    SU l        XPl	        U=(       d    0 U l
        Ub  X R                  S
'   gg! [         a    [        S5      ef = f)aQ  Initialize with API key and url.

Args:
    url: The url to be crawled.
    api_key: The Firecrawl API key. If not specified will be read from env var
        FIRECRAWL_API_KEY. Get an API key
    api_url: The Firecrawl API URL. If not specified will be read from env var
        FIRECRAWL_API_URL or defaults to https://api.firecrawl.dev.
    mode: The mode to run the loader in. Default is "crawl".
         Options include "scrape" (single url),
         "crawl" (all accessible sub pages),
         "map" (returns list of links that are semantically related).
         "extract" (extracts structured data from a page).
         "search" (search for data across the web).
    params: The parameters to pass to the Firecrawl API.
        Examples include crawlerOptions.
        For more details, visit: https://github.com/mendableai/firecrawl-py
r   )FirecrawlAppzD`firecrawl` package not found, please run `pip install firecrawl-py`)r   r   r   r   r   r   Invalid mode 'zT'.
                Allowed: 'crawl', 'scrape', 'search', 'map', 'extract', 'search'.)r   r   r   r   z2Url must be provided for modes other than 'search'r   r   z&Query must be provided for search moder   FIRECRAWL_API_KEYFIRECRAWL_API_URLzhttps://api.firecrawl.dev)r   r    N)	firecrawlr   ImportError
ValueErrorgetr	   osgetenvr   r   r   )	selfr   r   r   r   r   r   r   resolved_api_urls	            ^/root/34ku/venv/lib/python3.13/site-packages/langchain_community/document_loaders/firecrawl.py__init__FireCrawlLoader.__init__F   s   :	.
 PP"4& )R U 
 88QRR8Uv&**W:M:MEFFI\)5HI Tryy!45T9T 	 &gP9"	l#(KK  3  	V 	s   C+ +Dreturnc           	   #     #    / nU R                   S:X  ah  1 SknU R                  R                  5        VVs0 s H  u  p4X2;   d  M  X4_M     nnnSUS'   U R                  R                  " U R
                  40 UD6/nGOU R                   S:X  a  U R
                  (       d  [        S5      e1 SknU R                  R                  5        VVs0 s H  u  p4X2;   d  M  X4_M     nnnSUS'   U R                  R                  " U R
                  40 UD6n[        U[        5      (       a6  UR                  S/ 5      n[        U[        5      (       a  [        U5      O/ nGO[        US/ 5      n[        U[        5      (       a  [        U5      O/ nGOsU R                   S	:X  Ga2  U R
                  (       d  [        S
5      e1 SknU R                  R                  5        VVs0 s H  u  p4X2;   d  M  X4_M     nnnSUS'   U R                  R                  " U R
                  40 UD6n[        U[        5      (       a5  UR                  S5      n	[        U	[        5      (       a  [        U	5      O/ nGO[        US5      (       a0  [        US5      n	[        U	[        5      (       a  [        U	5      O/ nGOV[        U[        5      n
U
(       a  [        U5      O/ nGO0U R                   S:X  a  U R
                  (       d  [        S5      e1 SknU R                  R                  5        VVs0 s H  u  p4X2;   d  M  X4_M     nnnSUS'   [        U R                  R                   " U R
                  /40 UD65      /nGOU R                   S:X  Gah  1 SknU R                  R                  5        VVs0 s H  u  p4X2;   d  M  X4_M     nnnSUS'   U R                  R"                  " S,SU R                  R                  S5      0UD6n[        U[        5      (       a  [        U5      nGO/ n/ n[        U[        5      (       a3  UR                  S5      UR                  S5      UR                  S5      /nO&[        USS 5      [        USS 5      [        USS 5      /nSUS   4SUS   4SUS   44 GHC  u  pU(       d  M  U GH-  n[        U[        5      (       d  [        USS 5      OUR                  S5      n[        U[        5      (       d  [        USS 5      OUR                  S5      n[        U[        5      (       d  [        USS 5      OUR                  S5      nU=(       d    U=(       d    U=(       d    SnUU[        U[        5      (       d  [        USS 5      OUR                  S5      US.R                  5        VVs0 s H  u  p4Uc  M
  X4_M     nnnUR%                  UUS.5        GM0     GMF     UnO[        S U R                    S!35      eU GH5  nU R                   S	:X  Ga  [        U[        5      (       a  Un0 nGO[        U[        5      (       a  UR                  S5      =(       d    UR                  S"5      =(       d    Sn[        U[        5      (       a  UO[        U=(       d    S5      nUR                  S5      UR                  S5      S#.R                  5        VVs0 s H  u  p4Uc  M
  X4_M     nnnGO1[        US5      (       d  [        US5      (       az  [        USS5      =(       d    [        US"S5      n[        U[        5      (       a  UO[        U=(       d    S5      n0 n[        USS 5      n[        USS 5      nUb  UUS'   Ub  UUS'   GO[        U5      n0 nGOU R                   S:X  a  [        U5      n0 nGOgU R                   S:X  a  [        U[        5      (       ar  UR                  S$5      =(       d    Sn[        U[        5      (       a  UO[        U=(       d    S5      nUR                  S%0 5      n[        U[        5      (       a  UO0 nGO[        U5      n0 nGO[        U[        5      (       a  UR                  S$5      =(       d*    UR                  S&5      =(       d    UR                  S'S5      n[        U[        5      (       a  UO[        U=(       d    S5      nUR                  S%0 5      nOv[        US$S 5      =(       d!    [        US&S 5      =(       d    [        US'S5      n[        U[        5      (       a  UO[        U=(       d    S5      n[        US%0 5      =(       d    0 n[        U[        5      (       d  [        US(5      (       a+  ['        UR(                  5      (       a  UR)                  5       nOe[*        R,                  " U5      (       a  [*        R.                  " U5      nO3[        US)5      (       a  [        [1        U5      5      nOS*[        U5      0nU(       d  GM)  [3        UUS+9v   GM8     g s  snnf s  snnf s  snnf s  snnf s  snnf s  snnf s  snnf 7f)-Nr   >   proxymobileactionsformatsheadersmax_ageparserstimeoutlocationuse_mockwait_for	block_ads	fast_modeexclude_tagsinclude_tagsstore_in_cacheonly_main_contentremove_base64_imagesskip_tls_verification	langchainintegrationr   zURL is required for crawl mode>   delaylimitpromptr2   webhookexclude_pathsinclude_pathspoll_intervalignore_sitemapscrape_optionsmax_concurrencyallow_subdomainscrawl_entire_domainmax_discovery_depthzero_data_retentionallow_external_linksignore_query_parametersdatar   zURL is required for map mode>   rA   r   sitemapr2   r3   include_subdomainslinksr   z URL is required for extract mode>   agentrB   schemar2   show_sourcesrF   system_promptrH   enable_web_searchignore_invalid_urlsrN   r   >   tbsrA   sourcesr2   r3   
categoriesrH   rY   r   webnewsimagesr         r   titledescriptionr   category)r   rb   rd   type)markdownmetadatar   zJ'.
                Allowed: 'crawl', 'scrape', 'map', 'extract', 'search'.href)rb   rc   rf   rg   htmlrawHtml
model_dump__dict__value)page_contentrg    )r   r   itemsr   r   r   r    r   
isinstancedictr!   listgetattrr   hasattrstrr   r   appendcallablerk   dataclassesis_dataclassasdictvarsr   )r$   firecrawl_docsallowedkvkwargscrawl_responserP   map_responserS   is_listsearch_dataresults
containerskindrp   itemurl_val	title_valdesc_valcontent_valmetadata_valdocrn   metapage_content_valuerb   rc   markdown_valuemetadata_objcontent_values                                  r&   	lazy_loadFireCrawlLoader.lazy_load   sL	    $&99 G* (,{{'8'8':K':tqaldad':FK$/F=!"nn33DHHGGHNYY'!88 !ABBG& (,{{'8'8':K':tqaldad':FK$/F=!!^^11$((EfEN.$//%))&"5/9$/E/Ed2~vr:/9$/E/Ed2YY%88 !?@@G (,{{'8'8':K':tqaldad':FK$/F=!>>--dhhA&AL ,--$((10:5$0G0GeRw//g60:5$0G0GeR$\487>l!3BYY)#88 !CDDG (,{{'8'8':K':tqaldad':FK$/F=!!$.."8"8$(("Nv"NOPNYY("	G (,{{'8'8':K':tqaldad':FK$/F=!..// kkoog.28K +t,,!%k!2 13
k400#.#/#1"J  UD9VT:Xt<"J JqM*Z]+z!}-$KD
 !  % $.dD#9#9 $D%6!%%   $.dD#9#9 $D'48!%'!2 " $.dD#9#9 $D->!%-!8 !
 '/&L)&Lw&L" (/)2'1$'='= -4D*d,K%)XXj%9(,) $eg)&()&  ! !AD)& % (  )4,O; !&$N ")"499+ .H K  "CyyE!c3''(+L+-DT**),)P3776?)Pb& &&8#>> + !3!9r: ! &)WWW%5+.77=+A%  %'%"%"DA  %"  D S%((GC,A,A)0eR)@ *GVRE&
 &&8#>> + !3!9r: !
 D#C$7E")#}d"CK((-W"..9]+#&s8LDi'"3xh&c4((%(WWZ%8%>BN &nc:: ' !526 !
 $'77:r#:L+5lD+I+I<rD#&s8LDc4((
+XswwvX#'')UWBX "
 &mS99 & !4"5 !
 77:r2D  Z6 7"357"3	26 " &mS99 & !4"5 !
 #3
B7=2D "$--t\22x7P7P#0$11$77*11$7 z22#DJ/ 'T3) { " L2 L* L< L L`(Ds   4i$h:h:A?i$
i i C9i$i'i-D-i$i)i/A.i$i,i2G;i$-	i
:i
 Di$	i"i(M<i$)r   r   r   r   )N)__name__
__module____qualname____firstlineno____doc__r   rv   r   rr   r'   r   r   r   __static_attributes__ro       r&   r   r   
   s    7z "9)  $!%!%GN!%9)c]9) }	9)
 #9) #9) CD9) 9)vX8H- Xr   r   )ry   r"   typingr   r   r   r   langchain_core.document_loadersr   langchain_core.documentsr   langchain_core.utilsr	   r   ro   r   r&   <module>r      s(     	 3 3 6 - -Oj Or   