import asyncio
from asyncio import TaskGroup
from typing import Optional
from urllib.parse import quote_plus

from loguru import logger
from playwright.async_api import BrowserContext
from playwright.async_api import Page as AsyncPage
from rich.progress import Progress, TaskID


HTML_PAGE_RESULT = []


async def goto_task(
	url: str,
	context_browser: BrowserContext,
	task: TaskID,
	progress: Progress,
	semaphore: asyncio.Semaphore,
	tg_instance: TaskGroup,
	page: AsyncPage,
	page_results: int,
) -> Optional[str]:
	"""Get HTML content from a URL.

	This function will load a page from a given URL,
	get its HTML content and save it to a global list.
	It will also update a progress bar with the task ID.

	Parameters:
		url (str): The URL of the page to be loaded.
		context_browser (BrowserContext): The browser context to be used.
		task (TaskID): The task ID of the current task.
		progress (Progress): The progress bar to be updated.
		semaphore (asyncio.Semaphore): The semaphore to be used to limit the number of concurrent tasks.
		tg_instance (TaskGroup): The task group instance.
		page (AsyncPage): The page object to be used.
		page_results (int): The number of pages to be scrapped.

	Returns:
		Optional[str]: The HTML content of the page if successful, otherwise None.
	"""
	try:
		async with page:
			# Extract page number from URL - modified for 1688.com
			page_num = url.split('beginPage=')[1] if 'beginPage=' in url else '1'
			logger.info(f"Loading page {page_num} ... ")
			await page.goto(url, wait_until="domcontentloaded", timeout=0)
			logger.info(f"get response text from web page {page_num} ... ")
			# Get full page HTML content for 1688.com
			html_body = await page.content()
			progress.start_task(task)
			progress.update(task, advance=100 / page_results)
			global HTML_PAGE_RESULT
			HTML_PAGE_RESULT.append(html_body)
			logger.info(f"Closing page {page_num} ... ")
	except Exception as e:
		page_num = url.split('beginPage=')[1] if 'beginPage=' in url else '1'
		logger.error(f"Error processing page {page_num}: {e}")
		return None


def urls_pusher(words: str, stop_at: int):
	# Modified to use 1688.com instead of alibaba.com
	for i in range(1, stop_at + 1):
		# 1688 uses beginPage parameter for pagination
		yield f"https://s.1688.com/selloffer/offer_search.htm?keywords={words}&beginPage={i}"
