import os
import re
from playwright.async_api import async_playwright

# Chromium command-line switches handed to `p.chromium.launch(args=...)`
# when extract_mobilede_url starts its headless browser.
_LAUNCH_ARGS = [
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--single-process',
]

async def extract_mobilede_url(url: str) -> dict:
    """Return ``window.__INITIAL_STATE__`` from a single mobile.de listing page.

    A headless Chromium instance is launched with ``_LAUNCH_ARGS``, the page
    is loaded, and the JavaScript expression ``window.__INITIAL_STATE__`` is
    evaluated in it.

    Args:
        url: Address of the individual listing page to open.

    Returns:
        The evaluated value, or ``{}`` when that value is falsy or when any
        error occurs once the browser has been launched (the error is
        printed, not raised).
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True, args=_LAUNCH_ARGS)
        try:
            # Page creation lives inside the try block so that a failure here
            # still reaches the `finally` clause and closes the browser, and
            # is reported like any other scraping error.
            page = await browser.new_page()
            await page.goto(url, wait_until='networkidle', timeout=30000)
            state = await page.evaluate('window.__INITIAL_STATE__')
            return state or {}
        except Exception as e:
            # Best-effort scraper: log and fall back to an empty result.
            print(f'[Playwright] fiche erreur: {e}')
            return {}
        finally:
            await browser.close()


# ──────────────────────────────────────────────────────────────────────────────
# mobile.de SEARCH — via Firecrawl (Playwright is blocked by the anti-bot protection)
# ──────────────────────────────────────────────────────────────────────────────

async def search_mobilede(url: str) -> list:
    """Scrape a mobile.de search-results page through Firecrawl.

    Firecrawl is used instead of Playwright because, per the original
    author's note, Playwright is blocked by the site's anti-bot protection.
    The API key is read from the ``FIRECRAWL_API_KEY`` environment variable;
    an empty value or one starting with ``REMPLACER`` is treated as
    "not configured".

    Args:
        url: Address of the search-results page to scrape.

    Returns:
        A list of dicts ``{title, prix, km, annee, url, photo, devise}`` as
        produced by ``_parse_mobilede_markdown``. An empty list is returned
        when the key is missing, the markdown is empty, or any error occurs
        (the error is printed, not raised).
    """
    # Function-scope imports keep this block self-contained; both are stdlib.
    import asyncio
    import functools

    try:
        from firecrawl import Firecrawl
        api_key = os.environ.get('FIRECRAWL_API_KEY', '')
        if not api_key or api_key.startswith('REMPLACER'):
            print('[Firecrawl] FIRECRAWL_API_KEY non configurée')
            return []
        fc = Firecrawl(api_key=api_key)

        # `fc.scrape` is a synchronous call; running it directly inside this
        # coroutine would stall the event loop until the HTTP round trip is
        # done. Delegate it to the default thread-pool executor instead.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, functools.partial(fc.scrape, url, formats=['markdown'])
        )

        # The result is read either as an object exposing `.markdown` or,
        # failing that, as a plain dict with a 'markdown' key.
        md = getattr(result, 'markdown', None) or ''
        if not md and isinstance(result, dict):
            md = result.get('markdown', '')
        if not md:
            print('[Firecrawl] Markdown vide')
            return []

        listings = _parse_mobilede_markdown(md)
        print(f'[Firecrawl search] ✅ {len(listings)} annonces parsées')
        return listings
    except Exception as e:
        # Best-effort scraper: any failure (including a missing firecrawl
        # package) is logged and reported as "no results".
        print(f'[Firecrawl search] ❌ Erreur: {e}')
        return []


# Markdown link target of a listing detail page; group 1 is the listing id.
_MOBILEDE_URL_RE = re.compile(
    r'\(https://suchen\.mobile\.de/fahrzeuge/details\.html\?id=(\d+)[^)]*\)'
)
# Bold markdown spans (candidate titles).
_MOBILEDE_BOLD_RE = re.compile(r'\*\*([^*]{4,})\*\*')
# Status labels glued to the front of a title.
_MOBILEDE_BADGE_RE = re.compile(
    r'^(Gesponsert|NEU(?=[A-ZÄÖÜ])|Unfallfrei|Reparierter Unfallschaden)+'
)
# Lowercase letter immediately followed by an uppercase letter or a digit.
_MOBILEDE_GLUED_RE = re.compile(r'([a-zäöü])([A-ZÄÖÜ0-9])')
# Alt text of a markdown image hosted on img.classistatic.
_MOBILEDE_ALT_RE = re.compile(r'!\[([^\]]{4,})\]\(https://img\.classistatic')
# Numbers with optional dot thousands separators, followed by a unit. The
# `(?<!\d)` guard makes a match start at the beginning of a digit run, so an
# ungrouped value such as "18500 €" is read as 18500 rather than "500".
_MOBILEDE_PRICE_RE = re.compile(r'(?<!\d)(\d+(?:\.\d{3})*)\s*€')
_MOBILEDE_KM_RE = re.compile(r'(?<!\d)(\d+(?:\.\d{3})*)\s*km\b')
# Year taken from an "EZ MM/YYYY" marker.
_MOBILEDE_YEAR_RE = re.compile(r'EZ\s+\d{2}/(\d{4})')
# classistatic image URL ending in a "rule=mo-<width>w" parameter.
_MOBILEDE_PHOTO_RE = re.compile(
    r'(https://img\.classistatic\.de/[^\s)\]]+rule=mo-\d+w)'
)


def _mobilede_title(block: str) -> str:
    """Return the listing title found in *block*, or '' when there is none."""
    # Preferred source: the first bold span that is still longer than four
    # characters once the status labels have been stripped.
    for bold in _MOBILEDE_BOLD_RE.findall(block):
        cleaned = _MOBILEDE_BADGE_RE.sub('', bold).strip()
        # Insert a space where a lowercase letter is glued to an uppercase
        # letter or digit (described as a Firecrawl artefact by the author).
        cleaned = _MOBILEDE_GLUED_RE.sub(r'\1 \2', cleaned)
        if len(cleaned) > 4:
            return cleaned
    # Fallback: alt text of the first classistatic image in the block.
    alt_m = _MOBILEDE_ALT_RE.search(block)
    if alt_m:
        return alt_m.group(1).replace('\\*', '').strip()
    return ''


def _mobilede_int(match) -> 'int | None':
    """Convert group 1 of a dotted-number match to int; None if no match."""
    if match is None:
        return None
    return int(match.group(1).replace('.', ''))


def _parse_mobilede_markdown(md: str) -> list:
    """Parse the Firecrawl markdown of a mobile.de results page.

    Strategy: locate every listing detail URL and read the text that
    precedes it. The text block for a listing starts at the end of the
    previous listing URL and is capped at 2000 characters, which keeps the
    page sidebar out of the first listing's block.

    Args:
        md: Markdown text of the results page.

    Returns:
        A list of dicts with keys ``title``, ``prix``, ``km``, ``annee``,
        ``url``, ``photo`` and ``devise`` (always ``'EUR'``). Only the first
        occurrence of each listing id is considered. Entries with neither a
        title nor a price, or with a price below 1000, are dropped.
    """
    listings = []
    seen_ids = set()
    prev_end = 0  # end offset of the previous listing URL (0 before the first)

    for match in _MOBILEDE_URL_RE.finditer(md):
        listing_id = match.group(1)
        url_pos = match.start()
        # Use the real end of the previous URL as the lower bound. A fixed
        # "+100" offset either swallowed the start of this block (short URLs)
        # or let the previous URL's tail in (long URLs).
        block_floor = prev_end
        prev_end = match.end()

        if listing_id in seen_ids:
            continue
        seen_ids.add(listing_id)

        block = md[max(block_floor, url_pos - 2000):url_pos]

        title = _mobilede_title(block)
        prix = _mobilede_int(_MOBILEDE_PRICE_RE.search(block))
        km = _mobilede_int(_MOBILEDE_KM_RE.search(block))

        ez_m = _MOBILEDE_YEAR_RE.search(block)
        annee = int(ez_m.group(1)) if ez_m else None

        # Request the 640w rendition instead of the 160w thumbnail.
        photo_m = _MOBILEDE_PHOTO_RE.search(block)
        photo = photo_m.group(1).replace('mo-160w', 'mo-640w') if photo_m else None

        # Drop entries without useful data, and sidebar values (price < 1000).
        if (title or prix) and (prix is None or prix >= 1000):
            listings.append({
                'title': title,
                'prix': prix,
                'km': km,
                'annee': annee,
                'url': f'https://suchen.mobile.de/fahrzeuge/details.html?id={listing_id}',
                'photo': photo,
                'devise': 'EUR',
            })

    return listings