#!/usr/bin/env python3
"""
Riddermark Bil — Scraper standalone
Next.js avec __NEXT_DATA__ contenant carsJson.
Le listing /kopa-bil/ charge ~39 voitures à la fois (premier batch).
On filtre sur les marques premium cibles.
"""

import json
import re
import urllib.request

# Entry point: the /kopa-bil/ listing page embeds the car inventory in __NEXT_DATA__.
LISTING_URL = "https://www.riddermarkbil.se/kopa-bil/"
# Desktop browser UA — the site may serve different markup to unknown agents.
HEADERS = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"}

# Target premium makes; matched exactly against the "make" field of each vehicle.
PREMIUM_BRANDS = {
    "Mercedes-Benz", "BMW", "Porsche", "Audi", "Volvo",
    "Jaguar", "Land Rover", "Lexus", "Ferrari", "Lamborghini"
}


def fetch_cars_json():
    """Extract carsJson from the listing page's __NEXT_DATA__ script tag.

    Returns:
        The raw list of vehicle dicts found at props.pageProps.carsJson.

    Raises:
        RuntimeError: if the __NEXT_DATA__ script tag is not present.
        KeyError: if the Next.js payload shape changed (missing carsJson path).
    """
    req = urllib.request.Request(LISTING_URL, headers=HEADERS)
    # Use a context manager so the underlying socket is always closed,
    # even if read()/decode() raises (the original leaked the response).
    with urllib.request.urlopen(req, timeout=30) as resp:
        html = resp.read().decode("utf-8")

    # Non-greedy [\s\S]*? grabs the JSON payload between the script tags.
    m = re.search(r'<script id="__NEXT_DATA__"[^>]*>([\s\S]*?)</script>', html)
    if not m:
        raise RuntimeError("__NEXT_DATA__ non trouvé dans le HTML")

    nd = json.loads(m.group(1))
    return nd["props"]["pageProps"]["carsJson"]


def normalize(car):
    """Convert a Riddermark vehicle dict into the standardized AutoPremium format.

    Args:
        car: one raw vehicle dict as found in carsJson.

    Returns:
        A dict with French-keyed standardized fields (titre, marque, prix, ...).
        Missing source fields yield None (or False for "vendu").
    """
    make = car.get("make", "")
    model = car.get("model", "")
    desc = car.get("modelDescription", "")
    title = car.get("title", f"{make} {model}")
    # "or" guards against an explicit None value (get's default only covers
    # a *missing* key) — the original crashed on {"licenseplate": None}.
    plate = (car.get("licenseplate") or "").lower()

    # Individual detail-page URL, reconstructed from make + plate.
    url = (
        f"https://www.riddermarkbil.se/kopa-bil/{make.lower().replace(' ', '-')}/{plate}/"
        if plate else None
    )

    # First gallery image; tolerate a missing "url" key or a None images list.
    images = car.get("images") or []
    image = images[0].get("url") if images and isinstance(images[0], dict) else None
    # Fall back to the cover image (only if it is actually a dict).
    if not image and isinstance(car.get("coverImage"), dict):
        image = car["coverImage"].get("url")

    # location may be present-but-None; the original .get chain raised then.
    location = car.get("location") or {}

    return {
        "titre": title,
        "marque": make,
        "modele": model,
        "description": desc,
        "prix": car.get("price"),
        "devise": "SEK",
        "km": car.get("mileage"),
        "annee": car.get("modelYear"),
        "url": url,
        "image": image,
        "source": "riddermark-bil",
        "pays": "SE",
        "carburant": car.get("fuelType"),
        "boite": car.get("gearboxType"),
        "vin": car.get("vinNumber"),
        "immatriculation": car.get("licenseplate"),
        "lieu": location.get("name"),
        "vendu": car.get("isSold", False)
    }


def scrape(premium_only=True):
    """Fetch the listing and return the normalized vehicle list.

    Args:
        premium_only: when True, keep only makes present in PREMIUM_BRANDS.

    Returns:
        List of standardized vehicle dicts (sold cars always excluded).
    """
    raw = fetch_cars_json()
    print(f"  Riddermark Bil — {len(raw)} véhicules dans __NEXT_DATA__")

    def _keep(entry):
        # Drop sold cars; optionally drop non-premium makes.
        if entry.get("isSold"):
            return False
        return not premium_only or entry.get("make") in PREMIUM_BRANDS

    vehicles = [normalize(entry) for entry in raw if _keep(entry)]

    print(f"  Après filtre premium + non-vendu : {len(vehicles)}")
    return vehicles


if __name__ == "__main__":
    # CLI entry point: scrape, print a human-readable summary, dump JSON.
    vehicles = scrape()
    print()
    for v in vehicles:
        # Swedish-style thousands separator: format with "," then swap for spaces.
        if v["prix"]:
            prix_str = f"{v['prix']:,} SEK".replace(",", " ")
        else:
            prix_str = "N/A"
        print(f"  {v['titre']}")
        print(f"    {v.get('annee','?')} · {v.get('km','?')} km · {prix_str}")
        print(f"    URL: {v['url']}")
        print()

    with open("riddermark_results.json", "w", encoding="utf-8") as f:
        json.dump(vehicles, f, ensure_ascii=False, indent=2)
    print("  Sauvegardé dans riddermark_results.json")
