#!/usr/bin/env python3
"""
Willhaben Flip-Finder — find underpriced resellable items in Austria.

Strategy: small-capital goods arbitrage. Watch categories with deep, liquid
resale demand and well-known price floors. Flag fresh listings priced at or
below the "instant-flip" threshold. You inspect, buy, relist properly
(good photos, right keywords, sane price), pocket the spread.

Personal-use tool: polite request rate, no mass scraping, read-only.

Usage:
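    python3 flipfinder.py                # scan all watches once (default: last 48h)
    python3 flipfinder.py --hours 24     # only listings from last 24h
    python3 flipfinder.py --only lego    # only watches whose query contains "lego"
    python3 flipfinder.py --reset        # forget seen listings, then scan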
"""

import argparse
import datetime as dt
import json
import os
import re
import sys
import time
import warnings

warnings.filterwarnings("ignore")
import requests

# Search endpoint requested by fetch_listings(); filters go in the query string.
BASE = "https://www.willhaben.at/iad/kaufen-und-verkaufen/marktplatz"
# User-Agent header value installed on the shared HTTP session below.
UA = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126 Safari/537.36")
# JSON file holding the ids of listings already scanned; it lives in the same
# directory as this script, independent of the current working directory.
SEEN_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "seen.json")

# Each watch: search query, max price worth flagging, realistic resale note.
#   "q"    - keyword sent to the site as the search term
#   "max"  - price ceiling: sent as the PRICE_TO filter and re-checked locally
#   "note" - free text printed on the "resale:" line under every hit
# Thresholds assume Vienna pickup; tune as you learn the market.
WATCHES = [
    {"q": "lego konvolut",        "max": 50,  "note": "sorted sets resell ~10-15 EUR/kg; minifigs alone often beat the lot price"},
    {"q": "lego kg",              "max": 60,  "note": "buy <=8 EUR/kg, resell sorted/by set 12-18 EUR/kg"},
    {"q": "lego kilo",            "max": 60,  "note": "same kg play, different seller wording"},
    {"q": "lego kiste",           "max": 50,  "note": "box lots priced by volume not content - check photos for sets/figs"},
    {"q": "lego sammlung",        "max": 60,  "note": "collections often priced below part-out value"},
    {"q": "lego minifiguren",     "max": 40,  "note": "figs resell 2-15 EUR each; licensed (SW/Marvel) 5-25 EUR"},
    {"q": "lego star wars",       "max": 40,  "note": "strongest theme; rare figs alone can carry a lot"},
    {"q": "lego technic",         "max": 50,  "note": "big sets resell 60-200 EUR; motors/pneumatics valuable"},
    {"q": "lego eisenbahn",       "max": 60,  "note": "9V/12V train era highly collected; track+motor = money"},
    {"q": "lego duplo kg",        "max": 30,  "note": "duplo resells 5-8 EUR/kg, buy <=3 EUR/kg"},
    {"q": "nintendo switch spiele konvolut", "max": 50, "note": "Switch games hold 15-30 EUR each used"},
    {"q": "gameboy",              "max": 35,  "note": "working GB/GBC/GBA 40-80 EUR; even defekt has parts value"},
    {"q": "nintendo wii konvolut", "max": 25, "note": "console+games bundles split well; Mario titles 15-30 EUR each"},
    {"q": "kitchenaid",           "max": 80,  "note": "working stand mixers resell 150-250 EUR"},
    {"q": "dyson",                "max": 60,  "note": "V8+ vacuums resell 120-250 EUR; battery swap is cheap"},
    {"q": "thermomix",            "max": 150, "note": "TM5/TM6 resell 350-700 EUR; verify model + function"},
    {"q": "makita",               "max": 50,  "note": "18V pro tools liquid; batteries alone 30-50 EUR"},
    {"q": "sonos",                "max": 60,  "note": "Play:1/One resell 90-150 EUR"},
    {"q": "airpods pro",          "max": 60,  "note": "genuine Pro 2 resell 120-160 EUR; beware fakes - serial check"},
    {"q": "ipad",                 "max": 80,  "note": "9th gen+ resell 150+; check iCloud lock before buying!"},
    {"q": "spiegelreflex objektiv", "max": 60, "note": "Canon/Nikon glass holds value; resell on mpb/ebay"},
    {"q": "dachbodenfund",        "max": 50,  "note": "estate clear-outs = mispriced lots; inspect for anything above"},
]

# Words that signal motivated sellers / bulk mispricing.
# Applied with .search() to the listing title, case-insensitively and without
# word boundaries, so each alternative also matches inside longer words.
# A match marks the hit as HOT, which sorts it to the top of the report.
HOT_WORDS = re.compile(
    r"konvolut|muss weg|schnell|aufl[öo]sung|dachboden|keller|erbe|nachlass|"
    r"umzug|sammlung", re.I)

# One HTTP session shared by every fetch_listings() call; all requests made
# through it carry the User-Agent defined in UA.
session = requests.Session()
session.headers["User-Agent"] = UA


def fetch_listings(query, price_to):
    """Fetch one page of search results and return them as plain dicts.

    Requests ``BASE`` with ``keyword=query``, ``PRICE_TO=int(price_to)`` and
    ``sort=1``, trying up to three times with a short back-off between tries.
    The adverts are read from the JSON embedded in the page's
    ``__NEXT_DATA__`` script tag, located via ``find_ads``.

    Args:
        query: search keyword.
        price_to: upper price bound sent to the site (converted with ``int``).

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``price`` (float, or
        None when missing or not numeric), ``price_disp``, ``loc``,
        ``published`` and ``url``. All values except ``price`` are strings.
        The list is empty when the page could not be fetched or parsed; in
        that case one warning line is written to stderr so a failed scan is
        not mistaken for "no results".
    """
    params = {"keyword": query, "PRICE_TO": int(price_to), "sort": 1}  # newest
    attempts = 3
    html = None
    for attempt in range(attempts):
        try:
            r = session.get(BASE, params=params, timeout=20)
        except requests.RequestException:
            r = None
        if r is not None and r.status_code == 200:
            html = r.text
            break
        # Back off before the next try, but do not stall after the last one.
        if attempt < attempts - 1:
            time.sleep(2 + attempt)
    if html is None:
        print("warning: no usable response for %r after %d attempts"
              % (query, attempts), file=sys.stderr)
        return []

    m = re.search(
        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        html, re.S)
    data = None
    if m:
        try:
            data = json.loads(m.group(1))
        except ValueError:
            data = None
    if data is None:
        print("warning: no parsable __NEXT_DATA__ payload for %r" % query,
              file=sys.stderr)
        return []

    ads = find_ads(data)
    # find_ads may hand back the wrapper dict or the advert list itself.
    lst = ads.get("advertSummary") if isinstance(ads, dict) else ads
    if not isinstance(lst, list):
        return []
    return [_parse_ad(ad) for ad in lst if isinstance(ad, dict)]


def _parse_ad(ad):
    """Flatten one raw advert dict into the record returned by fetch_listings."""
    container = ad.get("attributes")
    raw_attrs = container.get("attribute") if isinstance(container, dict) else None
    attrs = {}
    for a in raw_attrs if isinstance(raw_attrs, list) else []:
        if not isinstance(a, dict):
            continue
        name, values = a.get("name"), a.get("values")
        # Only the first value of each named attribute is used.
        if isinstance(name, str) and isinstance(values, list) and values:
            attrs[name] = values[0]

    def text(key, default):
        value = attrs.get(key)
        return default if value is None else str(value)

    try:
        price = float(attrs.get("PRICE"))
    except (TypeError, ValueError):
        # Missing, null or non-numeric price: callers skip such listings.
        price = None

    ad_id = ad.get("id")
    title = ad.get("description")
    url = text("SEO_URL", "")
    return {
        # A null id must stay falsy; str(None) would yield the truthy "None".
        "id": "" if ad_id is None else str(ad_id),
        "title": "?" if title is None else str(title),
        "price": price,
        "price_disp": text("PRICE_FOR_DISPLAY", "?"),
        "loc": text("LOCATION", "?"),
        "published": text("PUBLISHED_String", ""),
        "url": ("https://www.willhaben.at/iad/" + url.lstrip("/"))
               if url else "",
    }


def find_ads(o):
    """Search nested dicts/lists depth-first for an ``advertSummaryList`` entry.

    A dict that has the key returns its value immediately, whatever that value
    is, and its other entries are not searched. Otherwise the children are
    visited in order and the first truthy result wins.

    Returns:
        The value stored under ``advertSummaryList``, or None if no truthy
        match exists below ``o`` (scalars always yield None).
    """
    if isinstance(o, dict):
        if "advertSummaryList" in o:
            return o["advertSummaryList"]
        children = o.values()
    elif isinstance(o, list):
        children = o
    else:
        return None

    for child in children:
        found = find_ads(child)
        if found:
            return found
    return None


def load_seen(path=None):
    """Return the set of listing ids recorded by earlier runs.

    Args:
        path: JSON file to read; defaults to ``SEEN_FILE``.

    Returns:
        A set of id strings. The set is empty when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON list, so a
        damaged state file means "nothing seen yet" rather than a crash.
    """
    if path is None:
        path = SEEN_FILE
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        return set()
    if not isinstance(data, list):
        # Valid JSON of the wrong shape (null, number, object, ...).
        return set()
    # Normalise to str: listing ids are compared and sorted as strings.
    return {str(item) for item in data if isinstance(item, (str, int))}


def save_seen(seen, path=None, limit=5000):
    """Persist the ids of scanned listings so later runs can skip them.

    Args:
        seen: iterable of listing ids.
        path: JSON file to write; defaults to ``SEEN_FILE``.
        limit: maximum number of ids to keep (None keeps all). When the set
            is larger, the ids that sort highest are kept.

    Ids are ordered by (length, text), which is numeric order for plain digit
    strings. A plain string sort would place "1000" before "999" and drop the
    wrong entries once ids differ in length.
    NOTE(review): keeping the highest ids presumes ids grow over time — confirm.
    """
    if path is None:
        path = SEEN_FILE
    ordered = sorted({str(i) for i in seen}, key=lambda s: (len(s), s))
    if limit is not None:
        # ordered[-0:] would be the whole list, so treat limit <= 0 explicitly.
        ordered = ordered[-limit:] if limit > 0 else []
    # Write to a sibling temp file and swap it in, so an interrupted run
    # cannot leave a truncated state file behind.
    tmp = "%s.tmp" % path
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(ordered, f)
    os.replace(tmp, path)

def age_hours(published, now):
    """Return how many hours before ``now`` a listing was published.

    Args:
        published: ISO-8601 timestamp string; a trailing "Z" is read as
            "+00:00".
        now: reference ``datetime``.

    Returns:
        The age in hours as a float (negative when ``published`` is later
        than ``now``), or None when ``published`` is not a string, cannot be
        parsed, or cannot be compared with ``now`` because only one of the
        two carries a UTC offset.
    """
    if not isinstance(published, str):
        return None
    text = published.strip()
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        ts = dt.datetime.fromisoformat(text)
        # Aware minus naive (or the reverse) raises TypeError.
        return (now - ts).total_seconds() / 3600.0
    except (TypeError, ValueError):
        return None


def main():
    """Run one scan over the selected watches and print new candidates.

    Listings already recorded in the seen-file are skipped. Every other
    listing with an id is recorded as seen, then kept only if it has a price
    no higher than the watch's ``max`` and is not older than ``--hours``
    (listings of unknown age are kept). Hits are printed HOT-first, then
    youngest first.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--hours", type=float, default=48.0,
                        help="only listings newer than this many hours")
    parser.add_argument("--only", default=None,
                        help="only watches whose query contains this word, e.g. lego")
    parser.add_argument("--reset", action="store_true")
    opts = parser.parse_args()

    # --reset: drop the persisted ids before they are loaded below.
    if opts.reset and os.path.exists(SEEN_FILE):
        os.remove(SEEN_FILE)

    started = dt.datetime.now(dt.timezone.utc)
    known_ids = load_seen()

    if opts.only:
        needle = opts.only.lower()
        selected = [w for w in WATCHES if needle in w["q"].lower()]
    else:
        selected = list(WATCHES)

    candidates = []
    scanned = 0
    for watch in selected:
        listings = fetch_listings(watch["q"], watch["max"])
        scanned += len(listings)
        for ad in listings:
            ad_id = ad["id"]
            if not ad_id or ad_id in known_ids:
                continue
            # Recorded as seen before filtering, so rejected listings are
            # not re-evaluated on later runs either.
            known_ids.add(ad_id)
            price = ad["price"]
            if price is None or price > watch["max"]:
                continue
            age = age_hours(ad["published"], started)
            if age is not None and age > opts.hours:
                continue
            ad["watch"] = watch
            ad["hot"] = bool(HOT_WORDS.search(ad["title"]))
            ad["age_h"] = age
            candidates.append(ad)
        time.sleep(1.2)  # be polite
    save_seen(known_ids)

    def rank(ad):
        # HOT hits first (False sorts before True); unknown age counts as 99h.
        age = ad["age_h"]
        return (not ad["hot"], 99 if age is None else age)

    candidates.sort(key=rank)

    stamp = started.strftime("%Y-%m-%d %H:%M UTC")
    print("# Flip-Finder — %s  (%d listings scanned, %d new hits)\n"
          % (stamp, scanned, len(candidates)))
    if not candidates:
        print("No new candidates. Rerun in a few hours — freshness is the edge.")
    for ad in candidates:
        if ad["hot"]:
            flag = "HOT "
        else:
            flag = ""
        if ad["age_h"] is None:
            age = "?"
        else:
            age = "%.1fh ago" % ad["age_h"]
        watch = ad["watch"]
        print("%s[%s] %s — %s | %s | %s" % (
            flag, watch["q"], ad["title"][:70], ad["price_disp"],
            ad["loc"], age))
        print("    resale: %s" % watch["note"])
        if ad["url"]:
            print("    %s" % ad["url"])
        print()


# Script entry point. main() has no return statement, so sys.exit() receives
# None and the process exits with status 0 unless an exception escapes.
if __name__ == "__main__":
    sys.exit(main())
