#!/usr/bin/env python3
"""
Kiwi Browser API server — runs inside a Docker container with Chromium.
Provides /health, /visit, and /screenshot endpoints over HTTP.

Place this at /app/server.py inside a Debian container that has chromium
and python3 installed. Then start with:
    python3 /app/server.py

The container must expose port 3000.
"""

import subprocess, json, base64, os, tempfile, re
from http.server import HTTPServer, BaseHTTPRequestHandler

BROWSER = "/usr/bin/chromium"


def clean_json(data):
    """Serialize *data* to JSON text and return it as encoded bytes."""
    serialized = json.dumps(data)
    return bytes(serialized, "utf-8")


class Handler(BaseHTTPRequestHandler):
    def log_message(self, fmt, *args):
        # Suppress stdout noise
        pass

    def _send_json(self, data, status=200):
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(clean_json(data))

    def do_GET(self):
        if self.path == "/health":
            self._send_json({"status": "ok", "browser": BROWSER})
        else:
            self.send_response(404)
            self.end_headers()

    def do_POST(self):
        length = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(length) if length else b"{}"
        try:
            data = json.loads(body.decode())
        except Exception:
            data = {}

        if self.path == "/visit":
            self._visit(data)
        elif self.path == "/screenshot":
            self._screenshot(data)
        else:
            self.send_response(404)
            self.end_headers()

    def _extract_title(self, html: str) -> str | None:
        m = re.search(r'<title>(.*?)</title>', html, re.IGNORECASE | re.DOTALL)
        return re.sub(r'\s+', ' ', m.group(1)).strip() if m else None

    def _visit(self, data):
        url = data.get("url", "https://example.com")
        selector = data.get("selector", None)

        cmd = [
            BROWSER,
            "--headless",
            "--no-sandbox",
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--window-size=1920,1080",
            "--timeout=20000",
            "--dump-dom",
            url,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            html = result.stdout[:10000]

            extracted = None
            if selector:
                patterns = [
                    rf'class="[^"]*{re.escape(selector)}[^"]*"[^>]*>([^<]+)',
                    rf'id="{re.escape(selector)}"[^>]*>([^<]+)',
                    rf'<[^>]*class="[^"]*price[^"]*"[^>]*>([^<]+)',
                ]
                for pat in patterns:
                    m = re.search(pat, html, re.IGNORECASE | re.DOTALL)
                    if m:
                        extracted = re.sub(r'<[^>]+>', '', m.group(1)).strip()
                        break

            self._send_json({
                "success": result.returncode == 0,
                "url": url,
                "title": self._extract_title(html),
                "html_preview": html[:2000],
                "extracted": extracted,
                "error": result.stderr[:500] if result.returncode != 0 else None,
            })
        except Exception as exc:
            self._send_json({"success": False, "error": str(exc)}, 500)

    def _screenshot(self, data):
        url = data.get("url", "https://example.com")

        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            out_path = tmp.name

        cmd = [
            BROWSER,
            "--headless",
            "--no-sandbox",
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--window-size=1920,1080",
            f"--screenshot={out_path}",
            url,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            if os.path.exists(out_path):
                with open(out_path, "rb") as img:
                    b64 = base64.b64encode(img.read()).decode()
                os.unlink(out_path)
                self._send_json({"success": True, "screenshot_base64": b64, "url": url})
            else:
                self._send_json(
                    {"success": False, "error": "no screenshot", "stderr": result.stderr[:200]},
                    500,
                )
        except Exception as exc:
            self._send_json({"success": False, "error": str(exc)}, 500)


if __name__ == "__main__":
    # Bind to all interfaces on port 3000 and serve until the process exits.
    listen_on = ("0.0.0.0", 3000)
    server = HTTPServer(listen_on, Handler)
    server.serve_forever()