#!/usr/bin/env python3
"""Scan all eBooks in /media/HDD_1TB/Medien/Bücher/ and report metadata status.
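Prints a JSON array with one entry per file: path, filename, extension, size,
title, author, publisher, year, language, ISBN, cover status, a list of issues,
and a metadata_status field
(complete/partial/bare/unknown/error/error_opf_not_found).
Only .epub and .pdf files are inspected; every other file stays 'unknown'.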
"""
import zipfile, json, os, glob, re, xml.etree.ElementTree as ET

DIRPATH = "/media/HDD_1TB/Medien/Bücher"
NS = {
    "opf": "http://www.idpf.org/2007/opf",
    "dc": "http://purl.org/dc/elements/1.1/",
}

# Namespace of META-INF/container.xml, which points at the OPF package file.
_CONTAINER_NS = {"c": "urn:oasis:names:tc:opendocument:xmlns:container"}
# Attributes are not covered by the prefix map of find()/findall(), so the
# namespaced opf:scheme attribute has to be spelled in Clark notation.
_OPF_SCHEME_ATTR = "{%s}scheme" % NS["opf"]
_ISBN_URN_PREFIX = "urn:isbn:"
_YEAR_RE = re.compile(r'(\d{4})')
# (entry key, Dublin Core element name) pairs copied verbatim from the OPF.
_DC_TEXT_FIELDS = (
    ("title", "title"),
    ("author", "creator"),
    ("publisher", "publisher"),
    ("language", "language"),
)


def _classify(entry, required):
    """Set ``metadata_status`` and ``issues`` from the empty *required* fields.

    "complete" means no required field is empty, "bare" means all of them
    are empty, and "partial" is everything in between.
    """
    missing = [key for key in required if not entry[key]]
    if not missing:
        status = "complete"
    elif len(missing) < len(required):
        status = "partial"
    else:
        status = "bare"
    entry["metadata_status"] = status
    entry["issues"] = missing


def _find_isbn(opf_root):
    """Return the first ISBN found among the dc:identifier elements, or ""."""
    for ident in opf_root.findall(".//dc:identifier", NS):
        # An identifier element may be empty; skip it instead of crashing.
        text = (ident.text or "").strip()
        if not text:
            continue
        if "isbn" in ident.get(_OPF_SCHEME_ATTR, "").lower():
            return text
        # Some packages carry the ISBN as a URN in the element text instead
        # of marking it with an opf:scheme attribute.
        if text.lower().startswith(_ISBN_URN_PREFIX):
            return text[len(_ISBN_URN_PREFIX):].strip()
    return ""


def _has_cover(opf_root):
    """Return True if the OPF manifest declares a cover image."""
    items = opf_root.findall(".//opf:item", NS)
    if any("cover-image" in item.get("properties", "") for item in items):
        return True
    # Fallback: <meta name="cover" content="ID"/> naming a manifest item id.
    cover_ids = {
        meta.get("content")
        for meta in opf_root.findall(".//opf:meta", NS)
        if meta.get("name") == "cover" and meta.get("content")
    }
    return any(item.get("id") in cover_ids for item in items)


def _scan_epub(path, entry):
    """Fill *entry* in place with metadata read from the EPUB at *path*.

    Raises whatever zipfile or the XML parser raise on a damaged file; the
    caller turns that into an "error" status.
    """
    with zipfile.ZipFile(path, "r") as zf:
        # Hand raw bytes to the parser so it can honour the XML encoding
        # declaration or BOM instead of assuming UTF-8.
        container = ET.fromstring(zf.read("META-INF/container.xml"))
        rootfile = container.find(".//c:rootfile", _CONTAINER_NS)
        opf_path = rootfile.get("full-path") if rootfile is not None else None
        if not opf_path or opf_path not in zf.namelist():
            entry["metadata_status"] = "error_opf_not_found"
            return
        opf_root = ET.fromstring(zf.read(opf_path))

    for key, dc_tag in _DC_TEXT_FIELDS:
        elem = opf_root.find(f".//dc:{dc_tag}", NS)
        if elem is not None and elem.text:
            entry[key] = elem.text.strip()

    date_elem = opf_root.find(".//dc:date", NS)
    if date_elem is not None and date_elem.text:
        year = _YEAR_RE.search(date_elem.text)
        if year:
            entry["year"] = year.group(1)

    entry["isbn"] = _find_isbn(opf_root)
    entry["has_cover"] = _has_cover(opf_root)
    _classify(entry, ("title", "author", "language"))


def _scan_pdf(path, entry):
    """Fill *entry* in place with title/author from the PDF at *path*."""
    # PyMuPDF is imported lazily: if it is missing, only PDF entries are
    # reported as errors and EPUB scanning keeps working.
    import fitz

    doc = fitz.open(path)
    try:
        meta = doc.metadata or {}
    finally:
        # Close the document even when reading the metadata fails.
        doc.close()
    entry["title"] = (meta.get("title") or "").strip()
    entry["author"] = (meta.get("author") or "").strip()
    _classify(entry, ("title", "author"))


# File extension (lower-case) -> function that fills the entry for that type.
_SCANNERS = {
    ".epub": _scan_epub,
    ".pdf": _scan_pdf,
}


def scan_file(path, base_dir=DIRPATH):
    """Return the metadata report entry for a single file.

    Args:
        path: Path of the file to inspect.
        base_dir: Directory that the reported "path" value is relative to.

    Returns:
        A dict with the keys path, filename, ext, size_kb, title, author,
        publisher, year, language, isbn, has_cover, metadata_status and
        issues. Files that are neither .epub nor .pdf keep the status
        "unknown". Any exception raised while reading a file is reported as
        status "error" with the message in "issues", so one broken file does
        not abort the whole scan.
    """
    ext = os.path.splitext(path)[1].lower()
    entry = {
        # relpath strips only the leading base directory; str.replace would
        # also remove any later occurrence of the same text inside the path.
        "path": os.path.relpath(path, base_dir),
        "filename": os.path.basename(path),
        "ext": ext,
        "size_kb": round(os.path.getsize(path) / 1024, 1),
        "title": "",
        "author": "",
        "publisher": "",
        "year": "",
        "language": "",
        "isbn": "",
        "has_cover": False,
        "metadata_status": "unknown",
        "issues": [],
    }

    scanner = _SCANNERS.get(ext)
    if scanner is not None:
        try:
            scanner(path, entry)
        except Exception as e:  # deliberate best-effort: report and move on
            entry["metadata_status"] = "error"
            entry["issues"] = [str(e)]
    return entry


# Sorted so that the report order is stable between runs.
files = sorted(glob.glob(os.path.join(DIRPATH, "**", "*"), recursive=True))
results = [scan_file(f) for f in files if os.path.isfile(f)]

print(json.dumps(results, indent=2, ensure_ascii=False))