#!/usr/bin/env python3
"""
Compare Tandoor nutritional values against BLS 4.0 using the pre-built FDC→BLS mapping.

Usage:
    python3 compare_with_mapping.py [--min-diff 20] [--top 50] [--show-zero] [--no-fuzzy]
"""
import sqlite3
import re
import os
import argparse
from difflib import SequenceMatcher

SKILL_DIR  = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # …/tandoor-manager/
WORKSPACE  = os.path.dirname(os.path.dirname(SKILL_DIR))                 # …/workspace/
BLS_DB     = os.path.join(WORKSPACE, "skills", "bls-manager", "data", "BLS_4_0.db")
TANDOOR_DB = os.path.join(SKILL_DIR, "tandoor_foods.db")
MAPPING_DB = os.path.join(SKILL_DIR, "fdc_bls_mapping.db")

BLS_COMPONENTS = [
    ("ENERCJ", "ENERCJ Energie (Kilojoule)", "energie"),
    ("ENERCC", "ENERCC Energie (Kilokalorien)", "kalorien"),
    ("PROT625", "PROT625 Protein", "protein"),
    ("FAT",     "FAT Fett", "fett"),
    ("CHO",     "CHO Kohlenhydrate, verfügbar", "kohlenhydrate"),
    ("FIBT",    "FIBT Ballaststoffe, gesamt", "ballaststoffe"),
    ("WATER",   "WATER Wasser", "wasser"),
    ("ASH",     "ASH Rohasche", "rohasche"),
    ("NACL",    "NACL Salz (Natriumchlorid)", "salz"),
    ("VITA",    "VITA Vitamin A, Retinol-Äquivalent (RE)", "vitamin a"),
    ("VITD",    "VITD Vitamin D", "vitamin d"),
    ("VITE",    "VITE Vitamin E (Alpha-Tocopherol)", "vitamin e"),
    ("VITC",    "VITC Vitamin C", "vitamin c"),
    ("THIA",    "THIA Vitamin B1 (Thiamin)", "vitamin b1"),
    ("RIBF",    "RIBF Vitamin B2 (Riboflavin)", "vitamin b2"),
    ("NIAEQ",   "NIAEQ Niacin-Äquivalent", "niacin"),
    ("VITB6",   "VITB6 Vitamin B6", "vitamin b6"),
    ("FOL",     "FOL Folat-Äquivalent", "folat"),
    ("VITB12",  "VITB12 Vitamin B12 (Cobalamine)", "vitamin b12"),
    ("VITK",    "VITK Vitamin K", "vitamin k"),
    ("NA",      "NA Natrium", "natrium"),
    ("K",       "K Kalium", "kalium"),
    ("CA",      "CA Calcium", "calcium"),
    ("MG",      "MG Magnesium", "magnesium"),
    ("P",       "P Phosphor", "phosphor"),
    ("FE",      "FE Eisen", "eisen"),
    ("ZN",      "ZN Zink", "zink"),
    ("CU",      "CU Kupfer", "kupfer"),
    ("MN",      "MN Mangan", "mangan"),
    ("ID",      "ID Iodid", "iodid"),
]


def similarity(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def preprocess(s):
    s = re.sub(r"[¶\s]+", " ", s.strip().lower())
    s = re.sub(r",\s*(klein|mittelgo|rot|gelb|braun|weiß)", "", s)
    return s


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--min-diff",  type=float, default=20,
                    help="Relative diff threshold in %% (default: 20)")
    ap.add_argument("--top",       type=int, default=50)
    ap.add_argument("--show-zero", action="store_true")
    ap.add_argument("--no-fuzzy",  action="store_true",
                    help="Skip fuzzy matches (use only exact + manual)")
    args = ap.parse_args()

    print("=" * 70)
    print("TANDOOR vs BLS 4.0  –  Nährwert-Abgleich (Mapping-basiert)")
    print("=" * 70)

    # ── load mapping ──────────────────────────────────────────────────────────
    print("\n[1] Loading FDC→BLS mapping …")
    mc = sqlite3.connect(MAPPING_DB)
    mc.execute("PRAGMA journal_mode=READ")
    cur = mc.cursor()
    cur.execute("SELECT fdc_id, bls_code, match_type, confidence FROM fdc_bls_map")
    mapping = {}
    for r in cur.fetchall():
        fdc_id, bls_code, match_type, confidence = r
        if match_type == "fuzzy" and args.no_fuzzy:
            continue
        mapping[fdc_id] = {"bls_code": bls_code, "match_type": match_type, "confidence": confidence}
    print(f"    {len(mapping)} foods in mapping")

    # ── load BLS ──────────────────────────────────────────────────────────────
    print("[2] Loading BLS 4.0 …")
    bc = sqlite3.connect(BLS_DB).cursor()
    bc.execute("SELECT * FROM bls_daten")
    all_rows = bc.fetchall()
    bc.execute("SELECT * FROM bls_daten LIMIT 1")
    all_cols = [d[0] for d in bc.description]
    bls_by_code = {}
    for row in all_rows:
        rec = dict(zip(all_cols, row))
        code = rec.get("BLS Code", "")
        if code:
            bls_by_code[code] = rec
    print(f"    {len(bls_by_code)} BLS entries")

    # resolve column names
    col_map = {}
    for bls_code, col_pat, _ in BLS_COMPONENTS:
        pat = col_pat.lower()
        matches = [c for c in all_cols if pat in c.lower()]
        col_map[bls_code] = matches[0] if matches else None
    found = sum(1 for v in col_map.values() if v)
    print(f"    {found}/{len(BLS_COMPONENTS)} columns resolved")

    # ── load Tandoor ──────────────────────────────────────────────────────────
    print("[3] Loading Tandoor …")
    tc = sqlite3.connect(TANDOOR_DB).cursor()
    tc.execute("SELECT id, name, fdc_id FROM foods")
    tandoor_foods = tc.fetchall()
    print(f"    {len(tandoor_foods)} foods")

    # ── compare ───────────────────────────────────────────────────────────────
    print("\n[4] Vergleiche …\n")
    results    = []
    zero_misses = []
    no_mapping = []

    for tid, tname, fdc_id in tandoor_foods:
        if fdc_id not in mapping:
            no_mapping.append(tname)
            continue
        m = mapping[fdc_id]
        bls_rec = bls_by_code.get(m["bls_code"])
        if not bls_rec:
            no_mapping.append(f"{tname} (BLS {m['bls_code']} not found)")
            continue

        tc.execute("""
            SELECT pt.name, p.property_amount
            FROM properties p
            JOIN property_types pt ON pt.id = p.property_type_id
            WHERE p.food_id=?
        """, (tid,))
        tprops = {row[0].strip().lower(): row[1] for row in tc.fetchall()}

        for bls_code, _, t_keyword in BLS_COMPONENTS:
            col = col_map.get(bls_code)
            if not col:
                continue
            bls_raw = bls_rec.get(col)
            if bls_raw is None or str(bls_raw).strip() in ("-", ""):
                bls_val = None
            else:
                try:
                    bls_val = float(bls_raw)
                except (ValueError, TypeError):
                    bls_val = None

            t_val = None
            for prop_name, prop_val in tprops.items():
                if t_keyword in prop_name or prop_name in t_keyword:
                    t_val = prop_val
                    break

            if t_val == 0 and bls_val is not None:
                zero_misses.append({"food": tname, "component": bls_code,
                                    "tandoor": 0, "bls": round(bls_val, 3)})
                continue
            if t_val is None or bls_val is None:
                continue

            rel = abs(t_val - bls_val) / abs(bls_val) if bls_val != 0 else None
            if rel is not None and rel > args.min_diff / 100:
                results.append({
                    "food":      tname,
                    "bls_food":  bls_rec.get("Lebensmittelbezeichnung", ""),
                    "component": bls_code,
                    "tandoor":   round(t_val, 4),
                    "bls":       round(bls_val, 4),
                    "rel":       rel,
                    "match":     m["match_type"],
                })

    # ── report ────────────────────────────────────────────────────────────────
    if results:
        results.sort(key=lambda x: -x["rel"])
        print(f"{'Lebensmittel':<26} {'BLS':<26} {'Ktlf':>8} {'BLS':>8} {'Δrel':>7}  Typ")
        print("-" * 90)
        for d in results[:args.top]:
            tag = "🔶" if d["match"] == "fuzzy" else ("  " if d["match"] == "exact" else "  ")
            print(
                f"{tag}{d['food'][:25]:<26} {d['bls_food'][:25]:<26} "
                f"{d['tandoor']:>8.3f} {d['bls']:>8.3f} "
                f"{d['rel']:>7.1%}  {d['match']}"
            )

    if args.show_zero and zero_misses:
        zm_by = {}
        for z in zero_misses:
            zm_by.setdefault(z["component"], []).append(z)
        print(f"\n⚠  Tandoor=0 aber BLS hat Daten ({len(zero_misses)}):")
        for comp, rows in sorted(zm_by.items(), key=lambda x: -len(x[1])):
            print(f"     {comp}: {len(rows)} Einträge")

    if no_mapping:
        print(f"\n⚠  Ohne Mapping ({len(no_mapping)}):")
        for n in sorted(no_mapping)[:20]:
            print(f"     – {n}")

    print(f"\nStatistik:")
    print(f"  Verglichen:   {len(tandoor_foods) - len(no_mapping)}/{len(tandoor_foods)}")
    print(f"  Abweichungen: {len(results)}  (≥{args.min_diff}%)")
    print(f"  Tandoor=0:    {len(zero_misses)}")
    print(f"  Kein Mapping: {len(no_mapping)}")


if __name__ == "__main__":
    main()
