# -*- coding: utf-8 -*-
"""L44 重い空間処理を事前にキャッシュするスクリプト。
本記事 .py 実行時はこのキャッシュ CSV/GPKG を読み込むだけで完走する (要件 S)。

生成ファイル:
  data/extras/L44_storm_surge/_cache/per_city_rank.csv
  data/extras/L44_storm_surge/_cache/diss_{scenario}.gpkg
"""
from __future__ import annotations
import sys, time, json, zipfile
from pathlib import Path

# Resolve repo-relative paths: this script lives one directory below the repo root.
ROOT = Path(__file__).resolve().parents[1]
DEST = ROOT / "data" / "extras" / "L44_storm_surge"
CACHE = DEST / "_cache"
CACHE.mkdir(parents=True, exist_ok=True)

import geopandas as gpd
import pandas as pd
import shapely

# Projected CRS used for all geometry math. The `/ 1e6` area conversions below
# assume its units are metres (presumably a JGD2011 plane-rectangular zone —
# TODO confirm EPSG:6671 covers the target prefecture).
TARGET_CRS = "EPSG:6671"

def fix_invalid(gdf):
    """Repair invalid geometries in *gdf* using the buffer(0) trick.

    Returns *gdf* unchanged (same object) when every geometry is already
    valid; otherwise returns a copy whose invalid geometries have been
    replaced by ``geom.buffer(0)``. Valid geometries are kept as-is.
    """
    geoms = gdf.geometry.values
    invalid = ~shapely.is_valid(geoms)
    # Fast path: nothing to repair, avoid the copy entirely.
    if not invalid.any():
        return gdf
    repaired = [g.buffer(0) if bad else g for g, bad in zip(geoms, invalid)]
    out = gdf.copy()
    out["geometry"] = repaired
    return out

print("=== L44 cache builder ===")
# Source inundation shapefiles, one per storm-surge scenario.
# NOTE(review): key meanings inferred from file names — presumably
# 30y = 30-year return period, ise = Ise-wan-typhoon-class storm,
# max = maximum assumed surge; confirm against the data provider's docs.
SCENARIO_PATHS = {
    "30y":  DEST / "30year_20170421/takashio_30year/takashio_30year.shp",
    "ise":  DEST / "ise_20170421/takashio_isewan/takashio_isewan.shp",
    "max":  DEST / "max_20210803/takashio_max.shp",
}

# 1. Dissolve per (scenario, rank) and save GPKG
diss_gdfs = {}
for scen, shp_path in SCENARIO_PATHS.items():
    gpkg_path = CACHE / f"diss_{scen}.gpkg"

    # Cache hit: a previous run already dissolved this scenario.
    if gpkg_path.exists():
        cached = gpd.read_file(gpkg_path)
        diss_gdfs[scen] = cached
        print(f"  [{scen}] cache hit: {len(cached)} rows")
        continue

    # Read, reproject, repair invalid geometries, and drop Z coordinates.
    t0 = time.time()
    raw = gpd.read_file(shp_path).to_crs(TARGET_CRS)
    raw = fix_invalid(raw)
    raw["geometry"] = gpd.GeoSeries(shapely.force_2d(raw.geometry.values), crs=TARGET_CRS)
    print(f"  [{scen}] read+fix: {len(raw)} polys in {time.time()-t0:.1f}s")

    # Merge all polygons sharing an inundation rank into one geometry each.
    t0 = time.time()
    dissolved = raw.dissolve(by="SINSUIRANK", as_index=False)
    dissolved["area_km2"] = dissolved.geometry.area / 1e6
    print(f"  [{scen}] dissolve: {len(dissolved)} ranks in {time.time()-t0:.1f}s")

    dissolved.to_file(gpkg_path, driver="GPKG")
    diss_gdfs[scen] = dissolved
    print(f"  [{scen}] saved {gpkg_path.name}")

# 2. Load admin (city planning) zones for spatial join
ADMIN_DIR = ROOT / "data" / "extras" / "L15_admin_zones"
ADMIN_PREF = ADMIN_DIR / "admin_922_広島県.zip"

# Find the first .geojson member inside the archive, then let geopandas
# read it through the zip:// virtual path (no extraction needed).
with zipfile.ZipFile(ADMIN_PREF) as zf:
    geojson_members = [m for m in zf.namelist() if m.endswith(".geojson")]
    name = geojson_members[0]
    admin = gpd.read_file(f"zip://{ADMIN_PREF}!{name}").to_crs(TARGET_CRS)

# Dissolve by CITY_CD
admin_diss = admin.dissolve(by="CITY_CD", as_index=False)
admin_diss["admin_km2"] = admin_diss.geometry.area / 1e6
total_km2 = admin_diss["admin_km2"].sum()
print(f"  admin: {len(admin_diss)} cities, total {total_km2:.1f} km^2")

# 3. Per-(scenario, rank, city) overlay -> CSV
out_csv = CACHE / "per_city_rank.csv"
if out_csv.exists():
    print("  per_city_rank.csv already exists, skip")
else:
    rows = []
    # Index city geometries once by CITY_CD. The previous version re-filtered
    # the full GeoDataFrame for every candidate pair (O(n) per lookup); a dict
    # makes each lookup O(1).
    city_geoms = dict(zip(admin_diss["CITY_CD"], admin_diss.geometry))
    for sc, g_d in diss_gdfs.items():
        t = time.time()
        # Same pre-indexing for this scenario's per-rank geometries.
        rank_geoms = dict(zip(g_d["SINSUIRANK"], g_d.geometry))
        # sjoin first to find candidate (rank, city) pairs
        sj = gpd.sjoin(g_d, admin_diss[["CITY_CD","geometry"]], how="inner", predicate="intersects")
        for _, sjr in sj.iterrows():
            rk = sjr["SINSUIRANK"]
            ccd = sjr["CITY_CD"]
            # actual intersection area; sjoin only says the pair touches
            try:
                inter = shapely.intersection(rank_geoms[rk], city_geoms[ccd])
                a_km2 = inter.area / 1e6
            except Exception:
                # Best-effort: a failed GEOS operation degrades this pair to
                # zero area instead of aborting the whole cache build.
                a_km2 = 0.0
            rows.append({"scenario": sc, "rank": rk, "city_cd": int(ccd), "area_km2": a_km2})
        print(f"  [{sc}] overlay -> {len(sj)} pairs, {time.time()-t:.1f}s")
    df = pd.DataFrame(rows)
    # utf-8-sig so the CSV opens cleanly in Excel on Japanese Windows.
    df.to_csv(out_csv, index=False, encoding="utf-8-sig")
    print(f"  saved {out_csv.name}: {len(df)} rows")

# 4. Save admin city geometry as GPKG (for plotting in lesson)
admin_out = CACHE / "admin_diss.gpkg"
admin_diss.to_file(admin_out, driver="GPKG")
print("  saved admin_diss.gpkg")

print("=== done ===")
