%matplotlib inline
from typing import Optional

from cartopy import crs
import numpy as np
from matplotlib import pyplot as plt
import freva
import xarray as xr


def get_rotated_pole(dset: xr.Dataset) -> Optional[tuple[float, float]]:
    """Get the coordinates of a rocted pole, if none given return None."""
    rot_pole_var = [v for v in map(str, dset.data_vars) if "rotated" in v]
    if rot_pole_var:
        pole = dset[rot_pole_var[0]].attrs
        return pole["grid_north_pole_longitude"], pole["grid_north_pole_latitude"]
    return None

def transform_points(X: np.ndarray, Y: np.ndarray, source_proj: crs.Projection, target_proj: crs.Projection) -> tuple[np.ndarray, np.ndarray]:
    """Transform points in longitude and laititude array."""
    if len(Y.shape) == 1:
        Y, X = np.meshgrid(Y, X)
    lon_p, lat_p, _ = target_proj.transform_points(source_proj, X, Y).T
    return lon_p, lat_p

def weighted_mean(dataset: xr.Dataset) -> xr.Dataset:
        """Calculate a weighted area average of a input dataset.

        Parameters
        ----------
        dataset: xarray.Dataset
            Input dataset that should be averaged

        Returns
        -------
        xarray.Dataset
            Weighted field mean of the dataset

        """
        lat_name = dataset.dims[-2]
        lon_name = dataset.dims[-1]
        weight = np.cos(np.deg2rad(dataset[lat_name].values))
        if len(dataset[lat_name].shape) == 1:
            _, weight = np.meshgrid(np.ones_like(dataset[lon_name]), weight)
        x_weight = xr.DataArray(
            weight,
            dims=(lat_name, lon_name),
            coords={lon_name: dataset[lon_name], lat_name: dataset[lat_name]},
        )
        return (x_weight * dataset).sum(
            dim=(lat_name, lon_name), keep_attrs=True
        ) / weight.sum()

def plot_comparison(variable: str, *dsets: xr.Dataset, cmap="PuRd", min=2) -> None:
    """Plot a comparison of datasets."""
    valid_dsets = [dset for dset in dsets if variable in dset.data_vars]
    if (len(valid_dsets) < min) or not valid_dsets:
        print(f"No enough datasets contain the variable '{variable}'.")
        return  

    ncols = len(valid_dsets)
    fig = plt.figure(figsize=(ncols * 4, 6))
    transform = crs.PlateCarree()
    cbar_kwargs = {'extend': 'both', 'orientation': 'horizontal'}
    target_proj = crs.PlateCarree()

    for nn, dset in enumerate(valid_dsets):
        dims = dset[variable].dims
        rot_pole = get_rotated_pole(dset)
        proj = crs.RotatedPole(*rot_pole) if rot_pole else crs.PlateCarree()

        lon, lat = dset["lon"].values, dset["lat"].values

        if nn == 0:
            central_lon = (lon.max() - lon.min()) / 2
            bbox = [lon.min(), lon.max(), lat.min(), lat.max()]

        if rot_pole:
            plot_proj = crs.RotatedPole(*rot_pole, central_lon)
            plot_dset = dset[variable]
        else:
            plot_dset = dset[variable].sel({
                dims[-1]: slice(*bbox[:2]),
                dims[-2]: slice(*bbox[2:])
            })
            plot_proj = crs.PlateCarree(central_lon)

        mean = float(weighted_mean(plot_dset).values)
        vmin, vmax = plot_dset.quantile([0.02, 0.98]).values

        ax = fig.add_subplot(1, ncols, nn + 1, projection=plot_proj)
        plot_dset.plot(ax=ax, cmap=cmap, cbar_kwargs=cbar_kwargs, transform=proj, vmin=vmin, vmax=vmax)
        ax.coastlines(resolution="10m")
        ax.set_title(f"{dset.attrs.get('product', 'Unknown')} ({variable}) avg: {round(mean, 2)}")

    return ax


time_ini = "2003-01-01"
time_end = "2003-02-28"
variables = ["uvb", "bld", "hfss", "hfls", "rsds", 
             "rstcs", "rlutcs", "rsscs", "rlscs", 
             "rsdt", "rlds", "rss", "rls", "rst", 
             "rlut", "tauu", "tauv", "gwd"] # tauu, tauv not fluxes but have positive down/up
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)


merra_files = freva.databrowser(variable=variables, time=f"{time_ini} to {time_end}", experiment="merra", project="reanalysis", time_frequency="mon")
merra_dset = xr.open_mfdataset(merra_files, decode_times=time_coder).sel(time=slice(time_ini, time_end)).mean(dim="time", keep_attrs=True).load()
merra_dset.attrs["product"] = "merra"


jra25_files = freva.databrowser(variable=variables, time=f"{time_ini} to {time_end}", experiment="jra-25", project="reanalysis", time_frequency="mon")
jra25_dset = xr.open_mfdataset(jra25_files, decode_times=time_coder).sel(time=slice(time_ini, time_end)).mean(dim="time", keep_attrs=True).load()
jra25_dset.attrs["product"] = "jra25"


mpimhr_files = freva.databrowser(variable=variables, time=f"{time_ini} to {time_end}", ensemble="r10i1p1f1", experiment="historical", institute="mpi-m", model="mpi-esm1-2-hr", project="cmip6", time_frequency="mon", time_select="flexible")
mpimhr_dset = xr.open_mfdataset(mpimhr_files, decode_times=time_coder).sel(time=slice(time_ini, time_end)).mean(dim="time", keep_attrs=True).load()
mpimhr_dset.attrs["product"] = "mpi-m-hr"


era5_files = freva.databrowser(variable=variables, time=f"{time_ini} to {time_end}", experiment="era5", project="reanalysis", time_frequency="mon")
era5_dset = xr.open_mfdataset(era5_files, decode_times=time_coder).sel(time=slice(time_ini, time_end)).mean(dim="time", keep_attrs=True).load()
era5_dset.attrs["product"] = "era5"


for variable in ["rlut", "rlutcs"]:
    ax = plot_comparison(variable, mpimhr_dset, merra_dset, jra25_dset, era5_dset)


list_variables = list(merra_dset.variables) + list(mpimhr_dset.variables) + list(era5_dset.variables)
list_variables = {var for var in list_variables if not any(key in var.lower() for key in ["lat", "lon"])}


for variable in list_variables:
    ax = plot_comparison(variable, mpimhr_dset, merra_dset, jra25_dset, era5_dset)

No enough datasets contain the variable 'rsscs'.
No enough datasets contain the variable 'bld'.
No enough datasets contain the variable 'rss'.
No enough datasets contain the variable 'gwd'.
No enough datasets contain the variable 'rstcs'.
No enough datasets contain the variable 'rlscs'.
No enough datasets contain the variable 'rls'.
No enough datasets contain the variable 'rst'.
No enough datasets contain the variable 'uvb'.

ecmwf_var	ecmwf ln	opt1_var	long name	op2_var	long name
ttr	Top net thermal radiationn	rlut (CMIP6, * -1)	TOA Outgoing Longwave Radiation	rlt (made up name, * +1)	TOA Net Downward Longwave Flux
ttrc	Top net thermal radiation, clear sky	rlutcs (CMIP6, * -1)	TOA Outgoing Clear-Sky Longwave Radiation	rltcs (made up name, * +1)	TOA Net Downward Longwave Flux Assuming Clear Sky

var	rea/model	obs
`rlds`	merra	reverse sign, net instead of absolute (?)
`hfss`	merra	reverse sign
`hfls`	merra	reverse sign

comparison of `rlut` and `rlutcs` for ERA5 (getera5 issue #16)¶

other radiative flux variables:¶

comparison of rlut and rlutcs for ERA5 (getera5 issue #16)¶

other radiative flux variables:¶

comparison of `rlut` and `rlutcs` for ERA5 (getera5 issue #16)¶