I've updated some of the type hinting/documentation in a lib called pygrib. The source documentation can be found here.
The goal is to extend the Cython API with the associated native type hints and to maintain an open-ended platform for future growth, i.e., more methods like to_dataframe().
pygrib uses Cython to read GRIB (gridded binary) files.
pygrib2.__init__.py
from .core import * from .extension import * pygrib2.core.py
import os
import subprocess

from pygrib2.extension import File


def _gunzip(path: str) -> str:
    """Decompress ``path`` in place with ``gzip -d`` and return the new path.

    The command is passed as an argument list (no shell), so paths that
    contain spaces or shell metacharacters reach ``gzip`` verbatim.  The exit
    status is not checked here; a failed decompression surfaces when the
    returned path is opened.

    Args:
        path: path of a file whose name ends in ``.gz``.

    Returns:
        ``path`` with the trailing ``.gz`` suffix removed.
    """
    subprocess.run(['gzip', '-d', path])
    # Slice the suffix off: ``str.strip('.gz')`` would remove any leading or
    # trailing '.', 'g' and 'z' characters, e.g. 'zebra.grb.gz' -> 'ebra.grb'.
    return path[:-len('.gz')]


class Reader:
    """reader wrapper with typing support around the pygrib open function

    Use as a context manager: entering opens the file and yields the
    ``File`` object, leaving closes it.  A path ending in ``.gz`` is
    decompressed with ``gzip -d`` when the reader is constructed.
    """
    grib_file: File

    def __init__(self, file_path: str):
        """Store the path to open, decompressing a ``.gz`` file first.

        Args:
            file_path: path to a GRIB file, optionally gzip-compressed.
                ``os.PathLike`` objects are converted with ``os.fspath``.
        """
        file_path = os.fspath(file_path)
        if file_path.endswith('.gz'):
            self.file_path = _gunzip(file_path)
        else:
            self.file_path = file_path

    def __enter__(self) -> File:
        """Open the file and return the ``File`` object."""
        self.grib_file = File(self.file_path)
        return self.grib_file

    def __exit__(self, *_) -> None:
        """Close the file opened by ``__enter__``."""
        self.grib_file.close()


# pygrib2.extension.py
from typing import NewType, Type, Union, Any

import pandas as pd
import pygrib

PyGribOpen = NewType('PyGribOpen', Type[pygrib.open])
PyGribMessage = NewType('PyGribMessage', Type[pygrib.gribmessage])


class Message:
    """Wrapper that forwards attribute and item access to a pygrib message."""

    def __init__(self, message: PyGribMessage):
        self._message = message

    def __repr__(self):
        return str(self._message)

    def __getattr__(self, key: str) -> Any:
        """>>> Message.keys()"""
        # ``__getattr__`` only runs when normal lookup fails.  If ``_message``
        # itself is missing (instance created without ``__init__``, e.g. by
        # copy/pickle), forwarding would re-enter this method forever.
        if key == '_message':
            raise AttributeError(key)
        return getattr(self._message, key)

    def __getitem__(self, key: Union[bytes, str]) -> Any:
        """>>> Message['values']"""
        # Attribute names must be ``str``; decode so the ``bytes`` keys
        # promised by the annotation do not raise TypeError.
        if isinstance(key, bytes):
            key = key.decode()
        return getattr(self._message, key)


class File(pygrib.open):
    """``pygrib.open`` subclass whose messages are wrapped in ``Message``."""

    def __next__(self):
        """>>> next(GribFile)"""
        return Message(super().__next__())

    def __getitem__(self, key: Union[int, slice]):
        """>>> GribFile[1]

        Returns a single ``Message``, or a list of ``Message`` objects when
        the parent class returns a list (as it does for a slice key).
        """
        result = super().__getitem__(key)
        if isinstance(result, list):
            # Wrap each element; wrapping the list itself would produce a
            # ``Message`` that forwards to a plain list.
            return [Message(item) for item in result]
        return Message(result)

    def to_dataframe(self, **kwargs):
        """Build a DataFrame with one row per message yielded by iteration.

        Args:
            **kwargs: forwarded to ``pandas.DataFrame.from_records``.
        """
        return pd.DataFrame.from_records(
            (dict(_full_message(grb)) for grb in self), **kwargs
        )


def _full_message(grb: Message):
    """
    Yield ``(key, value)`` pairs for every key in ``grb.keys()``.

    A key whose lookup raises ``RuntimeError`` is printed and skipped.

    ### usage
    ```
    if __name__ == '__main__':
        with grib.Reader(GRIB_FILE) as grbs:
            for grb in grbs:
                x = tuple(grib._full_message(grb))
                print(x)
    ```
    """
    for k in grb.keys():
        try:
            yield k, grb[k]
        except RuntimeError:
            print(k)


# pygrib2.extension.pyi
from typing import (
    Iterator,
    NewType,
    Optional,
    TypeVar,
    Tuple,
    Union,
    List,
    Any,
    overload,
)

import numpy as np
import pandas as pd
import numpy.typing as npt

# ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the same dtype.
FloatArray = npt.NDArray[np.float64]
# The string passed to NewType must equal the name it is assigned to.
NData = NewType('NData', FloatArray)
NLats = NewType('NLats', FloatArray)
NLons = NewType('NLons', FloatArray)


class Message:
    """
    ### Grib message object.

    Each grib message has attributes corresponding to GRIB keys.
    Parameter names are described by the name, shortName and paramID keys.
    pygrib also defines some special attributes which are defined below

    ### Variables
    - messagenumber - The grib message number in the file.
    - projparams - A dictionary containing proj4 key/value pairs describing
      the grid. Set to None for unsupported grid types.
    - expand_reduced - If True (default), reduced lat/lon and gaussian grids
      will be expanded to regular grids when data is accessed via values key.
      If False, data is kept on unstructured reduced grid, and is returned in
      a 1-d array.
    - fcstimeunits - A string representing the forecast time units (an empty
      string if not defined).
    - analDate - A python datetime instance describing the analysis date and
      time for the forecast. Only set if forecastTime and julianDay keys
      exist.
    - validDate - A python datetime instance describing the valid date and
      time for the forecast. Only set if forecastTime and julianDay keys
      exist, and fcstimeunits is defined. If forecast time is a range, then
      validDate corresponds to the end of the range.
    """

    def __getattr__(self, key: str) -> Any:
        """forward attribute access to the wrapped message."""

    def __getitem__(self, key: Union[bytes, str]) -> Any:
        """forward item access to the wrapped message."""

    def data(
        self,
        lat1: Optional[float] = ...,
        lat2: Optional[float] = ...,
        lon1: Optional[float] = ...,
        lon2: Optional[float] = ...,
    ) -> Tuple[NData, NLats, NLons]:
        """
        extract data, lats and lons for a subset region defined by the
        keywords lat1,lat2,lon1,lon2.

        The default values of lat1,lat2,lon1,lon2 are None, which means the
        entire grid is returned.

        If the grid type is unprojected lat/lon and a geographic subset is
        requested (by using the lat1,lat2,lon1,lon2 keywords), then 2-d
        arrays are returned, otherwise 1-d arrays are returned.
        """

    def expand_grid(self, arg: bool = ...) -> None:
        """toggle expansion of 1D reduced grid data to a regular (2D) grid (on by default)."""

    def has_key(self, key: Any) -> bool:
        """tests whether a grib message object has a specified key."""

    def is_missing(self, key: Any) -> bool:
        """
        returns True if key is invalid or value associated with key is equal
        to grib missing value flag (False otherwise)
        """

    def keys(self) -> List[str]:
        """return keys associated with a grib message in a list"""

    def latlons(self) -> Tuple[NLats, NLons]:
        """
        compute lats and lons (in degrees) of grid.

        Currently handles regular lat/lon, global gaussian, mercator,
        stereographic, lambert conformal, albers equal-area, space-view,
        azimuthal equidistant, reduced gaussian, reduced lat/lon, lambert
        azimuthal equal-area, rotated lat/lon and rotated gaussian grids.

        Returns lats,lons numpy arrays containing latitudes and longitudes of
        grid (in degrees).
        """

    def tostring(self) -> bytes:
        """
        return coded grib message in a binary string.
        """

    def valid_key(self, key: str) -> bool:
        """
        tests whether a grib message object has a specified key, it is not
        missing and it has a value that can be read
        """


class File:
    """extension class for `<class 'pygrib._pygrib.open'>`"""

    def close(self) -> None:
        """close GRIB file, deallocate C structures associated with class instance"""

    def message(self, num: int) -> Message:
        """retrieve N'th message in iterator. same as seek(N-1) followed by readline()."""

    def read(self, num: Optional[int] = ...) -> List[Message]:
        """
        read N messages from current position, returning grib messages
        instances in a list.

        If N=None, all the messages to the end of the file are read.
        pygrib.open(f).read() is equivalent to list(pygrib.open(f)), both
        return a list containing gribmessage instances for all the grib
        messages in the file f.
        """

    def tell(self) -> Optional[int]: ...
    def seek(self, num: int) -> int: ...
    def __iter__(self) -> Iterator[Message]: ...
    def __next__(self) -> Message: ...
    # An int key selects one message; a slice key selects a list of them.
    @overload
    def __getitem__(self, key: int) -> Message: ...
    @overload
    def __getitem__(self, key: slice) -> List[Message]: ...
    def to_dataframe(self, **kwargs: Any) -> pd.DataFrame: ...


# NOTE: ``Message`` and ``File`` must stay bound to the classes above.
# Rebinding either name to a TypeVar would shadow the class for every
# annotation in this stub.

# main.py
import numpy as np
import pygrib2 as grib

GRIB_FILE = 'data/ecmwf_tigge.grb'


def main() -> None:
    """Smoke-test the typed wrappers against the sample GRIB file.

    Opens ``GRIB_FILE`` through ``grib.Reader`` and asserts that the reader
    yields a ``grib.File``, that indexing it yields a ``grib.Message``, and
    that every item returned by ``data()`` is a numpy array.
    """
    with grib.Reader(GRIB_FILE) as opened:
        assert isinstance(opened, grib.File)

        first = opened[1]
        assert isinstance(first, grib.Message)

        for part in first.data():
            assert isinstance(part, np.ndarray)


if __name__ == '__main__':
    main()
`Union[int, None]` or `Optional[int]` can be written as `int | None` (PEP 604).