# Source code for uataq

"""UATAQ

Read UATAQ data
"""

__version__ = "2025.11.0"
__author__ = "James Mineau"
__email__ = "jameskmineau@gmail.com"

import datetime as dt
import logging
from typing import Literal

import pandas as pd

# Best-practice for libraries: don't emit output unless the caller opts in.
logging.getLogger(__name__).addHandler(logging.NullHandler())

from . import filesystem, instruments, sites
from ._laboratory import Laboratory, get_site, laboratory
from .filesystem import DEFAULT_GROUP
from .timerange import TimeRange, TimeRangeTypes

_all_or_mult_strs = Literal["all"] | str | list[str] | tuple[str, ...] | set[str]

#: UATAQ Laboratory object.
#:
#: Built from :doc:`UATAQ configuration <config>`.
laboratory: Laboratory


# sites = {SID: laboratory.get_site(SID)  # name conflict
#          for SID in laboratory.sites}  # how much time does this take?


def read_data(
    SID: str,
    instruments: _all_or_mult_strs = "all",
    group: str | None = None,
    lvl: str | None = None,
    time_range: TimeRange | TimeRangeTypes = None,
    num_processes: int | Literal["max"] = 1,
    file_pattern: str | None = None,
) -> dict[str, pd.DataFrame]:
    """
    Read instrument data for one site.

    The site is looked up with ``get_site(SID)``; every other argument is
    forwarded positionally, unchanged, to that site's ``read_data`` method.

    Parameters
    ----------
    SID : str
        The site ID.
    instruments : str | list[str] | tuple[str] | set[str] | 'all'
        The instrument(s) to read data from. Default is 'all'.
    group : str | None
        The group name.
    lvl : str | None
        The data level.
    time_range : str | list[Union[str, dt.datetime, None]] | tuple[Union[str, dt.datetime, None], Union[str, dt.datetime, None]] | slice | None
        The time range to read data.
        Default is None which reads all available data.
    num_processes : int | 'max'
        The number of processes to use. Default is 1.
    file_pattern : str | None
        A string pattern to filter the file paths.

    Returns
    -------
    dict[str, pd.DataFrame]
        The data, as returned by the site's ``read_data`` method.
    """
    # Arguments are passed positionally, in the same order as this signature
    # (minus SID), so the site method's own parameter names are irrelevant.
    return get_site(SID).read_data(
        instruments, group, lvl, time_range, num_processes, file_pattern
    )
def get_obs(
    SID: str,
    pollutants: _all_or_mult_strs = "all",
    format: Literal["wide"] | Literal["long"] = "wide",
    group: str | None = None,
    time_range: TimeRange | TimeRangeTypes = None,
    num_processes: int | Literal["max"] = 1,
    **kwargs,
) -> pd.DataFrame:
    """
    Get observations for one site.

    The site is looked up with ``get_site(SID)``; the remaining arguments are
    forwarded unchanged to that site's ``get_obs`` method.

    Parameters
    ----------
    SID : str
        The site ID.
    pollutants : str | list[str] | tuple[str] | set[str] | 'all'
        The pollutant(s) to get observations for. Default is 'all'.
    format : 'wide' | 'long'
        The format of the data. Default is 'wide'.
    group : str | None
        The group name.
    time_range : str | list[Union[str, dt.datetime, None]] | tuple[Union[str, dt.datetime, None], Union[str, dt.datetime, None]] | slice | None
        The time range to get observations.
        Default is None which gets all available data.
    num_processes : int | 'max'
        The number of processes to use. Default is 1.
    kwargs
        Additional keyword arguments to pass to the site's `get_obs` method.

    Returns
    -------
    pd.DataFrame
        The observations, as returned by the site's ``get_obs`` method.
    """
    target_site = get_site(SID)
    # Named parameters go positionally; only the extras travel as keywords.
    return target_site.get_obs(
        pollutants, format, group, time_range, num_processes, **kwargs
    )
def get_recent_obs(
    SID: str,
    recent: str | dt.timedelta = dt.timedelta(days=10),
    pollutants: _all_or_mult_strs = "all",
    format: Literal["wide"] | Literal["long"] = "wide",
    group: str | None = None,
) -> pd.DataFrame:
    """
    Get recent observations from a site.

    The site is looked up with ``get_site(SID)``; the remaining arguments are
    forwarded positionally, unchanged, to that site's ``get_recent_obs``
    method.

    Parameters
    ----------
    SID : str
        The site ID.
    recent : str | dt.timedelta
        The recent time range. Default is 10 days. The value is passed
        through as-is, so how a string is interpreted is decided by the
        site's ``get_recent_obs`` method.
    pollutants : str | list[str] | tuple[str] | set[str] | 'all'
        The pollutant(s) to get observations for. Default is 'all'.
    format : 'wide' | 'long'
        The format of the data. Default is 'wide'.
    group : str | None
        The group name.

    Returns
    -------
    pd.DataFrame
        The recent observations, as returned by the site's
        ``get_recent_obs`` method.
    """
    site = get_site(SID)
    return site.get_recent_obs(recent, pollutants, format, group)


# Names exported by ``from uataq import *``.
__all__ = [
    "sites",
    "instruments",
    "laboratory",
    "filesystem",
    "DEFAULT_GROUP",
    "get_site",
    "read_data",
    "get_obs",
    "get_recent_obs",
]