"""UATAQ
Read UATAQ data
"""
__version__ = "2025.11.0"
__author__ = "James Mineau"
__email__ = "jameskmineau@gmail.com"
import datetime as dt
import logging
from typing import Literal
import pandas as pd
# Best-practice for libraries: don't emit output unless the caller opts in.
logging.getLogger(__name__).addHandler(logging.NullHandler())
from . import filesystem, instruments, sites
from ._laboratory import Laboratory, get_site, laboratory
from .filesystem import DEFAULT_GROUP
from .timerange import TimeRange, TimeRangeTypes
_all_or_mult_strs = Literal["all"] | str | list[str] | tuple[str, ...] | set[str]
#: UATAQ Laboratory object.
#:
#: Built from :doc:`UATAQ configuration <config>`.
laboratory: Laboratory
# sites = {SID: laboratory.get_site(SID) # name conflict
# for SID in laboratory.sites} # how much time does this take?
[docs]
def read_data(
SID: str,
instruments: _all_or_mult_strs = "all",
group: str | None = None,
lvl: str | None = None,
time_range: TimeRange | TimeRangeTypes = None,
num_processes: int | Literal["max"] = 1,
file_pattern: str | None = None,
) -> dict[str, pd.DataFrame]:
"""
Read data from an instrument at a site.
Parameters
----------
SID : str
The site ID.
instruments : str | list[str] | tuple[str] | set[str] | 'all'
The instrument(s) to read data from.
group : str | None
The group name.
lvl : str | None
The data level.
time_range : str | list[Union[str, dt.datetime, None]] | tuple[Union[str, dt.datetime, None], Union[str, dt.datetime, None]] | slice | None
The time range to read data. Default is None which reads all available data.
num_processes : int | 'max'
The number of processes to use. Default is 1.
file_pattern : str | None
A string pattern to filter the file paths.
Returns
-------
dict[str, pd.DataFrame]
The data.
"""
site = get_site(SID)
data = site.read_data(
instruments, group, lvl, time_range, num_processes, file_pattern
)
return data
[docs]
def get_obs(
SID: str,
pollutants: _all_or_mult_strs = "all",
format: Literal["wide"] | Literal["long"] = "wide",
group: str | None = None,
time_range: TimeRange | TimeRangeTypes = None,
num_processes: int | Literal["max"] = 1,
**kwargs,
) -> pd.DataFrame:
"""
Get observations from a site.
Parameters
----------
SID : str
The site ID.
pollutants : str | list[str] | tuple[str] | set[str] | 'all'
The pollutant(s) to get observations for.
format : 'wide' | 'long'
The format of the data. Default is 'wide'.
group : str | None
The group name.
time_range : str | list[Union[str, dt.datetime, None]] | tuple[Union[str, dt.datetime, None], Union[str, dt.datetime, None]] | slice | None
The time range to get observations. Default is None which gets all available data.
num_processes : int | 'max'
The number of processes to use. Default is 1.
kwargs
Additional keyword arguments to pass to the site's `get_obs` method.
Returns
-------
pd.DataFrame
The observations.
"""
site = get_site(SID)
obs = site.get_obs(pollutants, format, group, time_range, num_processes, **kwargs)
return obs
def get_recent_obs(
SID,
recent: str | dt.timedelta = dt.timedelta(days=10),
pollutants: _all_or_mult_strs = "all",
format: Literal["wide"] | Literal["long"] = "wide",
group: str | None = None,
) -> pd.DataFrame:
"""
Get recent observations from a site.
Parameters
----------
SID : str
The site ID.
recent : str | dt.timedelta
The recent time range. Default is 10 days.
pollutants : str | list[str] | tuple[str] | set[str] | 'all'
The pollutant(s) to get observations for.
format : 'wide' | 'long'
The format of the data. Default is 'wide'.
group : str | None
The group name.
Returns
-------
pd.DataFrame
The recent observations.
"""
site = get_site(SID)
obs = site.get_recent_obs(recent, pollutants, format, group)
return obs
__all__ = [
"sites",
"instruments",
"laboratory",
"filesystem",
"DEFAULT_GROUP",
"get_site",
"read_data",
"get_obs",
"get_recent_obs",
]