diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ef9d14b..23724f4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.17.14](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.14) - 2026-04-14 + +### Changed +- `api_key` and `limited_access_key` are now mutually exclusive in `NucleusClient`. Passing both (or setting `NUCLEUS_API_KEY` while also passing `limited_access_key`) raises a `ValueError`. + +### Fixed +- Docstring improvements across `NucleusClient`: fixed copy-paste errors (`get_job`, `get_slice`, `delete_slice`), removed phantom `stats_only` parameter from `list_jobs`, corrected `make_request` parameter name, and restructured `create_launch_model`/`create_launch_model_from_dir` docs for proper rendering. +- Suppressed Sphinx warnings from inherited pydantic `BaseModel` methods by removing `inherited-members` from autoapi options. + ## [0.17.13](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.17.13) - 2026-03-06 ### Fixed diff --git a/cli/client.py b/cli/client.py index 7289872a..5d0f0067 100644 --- a/cli/client.py +++ b/cli/client.py @@ -13,6 +13,11 @@ def init_client(): api_key = os.environ.get("NUCLEUS_API_KEY", None) limited_access_key = os.environ.get("NUCLEUS_LIMITED_ACCESS_KEY", None) + if api_key and limited_access_key: + raise RuntimeError( + "Both NUCLEUS_API_KEY and NUCLEUS_LIMITED_ACCESS_KEY are set. " + "Please set only one." 
+ ) if api_key or limited_access_key: client = nucleus.NucleusClient(api_key=api_key, limited_access_key=limited_access_key) else: diff --git a/conftest.py b/conftest.py index 99e59257..f5d9bcb5 100644 --- a/conftest.py +++ b/conftest.py @@ -23,7 +23,9 @@ @pytest.fixture(scope="session") def CLIENT(): if API_KEY and LIMITED_ACCESS_KEY: - return nucleus.NucleusClient(api_key=API_KEY, limited_access_key=LIMITED_ACCESS_KEY) + raise RuntimeError( + "Set only one of NUCLEUS_PYTEST_API_KEY or NUCLEUS_PYTEST_LIMITED_ACCESS_KEY, not both." + ) if API_KEY: return nucleus.NucleusClient(api_key=API_KEY) # LIMITED_ACCESS_KEY only diff --git a/docs/conf.py b/docs/conf.py index 1bdfe35c..9a0f4ce7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,6 +48,7 @@ # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] +suppress_warnings = ["toc.not_included"] # -- Options for HTML output ------------------------------------------------- @@ -75,7 +76,6 @@ autoapi_options = [ "members", "no-undoc-members", - "inherited-members", "show-module-summary", "imported-members", ] diff --git a/nucleus/__init__.py b/nucleus/__init__.py index 8abcca2f..7420a255 100644 --- a/nucleus/__init__.py +++ b/nucleus/__init__.py @@ -142,7 +142,6 @@ NucleusAPIError, ) from .job import CustomerJobTypes -from .logger import logger from .model import Model from .model_run import ModelRun from .payload_constructor import ( @@ -179,36 +178,34 @@ class NucleusClient: """Client to interact with the Nucleus API via Python SDK. Parameters: - api_key: Follow `this guide <https://scale.com/docs/account#section-api-keys>`_ - to retrieve your API keys. **Only** optional when ``limited_access_key`` is provided. + api_key: One of ``api_key`` or ``limited_access_key`` must be provided; you cannot pass + both. For standard Scale API key authentication, pass the key here. Follow `this guide + <https://scale.com/docs/account#section-api-keys>`_ to retrieve API keys. 
If you omit + this argument and are not using ``limited_access_key``, the SDK falls back to the + ``NUCLEUS_API_KEY`` environment variable. + limited_access_key: Nucleus-only API key for scoped access. Reach out to your Scale + representative to obtain a limited access key. use_notebook: Whether the client is being used in a notebook (toggles tqdm style). Default is ``False``. endpoint: Base URL of the API. Default is Nucleus's current production API. - limited_access_key: Key enabling additional, scoped access. **Only** optional when ``api_key`` is provided. Reach out to your Scale representative to obtain a limited access key. - - Authentication notes: - Some users have Nucleus-only API keys. You can - instantiate the client with only ``limited_access_key`` (no ``api_key``) and the SDK - will authenticate requests using this key only. If both - ``api_key`` and ``limited_access_key`` are provided, Basic Auth (``api_key``) and the - additional limited access key will both be sent. - - .. code-block:: python - - # Using a basic auth key - import nucleus - client = nucleus.NucleusClient(api_key="YOUR_API_KEY", ...) - - # Using only a limited access key (no Basic Auth) - import nucleus - client = nucleus.NucleusClient(limited_access_key="YOUR_LIMITED_KEY", ...) - - # Using both keys (Basic Auth and limited access header) - client = nucleus.NucleusClient( - api_key="YOUR_API_KEY", - limited_access_key="YOUR_LIMITED_KEY", - ... - ) + + .. note:: + + You must provide **either** a standard Scale API key (``api_key``, or + ``NUCLEUS_API_KEY`` in the environment) **or** a Nucleus-only key + (``limited_access_key``), never both. Passing both arguments, or setting + the environment variable ``NUCLEUS_API_KEY`` while also passing + ``limited_access_key``, will raise an error. + + Example:: + + # Using a basic auth key + import nucleus + client = nucleus.NucleusClient(api_key="YOUR_API_KEY", ...) 
+ + # Using only a limited access key (no Basic Auth) + import nucleus + client = nucleus.NucleusClient(limited_access_key="YOUR_LIMITED_KEY", ...) """ def __init__( @@ -218,6 +215,15 @@ def __init__( endpoint: Optional[str] = None, limited_access_key: Optional[str] = None, ): + effective_basic_key = ( + api_key if api_key else os.environ.get("NUCLEUS_API_KEY") + ) + if limited_access_key and effective_basic_key: + raise ValueError( + "Cannot provide both 'api_key' and 'limited_access_key'. " + "Use 'api_key' for standard Scale API key authentication, " + "or 'limited_access_key' for Nucleus-only access, but not both." + ) # Allow usage with only a limited access key if api_key is None and limited_access_key: self.api_key = None @@ -328,14 +334,13 @@ def list_jobs( """Fetches all of your running jobs in Nucleus. Parameters: - job_types: Filter on set of job types, if None, fetch all types - from_date: beginning of date range filter - to_date: end of date range filter - limit: number of results to fetch, max 50_000 - show_completed: dont fetch jobs with Completed status - stats_only: return overview of jobs, instead of a list of job objects - dataset_id: filter on a particular dataset - date_limit: Deprecated, do not use + show_completed: Whether to include jobs with Completed status. + from_date: Beginning of date range filter. + to_date: End of date range filter. + job_types: Filter on set of job types. If None, fetch all types. + limit: Number of results to fetch, max 50,000. + dataset_id: Filter on a particular dataset. + date_limit: Deprecated, do not use. Returns: List[:class:`AsyncJob`]: List of running asynchronous jobs @@ -385,10 +390,10 @@ def get_dataset(self, dataset_id: str) -> Dataset: return Dataset(dataset_id, self) def get_job(self, job_id: str) -> AsyncJob: - """Fetches a dataset by its ID. + """Fetches a job by its ID. Parameters: - job_id: The ID of the dataset to fetch. + job_id: The ID of the job to fetch. 
Returns: :class:`AsyncJob`: The Nucleus async job as an object. @@ -506,19 +511,19 @@ def create_dataset( Default is False (dataset of items). use_privacy_mode: Whether the images of this dataset should be uploaded to Scale. If set to True, customer will have to adjust their file access policy with Scale. - item_metadata_schema: Dict defining item-level metadata schema. See below. - annotation_metadata_schema: Dict defining annotation-level metadata schema. + item_metadata_schema: Dict defining item-level metadata schema, structured as:: - Metadata schemas must be structured as follows:: + { + "field_name": { + "type": "category" | "number" | "text" | "json" + "choices": List[str] | None + "description": str | None + }, + ... + } - { - "field_name": { - "type": "category" | "number" | "text" | "json" - "choices": List[str] | None - "description": str | None - }, - ... - } + annotation_metadata_schema: Dict defining annotation-level metadata schema. + Same format as ``item_metadata_schema``. Returns: :class:`Dataset`: The newly created Nucleus dataset as an object. @@ -660,10 +665,10 @@ def create_launch_model( Parameters: name: A human-readable name for the model. reference_id: Unique, user-controlled ID for the model. This can be - used, for example, to link to an external storage of models which - may have its own ID scheme. - bundle_args: Dict for kwargs for the creation of a Launch bundle, - more details on the keys below. + used, for example, to link to an external storage of models which + may have its own ID scheme. + bundle_args: Dict of kwargs for creating a Launch bundle. See the + note below for supported keys. metadata: An arbitrary dictionary of additional data about this model that can be stored and retrieved. For example, you can store information about the hyperparameters used in training this model. @@ -671,55 +676,28 @@ def create_launch_model( Returns: :class:`Model`: The newly created model as an object. 
- Details on ``bundle_args``: - Grabs an S3 signed URL and uploads a model bundle to Scale Launch. - - A model bundle consists of exactly ``{predict_fn_or_cls}``, - ``{load_predict_fn + model}``, or ``{load_predict_fn + load_model_fn}``. - Pre/post-processing code can be included inside ``load_predict_fn``/``model`` - or in the ``predict_fn_or_cls`` call. - Note: the exact parameters used depend on the version of the Launch client. - If you are on Launch client version 0.x, use ``env_params``. Otherwise, - use ``pytorch_image_tag`` and ``tensorflow_version``. - - ``bundle_args`` keys: - - - ``model_bundle_name``: Name of model bundle you want to create. - This acts as a unique identifier. - - ``predict_fn_or_cls``: Function or a callable class that runs - end-to-end (pre/post processing and model inference) on the call. - I.e. ``predict_fn_or_cls(REQUEST) -> RESPONSE``. - - ``model``: Typically a trained neural network, e.g. a PyTorch - module. - - ``load_predict_fn``: Function that, when called with ``model``, - returns a function that carries out inference. - I.e. ``load_predict_fn(model) -> func; func(REQUEST) -> RESPONSE``. - - ``load_model_fn``: Function that, when run, loads a model, e.g. a - PyTorch module. - I.e. ``load_predict_fn(load_model_fn()) -> func; func(REQUEST) -> RESPONSE``. - - ``bundle_url``: Only for self-hosted mode. Desired location of - bundle. Overrides any value given by ``self.bundle_location_fn``. - - ``requirements``: A list of Python package requirements, e.g. - ``["tensorflow==2.3.0", "tensorflow-hub==0.11.0"]``. If no list - has been passed, this defaults to the currently imported list of - packages. - - ``app_config``: Either a dictionary representing YAML file - contents or a local path to a YAML file. - - ``env_params``: Only for Launch v0. A dictionary that dictates - environment information, e.g. whether to use PyTorch or - TensorFlow and which CUDA/cuDNN versions to use. 
Specifically, - the dictionary should contain ``"framework_type"`` (either - ``"tensorflow"`` or ``"pytorch"``), ``"pytorch_version"`` - (if framework type is PyTorch), ``"cuda_version"``, - ``"cudnn_version"``, and ``"tensorflow_version"`` - (if framework type is TensorFlow). - - ``globals_copy``: Dictionary of the global symbol table. - Normally provided by the built-in ``globals()`` function. - - ``pytorch_image_tag``: Only for Launch v1 when using the PyTorch - framework type. The tag of the PyTorch Docker image you want to - use, e.g. ``1.11.0-cuda11.3-cudnn8-runtime``. - - ``tensorflow_version``: Only for Launch v1 when using TensorFlow. - Version of TensorFlow, e.g. ``"2.3.0"``. + .. note:: + + A bundle consists of exactly ``{predict_fn_or_cls}``, + ``{load_predict_fn + model}``, or + ``{load_predict_fn + load_model_fn}``. The exact keys depend on + the Launch client version (use ``env_params`` for v0.x, or + ``pytorch_image_tag``/``tensorflow_version`` otherwise). + + Supported ``bundle_args`` keys: + + - ``model_bundle_name``: Unique identifier for the bundle. + - ``predict_fn_or_cls``: End-to-end callable for inference. + - ``model``: Trained neural network, e.g. a PyTorch module. + - ``load_predict_fn``: Returns an inference function given a model. + - ``load_model_fn``: Loads a model. + - ``bundle_url``: Self-hosted mode only. Desired bundle location. + - ``requirements``: List of pip packages. + - ``app_config``: YAML dict or local path. + - ``env_params``: Launch v0 framework/CUDA config. + - ``globals_copy``: Global symbol table (from ``globals()``). + - ``pytorch_image_tag``: Launch v1 + PyTorch image tag. + - ``tensorflow_version``: Launch v1 + TensorFlow version. """ from launch import LaunchClient @@ -774,8 +752,8 @@ def create_launch_model_from_dir( reference_id: Unique, user-controlled ID for the model. This can be used, for example, to link to an external storage of models which may have its own id scheme. 
- bundle_from_dir_args: Dict for kwargs for the creation of a bundle from directory, - more details on the keys below. + bundle_from_dir_args: Dict of kwargs for creating a bundle from + local directories. See the note below for supported keys. metadata: An arbitrary dictionary of additional data about this model that can be stored and retrieved. For example, you can store information about the hyperparameters used in training this model. @@ -783,13 +761,30 @@ def create_launch_model_from_dir( Returns: :class:`Model`: The newly created model as an object. - Details on ``bundle_from_dir_args``: - Packages up code from one or more local filesystem folders and uploads - them as a bundle to Scale Launch. In this mode, a bundle is just local - code instead of a serialized object. + .. note:: + + Code from one or more local filesystem folders is packaged into a + zip and uploaded to Scale Launch. Contents are unzipped relative to + the server-side ``PYTHONPATH``, so module paths should reflect the + directory structure (e.g. ``my_module.my_file.f``). The exact keys + depend on the Launch client version (use ``env_params`` for v0.x, + or ``pytorch_image_tag``/``tensorflow_version`` otherwise). + + Supported ``bundle_from_dir_args`` keys: - For example, if you have a directory structure like this, and your - current working directory is also ``my_root``:: + - ``model_bundle_name``: Unique identifier for the bundle. + - ``base_paths``: Local dirs containing the bundle code. + - ``requirements_path``: Path to a ``requirements.txt`` file. + - ``env_params``: Launch v0 framework/CUDA config. + - ``load_predict_fn_module_path``: Module path for inference fn. + - ``load_model_fn_module_path``: Module path for model loader. + - ``app_config``: YAML dict or local path. + - ``pytorch_image_tag``: Launch v1 + PyTorch image tag. + - ``tensorflow_version``: Launch v1 + TensorFlow version. + + .. 
note:: + + For example, given this directory structure:: my_root/ my_module1/ @@ -800,60 +795,12 @@ def create_launch_model_from_dir( __init__.py ...files and directories - Calling ``create_model_bundle_from_dirs`` with - ``base_paths=["my_module1", "my_module2"]`` essentially creates a zip - file without the root directory, e.g.:: - - my_module1/ - __init__.py - ...files and directories - my_inference_file.py - my_module2/ - __init__.py - ...files and directories - - These contents will be unzipped relative to the server-side - ``PYTHONPATH``. Bear this in mind when referencing Python module paths - for this bundle. For instance, if ``my_inference_file.py`` has - ``def f(...)`` as the desired inference loading function, then + Calling with ``base_paths=["my_module1", "my_module2"]`` creates a + zip without the root directory. Contents are unzipped relative to + the server-side ``PYTHONPATH``. If ``my_inference_file.py`` has + ``def f(...)`` as the inference loading function, then ``load_predict_fn_module_path`` should be ``my_module1.my_inference_file.f``. - - Note: the exact keys for ``bundle_from_dir_args`` depend on the - version of the Launch client. If you are on Launch client version 0.x, - you will use ``env_params``; otherwise, you will use - ``pytorch_image_tag`` and ``tensorflow_version``. - - Keys for ``bundle_from_dir_args``: - - - ``model_bundle_name``: Name of model bundle you want to create. - This acts as a unique identifier. - - ``base_paths``: The paths on the local filesystem where the bundle - code lives. - - ``requirements_path``: A path on the local filesystem where a - ``requirements.txt`` file lives. - - ``env_params``: Only for Launch v0. A dictionary that dictates - environment information, e.g. whether to use PyTorch or - TensorFlow and which CUDA/cuDNN versions to use. 
Specifically, - the dictionary should contain ``"framework_type"`` (either - ``"tensorflow"`` or ``"pytorch"``), ``"pytorch_version"`` - (if framework type is PyTorch), ``"cuda_version"``, - ``"cudnn_version"``, and ``"tensorflow_version"`` - (if framework type is TensorFlow). - - ``load_predict_fn_module_path``: A Python module path for a - function that, when called with the output of - ``load_model_fn_module_path``, returns a function that carries out - inference. - - ``load_model_fn_module_path``: A Python module path for a - function that returns a model. The output feeds into the function - located at ``load_predict_fn_module_path``. - - ``app_config``: Either a dictionary representing YAML file - contents or a local path to a YAML file. - - ``pytorch_image_tag``: Only for Launch v1, and if you want to use - the PyTorch framework type. The tag of the PyTorch Docker image - you want to use, e.g. ``1.11.0-cuda11.3-cudnn8-runtime``. - - ``tensorflow_version``: Only for Launch v1, and if you want to - use TensorFlow. Version of TensorFlow, e.g. ``"2.3.0"``. """ from launch import LaunchClient @@ -993,7 +940,7 @@ def get_slice(self, slice_id: str) -> Slice: """Returns a slice object by Nucleus-generated ID. Parameters: - slice_id: Nucleus-generated dataset ID (starts with ``slc_``). This can + slice_id: Nucleus-generated slice ID (starts with ``slc_``). This can be retrieved via :meth:`Dataset.slices` or a Nucleus dashboard URL. Returns: @@ -1016,7 +963,7 @@ def delete_slice(self, slice_id: str) -> dict: """Deletes slice from Nucleus. Parameters: - slice_id: Nucleus-generated dataset ID (starts with ``slc_``). This can + slice_id: Nucleus-generated slice ID (starts with ``slc_``). This can be retrieved via :meth:`Dataset.slices` or a Nucleus dashboard URL. Returns: @@ -1128,7 +1075,7 @@ def download_pointcloud_task( self, task_id: str, frame_num: int ) -> List[Union[Point3D, LidarPoint]]: """ - Download the lidar point cloud data for a give task and frame number. 
+ Download the lidar point cloud data for a given task and frame number. Parameters: task_id: download point cloud for this particular task @@ -1287,8 +1234,8 @@ def make_request( Parameters: payload: Given request payload. route: Route for the request. - Requests command: ``requests.post``, ``requests.get``, or ``requests.delete``. - return_raw_response: return the request's response object entirely + requests_command: ``requests.post``, ``requests.get``, or ``requests.delete``. + return_raw_response: Whether to return the raw response object. Returns: Response payload as JSON dict or request object. @@ -1306,7 +1253,7 @@ def make_request( def _set_api_key(self, api_key): """Fetch API key from environment variable NUCLEUS_API_KEY if not set""" api_key = ( - api_key if api_key else os.environ.get("NUCLEUS_API_KEY", None) + api_key if api_key else os.environ.get("NUCLEUS_API_KEY") ) if api_key is None: raise NoAPIKey() @@ -1315,14 +1262,13 @@ def _set_api_key(self, api_key): @staticmethod def valid_dirname(dirname) -> str: - """ - Validate directory exists - Args: - dirname: Path of directory + """Validates that a directory exists. - Returns: - Existing directory path + Parameters: + dirname: Path of directory. + Returns: + Existing directory path. """ # ensures path ends with a slash _dirname = os.path.join(os.path.expanduser(dirname), "") diff --git a/nucleus/connection.py b/nucleus/connection.py index 7930d724..35a5faeb 100644 --- a/nucleus/connection.py +++ b/nucleus/connection.py @@ -17,6 +17,12 @@ def __init__(self, api_key: Optional[str] = None, endpoint: Optional[str] = None self.api_key = api_key self.endpoint = endpoint self.extra_headers = extra_headers or {} + if self.api_key is not None and self.extra_headers.get( + "x-limited-access-key" + ): + raise ValueError( + "Cannot use both api key and limited access key simultaneously." 
+ ) # Require at least one auth mechanism: Basic (api_key) or limited access header if self.api_key is None and not self.extra_headers.get("x-limited-access-key"): raise NoAPIKey() diff --git a/pyproject.toml b/pyproject.toml index b70c7468..ba785688 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.17.13" +version = "0.17.14" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"] diff --git a/scripts/load_test.py b/scripts/load_test.py index 37e0035c..700750c0 100644 --- a/scripts/load_test.py +++ b/scripts/load_test.py @@ -85,6 +85,10 @@ def client(): raise RuntimeError( "Set at least one of api_key or limited_access_key (via flags or env)." ) + if FLAGS.api_key and FLAGS.limited_access_key: + raise RuntimeError( + "Set only one of api_key or limited_access_key (via flags or env), not both." + ) return nucleus.NucleusClient( api_key=FLAGS.api_key, limited_access_key=FLAGS.limited_access_key )