diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index f18270d528..2a2ee2b8f3 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.91.0"
+ ".": "1.92.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 1e0182cf22..ebbf3ee296 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 111
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-ef4ecb19eb61e24c49d77fef769ee243e5279bc0bdbaee8d0f8dba4da8722559.yml
-openapi_spec_hash: 1b8a9767c9f04e6865b06c41948cdc24
-config_hash: fd2af1d5eff0995bb7dc02ac9a34851d
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-cca460eaf5cc13e9d6e5293eb97aac53d66dc1385c691f74b768c97d165b6e8b.yml
+openapi_spec_hash: 9ec43d443b3dd58ca5aa87eb0a7eb49f
+config_hash: e74d6791681e3af1b548748ff47a22c2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14562edfac..60ab8eb6a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog
+## 1.92.0 (2025-06-26)
+
+Full Changelog: [v1.91.0...v1.92.0](https://github.com/openai/openai-python/compare/v1.91.0...v1.92.0)
+
+### Features
+
+* **api:** webhook and deep research support ([d3bb116](https://github.com/openai/openai-python/commit/d3bb116f34f470502f902b88131deec43a953b12))
+* **client:** move stream and parse out of beta ([0e358ed](https://github.com/openai/openai-python/commit/0e358ed66b317038705fb38958a449d284f3cb88))
+
+
+### Bug Fixes
+
+* **ci:** release-doctor — report correct token name ([ff8c556](https://github.com/openai/openai-python/commit/ff8c5561e44e8a0902732b5934c97299d2c98d4e))
+
+
+### Chores
+
+* **internal:** add tests for breaking change detection ([710fe8f](https://github.com/openai/openai-python/commit/710fe8fd5f9e33730338341680152d3f2556dfa0))
+* **tests:** skip some failing tests on the latest python versions ([93ccc38](https://github.com/openai/openai-python/commit/93ccc38a8ef1575d77d33d031666d07d10e4af72))
+
## 1.91.0 (2025-06-23)
Full Changelog: [v1.90.0...v1.91.0](https://github.com/openai/openai-python/compare/v1.90.0...v1.91.0)
diff --git a/README.md b/README.md
index 4861e4aaab..763428ddc8 100644
--- a/README.md
+++ b/README.md
@@ -406,6 +406,84 @@ client.files.create(
The async client uses the exact same interface. If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically.
+## Webhook Verification
+
+Verifying webhook signatures is _optional but encouraged_.
+
+### Parsing webhook payloads
+
+For most use cases, you will likely want to verify the webhook and parse the payload at the same time. To achieve this, we provide the method `client.webhooks.unwrap()`, which parses a webhook request and verifies that it was sent by OpenAI. This method will raise an error if the signature is invalid.
+
+Note that the `body` parameter must be the raw JSON string sent from the server (do not parse it first). The `.unwrap()` method will parse this JSON for you into an event object after verifying the webhook was sent from OpenAI.
+
+```python
+from openai import OpenAI
+from flask import Flask, request
+
+app = Flask(__name__)
+client = OpenAI() # OPENAI_WEBHOOK_SECRET environment variable is used by default
+
+
+@app.route("/webhook", methods=["POST"])
+def webhook():
+ request_body = request.get_data(as_text=True)
+
+ try:
+ event = client.webhooks.unwrap(request_body, request.headers)
+
+ if event.type == "response.completed":
+ print("Response completed:", event.data)
+ elif event.type == "response.failed":
+ print("Response failed:", event.data)
+ else:
+ print("Unhandled event type:", event.type)
+
+ return "ok"
+ except Exception as e:
+ print("Invalid signature:", e)
+ return "Invalid signature", 400
+
+
+if __name__ == "__main__":
+ app.run(port=8000)
+```
+
+### Verifying webhook payloads directly
+
+In some cases, you may want to verify the webhook separately from parsing the payload. If you prefer to handle these steps yourself, we provide the method `client.webhooks.verify_signature()` to _only verify_ the signature of a webhook request. Like `.unwrap()`, this method will raise an error if the signature is invalid.
+
+Note that the `body` parameter must be the raw JSON string sent from the server (do not parse it first). You will then need to parse the body after verifying the signature.
+
+```python
+import json
+from openai import OpenAI
+from flask import Flask, request
+
+app = Flask(__name__)
+client = OpenAI() # OPENAI_WEBHOOK_SECRET environment variable is used by default
+
+
+@app.route("/webhook", methods=["POST"])
+def webhook():
+ request_body = request.get_data(as_text=True)
+
+ try:
+ client.webhooks.verify_signature(request_body, request.headers)
+
+ # Parse the body after verification
+ event = json.loads(request_body)
+ print("Verified event:", event)
+
+ return "ok"
+ except Exception as e:
+ print("Invalid signature:", e)
+ return "Invalid signature", 400
+
+
+if __name__ == "__main__":
+ app.run(port=8000)
+```
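+
+Both helpers read the webhook secret from the client. Judging from the client changes in this release, the secret can also be supplied explicitly, either via the `webhook_secret` argument to the `OpenAI(...)` constructor or via the `secret` keyword on `.unwrap()` and `.verify_signature()` (see `api.md`), instead of relying on the `OPENAI_WEBHOOK_SECRET` environment variable. This release also exports `openai.InvalidWebhookSignatureError`, so the broad `except Exception` in the examples above can be narrowed to that error if you prefer. A minimal sketch of the explicit-secret option, where `MY_OPENAI_WEBHOOK_SECRET` is a hypothetical environment variable name:
+
+```python
+import os
+
+from openai import OpenAI
+
+# Supply the webhook secret explicitly instead of relying on the
+# OPENAI_WEBHOOK_SECRET environment variable; "MY_OPENAI_WEBHOOK_SECRET"
+# is a placeholder name used only for illustration.
+client = OpenAI(webhook_secret=os.environ["MY_OPENAI_WEBHOOK_SECRET"])
+```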
+
## Handling errors
When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `openai.APIConnectionError` is raised.
diff --git a/api.md b/api.md
index 25360d741e..abf0de481d 100644
--- a/api.md
+++ b/api.md
@@ -395,6 +395,35 @@ Methods:
- client.vector_stores.file_batches.poll(\*args) -> VectorStoreFileBatch
- client.vector_stores.file_batches.upload_and_poll(\*args) -> VectorStoreFileBatch
+# Webhooks
+
+Types:
+
+```python
+from openai.types.webhooks import (
+ BatchCancelledWebhookEvent,
+ BatchCompletedWebhookEvent,
+ BatchExpiredWebhookEvent,
+ BatchFailedWebhookEvent,
+ EvalRunCanceledWebhookEvent,
+ EvalRunFailedWebhookEvent,
+ EvalRunSucceededWebhookEvent,
+ FineTuningJobCancelledWebhookEvent,
+ FineTuningJobFailedWebhookEvent,
+ FineTuningJobSucceededWebhookEvent,
+ ResponseCancelledWebhookEvent,
+ ResponseCompletedWebhookEvent,
+ ResponseFailedWebhookEvent,
+ ResponseIncompleteWebhookEvent,
+ UnwrapWebhookEvent,
+)
+```
+
+Methods:
+
+- client.webhooks.unwrap(payload, headers, \*, secret) -> UnwrapWebhookEvent
+- client.webhooks.verify_signature(payload, headers, \*, secret, tolerance) -> None
+
# Beta
## Realtime
@@ -774,6 +803,7 @@ from openai.types.responses import (
ResponseWebSearchCallSearchingEvent,
Tool,
ToolChoiceFunction,
+ ToolChoiceMcp,
ToolChoiceOptions,
ToolChoiceTypes,
WebSearchTool,
diff --git a/bin/check-release-environment b/bin/check-release-environment
index 2cc5ad6352..044ed525d1 100644
--- a/bin/check-release-environment
+++ b/bin/check-release-environment
@@ -7,7 +7,7 @@ if [ -z "${STAINLESS_API_KEY}" ]; then
fi
if [ -z "${PYPI_TOKEN}" ]; then
- errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
+ errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
fi
lenErrors=${#errors[@]}
diff --git a/examples/parsing.py b/examples/parsing.py
index 17e5db52ec..906ce974c1 100644
--- a/examples/parsing.py
+++ b/examples/parsing.py
@@ -18,7 +18,7 @@ class MathResponse(BaseModel):
client = OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "You are a helpful math tutor."},
diff --git a/examples/parsing_stream.py b/examples/parsing_stream.py
index 6c6f078f77..1be7853098 100644
--- a/examples/parsing_stream.py
+++ b/examples/parsing_stream.py
@@ -18,7 +18,7 @@ class MathResponse(BaseModel):
client = OpenAI()
-with client.beta.chat.completions.stream(
+with client.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "You are a helpful math tutor."},
diff --git a/examples/parsing_tools.py b/examples/parsing_tools.py
index c6065eeb7a..26921b1df6 100644
--- a/examples/parsing_tools.py
+++ b/examples/parsing_tools.py
@@ -57,7 +57,7 @@ class Query(BaseModel):
client = OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
diff --git a/examples/parsing_tools_stream.py b/examples/parsing_tools_stream.py
index eea6f6a43a..b7dcd3d230 100644
--- a/examples/parsing_tools_stream.py
+++ b/examples/parsing_tools_stream.py
@@ -15,7 +15,7 @@ class GetWeather(BaseModel):
client = OpenAI()
-with client.beta.chat.completions.stream(
+with client.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
diff --git a/helpers.md b/helpers.md
index 77823fa750..21ad8ac2fb 100644
--- a/helpers.md
+++ b/helpers.md
@@ -2,7 +2,7 @@
The OpenAI API supports extracting JSON from the model with the `response_format` request param, for more details on the API, see [this guide](https://platform.openai.com/docs/guides/structured-outputs).
-The SDK provides a `client.beta.chat.completions.parse()` method which is a wrapper over the `client.chat.completions.create()` that
+The SDK provides a `client.chat.completions.parse()` method which is a wrapper over the `client.chat.completions.create()` that
provides richer integrations with Python specific types & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
## Auto-parsing response content with Pydantic models
@@ -24,7 +24,7 @@ class MathResponse(BaseModel):
final_answer: str
client = OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "You are a helpful math tutor."},
@@ -44,6 +44,7 @@ else:
## Auto-parsing function tool calls
The `.parse()` method will also automatically parse `function` tool calls if:
+
- You use the `openai.pydantic_function_tool()` helper method
- You mark your tool schema with `"strict": True`
@@ -96,7 +97,7 @@ class Query(BaseModel):
order_by: OrderBy
client = openai.OpenAI()
-completion = client.beta.chat.completions.parse(
+completion = client.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -121,7 +122,7 @@ print(tool_call.function.parsed_arguments.table_name)
### Differences from `.create()`
-The `beta.chat.completions.parse()` method imposes some additional restrictions on it's usage that `chat.completions.create()` does not.
+The `chat.completions.parse()` method imposes some additional restrictions on its usage that `chat.completions.create()` does not.
- If the completion completes with `finish_reason` set to `length` or `content_filter`, the `LengthFinishReasonError` / `ContentFilterFinishReasonError` errors will be raised.
- Only strict function tools can be passed, e.g. `{'type': 'function', 'function': {..., 'strict': True}}`
@@ -132,7 +133,7 @@ OpenAI supports streaming responses when interacting with the [Chat Completion](
## Chat Completions API
-The SDK provides a `.beta.chat.completions.stream()` method that wraps the `.chat.completions.create(stream=True)` stream providing a more granular event API & automatic accumulation of each delta.
+The SDK provides a `.chat.completions.stream()` method that wraps the `.chat.completions.create(stream=True)` stream, providing a more granular event API & automatic accumulation of each delta.
It also supports all aforementioned [parsing helpers](#structured-outputs-parsing-helpers).
@@ -143,7 +144,7 @@ from openai import AsyncOpenAI
client = AsyncOpenAI()
-async with client.beta.chat.completions.stream(
+async with client.chat.completions.stream(
model='gpt-4o-2024-08-06',
messages=[...],
) as stream:
@@ -263,7 +264,7 @@ A handful of helper methods are provided on the stream class for additional conv
Returns the accumulated `ParsedChatCompletion` object
```py
-async with client.beta.chat.completions.stream(...) as stream:
+async with client.chat.completions.stream(...) as stream:
...
completion = await stream.get_final_completion()
@@ -275,7 +276,7 @@ print(completion.choices[0].message)
If you want to wait for the stream to complete, you can use the `.until_done()` method.
```py
-async with client.beta.chat.completions.stream(...) as stream:
+async with client.chat.completions.stream(...) as stream:
await stream.until_done()
# stream is now finished
```
diff --git a/pyproject.toml b/pyproject.toml
index 1f2b8a6044..eb9008a3a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.91.0"
+version = "1.92.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index 5fb1520549..226fed9554 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -30,6 +30,7 @@
LengthFinishReasonError,
UnprocessableEntityError,
APIResponseValidationError,
+ InvalidWebhookSignatureError,
ContentFilterFinishReasonError,
)
from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient
@@ -62,6 +63,7 @@
"InternalServerError",
"LengthFinishReasonError",
"ContentFilterFinishReasonError",
+ "InvalidWebhookSignatureError",
"Timeout",
"RequestOptions",
"Client",
@@ -121,6 +123,8 @@
project: str | None = None
+webhook_secret: str | None = None
+
base_url: str | _httpx.URL | None = None
timeout: float | Timeout | None = DEFAULT_TIMEOUT
@@ -183,6 +187,17 @@ def project(self, value: str | None) -> None: # type: ignore
project = value
+ @property # type: ignore
+ @override
+ def webhook_secret(self) -> str | None:
+ return webhook_secret
+
+ @webhook_secret.setter # type: ignore
+ def webhook_secret(self, value: str | None) -> None: # type: ignore
+ global webhook_secret
+
+ webhook_secret = value
+
@property
@override
def base_url(self) -> _httpx.URL:
@@ -335,6 +350,7 @@ def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction]
api_key=api_key,
organization=organization,
project=project,
+ webhook_secret=webhook_secret,
base_url=base_url,
timeout=timeout,
max_retries=max_retries,
@@ -363,6 +379,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
models as models,
batches as batches,
uploads as uploads,
+ webhooks as webhooks,
responses as responses,
containers as containers,
embeddings as embeddings,
diff --git a/src/openai/_client.py b/src/openai/_client.py
index 4ed9a2f52e..f3a83afec3 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -57,6 +57,7 @@
from .resources.images import Images, AsyncImages
from .resources.models import Models, AsyncModels
from .resources.batches import Batches, AsyncBatches
+ from .resources.webhooks import Webhooks, AsyncWebhooks
from .resources.beta.beta import Beta, AsyncBeta
from .resources.chat.chat import Chat, AsyncChat
from .resources.embeddings import Embeddings, AsyncEmbeddings
@@ -78,6 +79,7 @@ class OpenAI(SyncAPIClient):
api_key: str
organization: str | None
project: str | None
+ webhook_secret: str | None
websocket_base_url: str | httpx.URL | None
"""Base URL for WebSocket connections.
@@ -93,6 +95,7 @@ def __init__(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
base_url: str | httpx.URL | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -119,6 +122,7 @@ def __init__(
- `api_key` from `OPENAI_API_KEY`
- `organization` from `OPENAI_ORG_ID`
- `project` from `OPENAI_PROJECT_ID`
+ - `webhook_secret` from `OPENAI_WEBHOOK_SECRET`
"""
if api_key is None:
api_key = os.environ.get("OPENAI_API_KEY")
@@ -136,6 +140,10 @@ def __init__(
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ if webhook_secret is None:
+ webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET")
+ self.webhook_secret = webhook_secret
+
self.websocket_base_url = websocket_base_url
if base_url is None:
@@ -216,6 +224,12 @@ def vector_stores(self) -> VectorStores:
return VectorStores(self)
+ @cached_property
+ def webhooks(self) -> Webhooks:
+ from .resources.webhooks import Webhooks
+
+ return Webhooks(self)
+
@cached_property
def beta(self) -> Beta:
from .resources.beta import Beta
@@ -288,6 +302,7 @@ def copy(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -325,6 +340,7 @@ def copy(
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ webhook_secret=webhook_secret or self.webhook_secret,
websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
@@ -379,6 +395,7 @@ class AsyncOpenAI(AsyncAPIClient):
api_key: str
organization: str | None
project: str | None
+ webhook_secret: str | None
websocket_base_url: str | httpx.URL | None
"""Base URL for WebSocket connections.
@@ -394,6 +411,7 @@ def __init__(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
base_url: str | httpx.URL | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
@@ -420,6 +438,7 @@ def __init__(
- `api_key` from `OPENAI_API_KEY`
- `organization` from `OPENAI_ORG_ID`
- `project` from `OPENAI_PROJECT_ID`
+ - `webhook_secret` from `OPENAI_WEBHOOK_SECRET`
"""
if api_key is None:
api_key = os.environ.get("OPENAI_API_KEY")
@@ -437,6 +456,10 @@ def __init__(
project = os.environ.get("OPENAI_PROJECT_ID")
self.project = project
+ if webhook_secret is None:
+ webhook_secret = os.environ.get("OPENAI_WEBHOOK_SECRET")
+ self.webhook_secret = webhook_secret
+
self.websocket_base_url = websocket_base_url
if base_url is None:
@@ -517,6 +540,12 @@ def vector_stores(self) -> AsyncVectorStores:
return AsyncVectorStores(self)
+ @cached_property
+ def webhooks(self) -> AsyncWebhooks:
+ from .resources.webhooks import AsyncWebhooks
+
+ return AsyncWebhooks(self)
+
@cached_property
def beta(self) -> AsyncBeta:
from .resources.beta import AsyncBeta
@@ -589,6 +618,7 @@ def copy(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -626,6 +656,7 @@ def copy(
api_key=api_key or self.api_key,
organization=organization or self.organization,
project=project or self.project,
+ webhook_secret=webhook_secret or self.webhook_secret,
websocket_base_url=websocket_base_url or self.websocket_base_url,
base_url=base_url or self.base_url,
timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
diff --git a/src/openai/_exceptions.py b/src/openai/_exceptions.py
index e326ed9578..09016dfedb 100644
--- a/src/openai/_exceptions.py
+++ b/src/openai/_exceptions.py
@@ -24,6 +24,7 @@
"InternalServerError",
"LengthFinishReasonError",
"ContentFilterFinishReasonError",
+ "InvalidWebhookSignatureError",
]
@@ -154,3 +155,7 @@ def __init__(self) -> None:
super().__init__(
f"Could not parse response content as the request was rejected by the content filter",
)
+
+
+class InvalidWebhookSignatureError(ValueError):
+ """Raised when a webhook signature is invalid, meaning the computed signature does not match the expected signature."""
diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py
index fb7c754917..a80e939300 100644
--- a/src/openai/_module_client.py
+++ b/src/openai/_module_client.py
@@ -10,6 +10,7 @@
from .resources.images import Images
from .resources.models import Models
from .resources.batches import Batches
+ from .resources.webhooks import Webhooks
from .resources.beta.beta import Beta
from .resources.chat.chat import Chat
from .resources.embeddings import Embeddings
@@ -81,6 +82,12 @@ def __load__(self) -> Uploads:
return _load_client().uploads
+class WebhooksProxy(LazyProxy["Webhooks"]):
+ @override
+ def __load__(self) -> Webhooks:
+ return _load_client().webhooks
+
+
class ResponsesProxy(LazyProxy["Responses"]):
@override
def __load__(self) -> Responses:
@@ -132,6 +139,7 @@ def __load__(self) -> VectorStores:
models: Models = ModelsProxy().__as_proxied__()
batches: Batches = BatchesProxy().__as_proxied__()
uploads: Uploads = UploadsProxy().__as_proxied__()
+webhooks: Webhooks = WebhooksProxy().__as_proxied__()
responses: Responses = ResponsesProxy().__as_proxied__()
embeddings: Embeddings = EmbeddingsProxy().__as_proxied__()
containers: Containers = ContainersProxy().__as_proxied__()
diff --git a/src/openai/_version.py b/src/openai/_version.py
index d1cad1dd01..64bc847523 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.91.0" # x-release-please-version
+__version__ = "1.92.0" # x-release-please-version
diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py
index 655dd71d4c..a994e4256c 100644
--- a/src/openai/lib/azure.py
+++ b/src/openai/lib/azure.py
@@ -98,6 +98,7 @@ def __init__(
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -117,6 +118,7 @@ def __init__(
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -136,6 +138,7 @@ def __init__(
azure_ad_token: str | None = None,
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -156,6 +159,7 @@ def __init__(
azure_ad_token_provider: AzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
base_url: str | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -234,6 +238,7 @@ def __init__(
api_key=api_key,
organization=organization,
project=project,
+ webhook_secret=webhook_secret,
base_url=base_url,
timeout=timeout,
max_retries=max_retries,
@@ -256,6 +261,7 @@ def copy(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
@@ -277,6 +283,7 @@ def copy(
api_key=api_key,
organization=organization,
project=project,
+ webhook_secret=webhook_secret,
websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
@@ -370,6 +377,7 @@ def __init__(
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -390,6 +398,7 @@ def __init__(
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -410,6 +419,7 @@ def __init__(
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
@@ -430,6 +440,7 @@ def __init__(
azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
base_url: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
@@ -508,6 +519,7 @@ def __init__(
api_key=api_key,
organization=organization,
project=project,
+ webhook_secret=webhook_secret,
base_url=base_url,
timeout=timeout,
max_retries=max_retries,
@@ -530,6 +542,7 @@ def copy(
api_key: str | None = None,
organization: str | None = None,
project: str | None = None,
+ webhook_secret: str | None = None,
websocket_base_url: str | httpx.URL | None = None,
api_version: str | None = None,
azure_ad_token: str | None = None,
@@ -551,6 +564,7 @@ def copy(
api_key=api_key,
organization=organization,
project=project,
+ webhook_secret=webhook_secret,
websocket_base_url=websocket_base_url,
base_url=base_url,
timeout=timeout,
diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py
index a7b70c32d3..2cf37efeae 100644
--- a/src/openai/lib/streaming/chat/_completions.py
+++ b/src/openai/lib/streaming/chat/_completions.py
@@ -128,7 +128,7 @@ class ChatCompletionStreamManager(Generic[ResponseFormatT]):
Usage:
```py
- with client.beta.chat.completions.stream(...) as stream:
+ with client.chat.completions.stream(...) as stream:
for event in stream:
...
```
@@ -251,7 +251,7 @@ class AsyncChatCompletionStreamManager(Generic[ResponseFormatT]):
Usage:
```py
- async with client.beta.chat.completions.stream(...) as stream:
+ async with client.chat.completions.stream(...) as stream:
for event in stream:
...
```
diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py
index 62fc8258b9..4feaaab44b 100644
--- a/src/openai/resources/beta/beta.py
+++ b/src/openai/resources/beta/beta.py
@@ -3,7 +3,6 @@
from __future__ import annotations
from ..._compat import cached_property
-from .chat.chat import Chat, AsyncChat
from .assistants import (
Assistants,
AsyncAssistants,
@@ -21,6 +20,7 @@
ThreadsWithStreamingResponse,
AsyncThreadsWithStreamingResponse,
)
+from ...resources.chat import Chat, AsyncChat
from .realtime.realtime import (
Realtime,
AsyncRealtime,
diff --git a/src/openai/resources/beta/chat/__init__.py b/src/openai/resources/beta/chat/__init__.py
deleted file mode 100644
index 072d7867a5..0000000000
--- a/src/openai/resources/beta/chat/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .chat import Chat, AsyncChat
-from .completions import Completions, AsyncCompletions
-
-__all__ = [
- "Completions",
- "AsyncCompletions",
- "Chat",
- "AsyncChat",
-]
diff --git a/src/openai/resources/beta/chat/chat.py b/src/openai/resources/beta/chat/chat.py
deleted file mode 100644
index 6afdcea381..0000000000
--- a/src/openai/resources/beta/chat/chat.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from ...._compat import cached_property
-from .completions import Completions, AsyncCompletions
-from ...._resource import SyncAPIResource, AsyncAPIResource
-
-__all__ = ["Chat", "AsyncChat"]
-
-
-class Chat(SyncAPIResource):
- @cached_property
- def completions(self) -> Completions:
- return Completions(self._client)
-
-
-class AsyncChat(AsyncAPIResource):
- @cached_property
- def completions(self) -> AsyncCompletions:
- return AsyncCompletions(self._client)
diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py
deleted file mode 100644
index 871c4ab48a..0000000000
--- a/src/openai/resources/beta/chat/completions.py
+++ /dev/null
@@ -1,634 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Type, Union, Iterable, Optional, cast
-from functools import partial
-from typing_extensions import Literal
-
-import httpx
-
-from .... import _legacy_response
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...._streaming import Stream
-from ....types.chat import completion_create_params
-from ...._base_client import make_request_options
-from ....lib._parsing import (
- ResponseFormatT,
- validate_input_tools as _validate_input_tools,
- parse_chat_completion as _parse_chat_completion,
- type_to_response_format_param as _type_to_response_format,
-)
-from ....types.chat_model import ChatModel
-from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
-from ....types.shared_params import Metadata, ReasoningEffort
-from ....types.chat.chat_completion import ChatCompletion
-from ....types.chat.chat_completion_chunk import ChatCompletionChunk
-from ....types.chat.parsed_chat_completion import ParsedChatCompletion
-from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
-from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
-from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
-from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
-from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
-from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
-
-__all__ = ["Completions", "AsyncCompletions"]
-
-
-class Completions(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> CompletionsWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
- """
- return CompletionsWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> CompletionsWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/openai/openai-python#with_streaming_response
- """
- return CompletionsWithStreamingResponse(self)
-
- def parse(
- self,
- *,
- messages: Iterable[ChatCompletionMessageParam],
- model: Union[str, ChatModel],
- audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
- response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
- functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
- modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
- n: Optional[int] | NotGiven = NOT_GIVEN,
- parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
- seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
- store: Optional[bool] | NotGiven = NOT_GIVEN,
- stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
- temperature: Optional[float] | NotGiven = NOT_GIVEN,
- tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
- tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
- top_p: Optional[float] | NotGiven = NOT_GIVEN,
- user: str | NotGiven = NOT_GIVEN,
- web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ParsedChatCompletion[ResponseFormatT]:
- """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
- & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
-
- You can pass a pydantic model to this method and it will automatically convert the model
- into a JSON schema, send it to the API and parse the response content back into the given model.
-
- This method will also automatically parse `function` tool calls if:
- - You use the `openai.pydantic_function_tool()` helper method
- - You mark your tool schema with `"strict": True`
-
- Example usage:
- ```py
- from pydantic import BaseModel
- from openai import OpenAI
-
-
- class Step(BaseModel):
- explanation: str
- output: str
-
-
- class MathResponse(BaseModel):
- steps: List[Step]
- final_answer: str
-
-
- client = OpenAI()
- completion = client.beta.chat.completions.parse(
- model="gpt-4o-2024-08-06",
- messages=[
- {"role": "system", "content": "You are a helpful math tutor."},
- {"role": "user", "content": "solve 8x + 31 = 2"},
- ],
- response_format=MathResponse,
- )
-
- message = completion.choices[0].message
- if message.parsed:
- print(message.parsed.steps)
- print("answer: ", message.parsed.final_answer)
- ```
- """
- _validate_input_tools(tools)
-
- extra_headers = {
- "X-Stainless-Helper-Method": "beta.chat.completions.parse",
- **(extra_headers or {}),
- }
-
- def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
- return _parse_chat_completion(
- response_format=response_format,
- chat_completion=raw_completion,
- input_tools=tools,
- )
-
- return self._post(
- "/chat/completions",
- body=maybe_transform(
- {
- "messages": messages,
- "model": model,
- "audio": audio,
- "frequency_penalty": frequency_penalty,
- "function_call": function_call,
- "functions": functions,
- "logit_bias": logit_bias,
- "logprobs": logprobs,
- "max_completion_tokens": max_completion_tokens,
- "max_tokens": max_tokens,
- "metadata": metadata,
- "modalities": modalities,
- "n": n,
- "parallel_tool_calls": parallel_tool_calls,
- "prediction": prediction,
- "presence_penalty": presence_penalty,
- "reasoning_effort": reasoning_effort,
- "response_format": _type_to_response_format(response_format),
- "seed": seed,
- "service_tier": service_tier,
- "stop": stop,
- "store": store,
- "stream": False,
- "stream_options": stream_options,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_logprobs": top_logprobs,
- "top_p": top_p,
- "user": user,
- "web_search_options": web_search_options,
- },
- completion_create_params.CompletionCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=parser,
- ),
- # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
- # in the `parser` function above
- cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
- stream=False,
- )
-
- def stream(
- self,
- *,
- messages: Iterable[ChatCompletionMessageParam],
- model: Union[str, ChatModel],
- audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
- response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
- functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
- modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
- n: Optional[int] | NotGiven = NOT_GIVEN,
- parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
- seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
- store: Optional[bool] | NotGiven = NOT_GIVEN,
- stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
- temperature: Optional[float] | NotGiven = NOT_GIVEN,
- tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
- tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
- top_p: Optional[float] | NotGiven = NOT_GIVEN,
- user: str | NotGiven = NOT_GIVEN,
- web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ChatCompletionStreamManager[ResponseFormatT]:
- """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
- and automatic accumulation of each delta.
-
- This also supports all of the parsing utilities that `.parse()` does.
-
- Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
-
- ```py
- with client.beta.chat.completions.stream(
- model="gpt-4o-2024-08-06",
- messages=[...],
- ) as stream:
- for event in stream:
- if event.type == "content.delta":
- print(event.delta, flush=True, end="")
- ```
-
- When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)` is an iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
-
- When the context manager exits, the response will be closed, however the `stream` instance is still available outside
- the context manager.
- """
- extra_headers = {
- "X-Stainless-Helper-Method": "beta.chat.completions.stream",
- **(extra_headers or {}),
- }
-
- api_request: partial[Stream[ChatCompletionChunk]] = partial(
- self._client.chat.completions.create,
- messages=messages,
- model=model,
- audio=audio,
- stream=True,
- response_format=_type_to_response_format(response_format),
- frequency_penalty=frequency_penalty,
- function_call=function_call,
- functions=functions,
- logit_bias=logit_bias,
- logprobs=logprobs,
- max_completion_tokens=max_completion_tokens,
- max_tokens=max_tokens,
- metadata=metadata,
- modalities=modalities,
- n=n,
- parallel_tool_calls=parallel_tool_calls,
- prediction=prediction,
- presence_penalty=presence_penalty,
- reasoning_effort=reasoning_effort,
- seed=seed,
- service_tier=service_tier,
- store=store,
- stop=stop,
- stream_options=stream_options,
- temperature=temperature,
- tool_choice=tool_choice,
- tools=tools,
- top_logprobs=top_logprobs,
- top_p=top_p,
- user=user,
- web_search_options=web_search_options,
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- )
- return ChatCompletionStreamManager(
- api_request,
- response_format=response_format,
- input_tools=tools,
- )
-
-
-class AsyncCompletions(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return the
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
- """
- return AsyncCompletionsWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/openai/openai-python#with_streaming_response
- """
- return AsyncCompletionsWithStreamingResponse(self)
-
- async def parse(
- self,
- *,
- messages: Iterable[ChatCompletionMessageParam],
- model: Union[str, ChatModel],
- audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
- response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
- functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
- modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
- n: Optional[int] | NotGiven = NOT_GIVEN,
- parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
- seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
- store: Optional[bool] | NotGiven = NOT_GIVEN,
- stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
- temperature: Optional[float] | NotGiven = NOT_GIVEN,
- tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
- tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
- top_p: Optional[float] | NotGiven = NOT_GIVEN,
- user: str | NotGiven = NOT_GIVEN,
- web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> ParsedChatCompletion[ResponseFormatT]:
- """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
- & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
-
- You can pass a pydantic model to this method and it will automatically convert the model
- into a JSON schema, send it to the API and parse the response content back into the given model.
-
- This method will also automatically parse `function` tool calls if:
- - You use the `openai.pydantic_function_tool()` helper method
- - You mark your tool schema with `"strict": True`
-
- Example usage:
- ```py
- from pydantic import BaseModel
- from openai import AsyncOpenAI
-
-
- class Step(BaseModel):
- explanation: str
- output: str
-
-
- class MathResponse(BaseModel):
- steps: List[Step]
- final_answer: str
-
-
- client = AsyncOpenAI()
- completion = await client.beta.chat.completions.parse(
- model="gpt-4o-2024-08-06",
- messages=[
- {"role": "system", "content": "You are a helpful math tutor."},
- {"role": "user", "content": "solve 8x + 31 = 2"},
- ],
- response_format=MathResponse,
- )
-
- message = completion.choices[0].message
- if message.parsed:
- print(message.parsed.steps)
- print("answer: ", message.parsed.final_answer)
- ```
- """
- _validate_input_tools(tools)
-
- extra_headers = {
- "X-Stainless-Helper-Method": "beta.chat.completions.parse",
- **(extra_headers or {}),
- }
-
- def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
- return _parse_chat_completion(
- response_format=response_format,
- chat_completion=raw_completion,
- input_tools=tools,
- )
-
- return await self._post(
- "/chat/completions",
- body=await async_maybe_transform(
- {
- "messages": messages,
- "model": model,
- "audio": audio,
- "frequency_penalty": frequency_penalty,
- "function_call": function_call,
- "functions": functions,
- "logit_bias": logit_bias,
- "logprobs": logprobs,
- "max_completion_tokens": max_completion_tokens,
- "max_tokens": max_tokens,
- "metadata": metadata,
- "modalities": modalities,
- "n": n,
- "parallel_tool_calls": parallel_tool_calls,
- "prediction": prediction,
- "presence_penalty": presence_penalty,
- "reasoning_effort": reasoning_effort,
- "response_format": _type_to_response_format(response_format),
- "seed": seed,
- "service_tier": service_tier,
- "store": store,
- "stop": stop,
- "stream": False,
- "stream_options": stream_options,
- "temperature": temperature,
- "tool_choice": tool_choice,
- "tools": tools,
- "top_logprobs": top_logprobs,
- "top_p": top_p,
- "user": user,
- "web_search_options": web_search_options,
- },
- completion_create_params.CompletionCreateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=parser,
- ),
- # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
- # in the `parser` function above
- cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
- stream=False,
- )
-
- def stream(
- self,
- *,
- messages: Iterable[ChatCompletionMessageParam],
- model: Union[str, ChatModel],
- audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
- response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
- functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
- modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
- n: Optional[int] | NotGiven = NOT_GIVEN,
- parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
- seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
- store: Optional[bool] | NotGiven = NOT_GIVEN,
- stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
- temperature: Optional[float] | NotGiven = NOT_GIVEN,
- tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
- tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
- top_p: Optional[float] | NotGiven = NOT_GIVEN,
- user: str | NotGiven = NOT_GIVEN,
- web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
- ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
- """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
- and automatic accumulation of each delta.
-
- This also supports all of the parsing utilities that `.parse()` does.
-
- Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
-
- ```py
- async with client.beta.chat.completions.stream(
- model="gpt-4o-2024-08-06",
- messages=[...],
- ) as stream:
- async for event in stream:
- if event.type == "content.delta":
- print(event.delta, flush=True, end="")
- ```
-
- When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)` is an async iterator. The full list of events that are yielded by the iterator are outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
-
- When the context manager exits, the response will be closed, however the `stream` instance is still available outside
- the context manager.
- """
- _validate_input_tools(tools)
-
- extra_headers = {
- "X-Stainless-Helper-Method": "beta.chat.completions.stream",
- **(extra_headers or {}),
- }
-
- api_request = self._client.chat.completions.create(
- messages=messages,
- model=model,
- audio=audio,
- stream=True,
- response_format=_type_to_response_format(response_format),
- frequency_penalty=frequency_penalty,
- function_call=function_call,
- functions=functions,
- logit_bias=logit_bias,
- logprobs=logprobs,
- max_completion_tokens=max_completion_tokens,
- max_tokens=max_tokens,
- metadata=metadata,
- modalities=modalities,
- n=n,
- parallel_tool_calls=parallel_tool_calls,
- prediction=prediction,
- presence_penalty=presence_penalty,
- reasoning_effort=reasoning_effort,
- seed=seed,
- service_tier=service_tier,
- stop=stop,
- store=store,
- stream_options=stream_options,
- temperature=temperature,
- tool_choice=tool_choice,
- tools=tools,
- top_logprobs=top_logprobs,
- top_p=top_p,
- user=user,
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- web_search_options=web_search_options,
- )
- return AsyncChatCompletionStreamManager(
- api_request,
- response_format=response_format,
- input_tools=tools,
- )
-
-
-class CompletionsWithRawResponse:
- def __init__(self, completions: Completions) -> None:
- self._completions = completions
-
- self.parse = _legacy_response.to_raw_response_wrapper(
- completions.parse,
- )
-
-
-class AsyncCompletionsWithRawResponse:
- def __init__(self, completions: AsyncCompletions) -> None:
- self._completions = completions
-
- self.parse = _legacy_response.async_to_raw_response_wrapper(
- completions.parse,
- )
-
-
-class CompletionsWithStreamingResponse:
- def __init__(self, completions: Completions) -> None:
- self._completions = completions
-
- self.parse = to_streamed_response_wrapper(
- completions.parse,
- )
-
-
-class AsyncCompletionsWithStreamingResponse:
- def __init__(self, completions: AsyncCompletions) -> None:
- self._completions = completions
-
- self.parse = async_to_streamed_response_wrapper(
- completions.parse,
- )
diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py
index a6b89fc833..2a5622b092 100644
--- a/src/openai/resources/chat/completions/completions.py
+++ b/src/openai/resources/chat/completions/completions.py
@@ -3,7 +3,8 @@
from __future__ import annotations
import inspect
-from typing import Dict, List, Union, Iterable, Optional
+from typing import Dict, List, Type, Union, Iterable, Optional, cast
+from functools import partial
from typing_extensions import Literal, overload
import httpx
@@ -32,11 +33,19 @@
completion_update_params,
)
from ...._base_client import AsyncPaginator, make_request_options
+from ....lib._parsing import (
+ ResponseFormatT,
+ validate_input_tools as _validate_input_tools,
+ parse_chat_completion as _parse_chat_completion,
+ type_to_response_format_param as _type_to_response_format,
+)
+from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
from ....types.shared.chat_model import ChatModel
from ....types.chat.chat_completion import ChatCompletion
from ....types.shared_params.metadata import Metadata
from ....types.shared.reasoning_effort import ReasoningEffort
from ....types.chat.chat_completion_chunk import ChatCompletionChunk
+from ....types.chat.parsed_chat_completion import ParsedChatCompletion
from ....types.chat.chat_completion_deleted import ChatCompletionDeleted
from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
@@ -72,6 +81,153 @@ def with_streaming_response(self) -> CompletionsWithStreamingResponse:
"""
return CompletionsWithStreamingResponse(self)
+ def parse(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ParsedChatCompletion[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+ & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+ You can pass a pydantic model to this method and it will automatically convert the model
+ into a JSON schema, send it to the API and parse the response content back into the given model.
+
+ This method will also automatically parse `function` tool calls if:
+ - You use the `openai.pydantic_function_tool()` helper method
+ - You mark your tool schema with `"strict": True`
+
+ Example usage:
+ ```py
+ from typing import List
+ from pydantic import BaseModel
+ from openai import OpenAI
+
+
+ class Step(BaseModel):
+ explanation: str
+ output: str
+
+
+ class MathResponse(BaseModel):
+ steps: List[Step]
+ final_answer: str
+
+
+ client = OpenAI()
+ completion = client.chat.completions.parse(
+ model="gpt-4o-2024-08-06",
+ messages=[
+ {"role": "system", "content": "You are a helpful math tutor."},
+ {"role": "user", "content": "solve 8x + 31 = 2"},
+ ],
+ response_format=MathResponse,
+ )
+
+ message = completion.choices[0].message
+ if message.parsed:
+ print(message.parsed.steps)
+ print("answer: ", message.parsed.final_answer)
+ ```
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "chat.completions.parse",
+ **(extra_headers or {}),
+ }
+
+ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+ return _parse_chat_completion(
+ response_format=response_format,
+ chat_completion=raw_completion,
+ input_tools=tools,
+ )
+
+ return self._post(
+ "/chat/completions",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "audio": audio,
+ "frequency_penalty": frequency_penalty,
+ "function_call": function_call,
+ "functions": functions,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "modalities": modalities,
+ "n": n,
+ "parallel_tool_calls": parallel_tool_calls,
+ "prediction": prediction,
+ "presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
+ "response_format": _type_to_response_format(response_format),
+ "seed": seed,
+ "service_tier": service_tier,
+ "stop": stop,
+ "store": store,
+ "stream": False,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ "web_search_options": web_search_options,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=parser,
+ ),
+ # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+ # in the `parser` function above
+ cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+ stream=False,
+ )
+
@overload
def create(
self,
@@ -95,7 +251,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -256,23 +412,23 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -283,6 +439,8 @@ def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream: If set to true, the model response data will be streamed to the client as it is
generated using
[server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
@@ -365,7 +523,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -534,23 +692,23 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -561,6 +719,8 @@ def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream_options: Options for streaming response. Only set this when you set `stream: true`.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -634,7 +794,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -803,23 +963,23 @@ def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -830,6 +990,8 @@ def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream_options: Options for streaming response. Only set this when you set `stream: true`.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -902,7 +1064,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -1150,6 +1312,117 @@ def delete(
cast_to=ChatCompletionDeleted,
)
+ def stream(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ChatCompletionStreamManager[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+ and automatic accumulation of each delta.
+
+ This also supports all of the parsing utilities that `.parse()` does.
+
+ Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+ ```py
+ with client.chat.completions.stream(
+ model="gpt-4o-2024-08-06",
+ messages=[...],
+ ) as stream:
+ for event in stream:
+ if event.type == "content.delta":
+ print(event.delta, flush=True, end="")
+ ```
+
+ When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+ When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+ the context manager.
+ """
+ extra_headers = {
+ "X-Stainless-Helper-Method": "chat.completions.stream",
+ **(extra_headers or {}),
+ }
+
+ api_request: partial[Stream[ChatCompletionChunk]] = partial(
+ self.create,
+ messages=messages,
+ model=model,
+ audio=audio,
+ stream=True,
+ response_format=_type_to_response_format(response_format),
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ metadata=metadata,
+ modalities=modalities,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ prediction=prediction,
+ presence_penalty=presence_penalty,
+ reasoning_effort=reasoning_effort,
+ seed=seed,
+ service_tier=service_tier,
+ store=store,
+ stop=stop,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ web_search_options=web_search_options,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ )
+ return ChatCompletionStreamManager(
+ api_request,
+ response_format=response_format,
+ input_tools=tools,
+ )
+
class AsyncCompletions(AsyncAPIResource):
@cached_property
@@ -1175,6 +1448,153 @@ def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
"""
return AsyncCompletionsWithStreamingResponse(self)
+ async def parse(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ParsedChatCompletion[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python specific types
+ & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.
+
+ You can pass a pydantic model to this method and it will automatically convert the model
+ into a JSON schema, send it to the API and parse the response content back into the given model.
+
+ This method will also automatically parse `function` tool calls if:
+ - You use the `openai.pydantic_function_tool()` helper method
+ - You mark your tool schema with `"strict": True`
+
+ Example usage:
+ ```py
+ from typing import List
+ from pydantic import BaseModel
+ from openai import AsyncOpenAI
+
+
+ class Step(BaseModel):
+ explanation: str
+ output: str
+
+
+ class MathResponse(BaseModel):
+ steps: List[Step]
+ final_answer: str
+
+
+ client = AsyncOpenAI()
+ completion = await client.chat.completions.parse(
+ model="gpt-4o-2024-08-06",
+ messages=[
+ {"role": "system", "content": "You are a helpful math tutor."},
+ {"role": "user", "content": "solve 8x + 31 = 2"},
+ ],
+ response_format=MathResponse,
+ )
+
+ message = completion.choices[0].message
+ if message.parsed:
+ print(message.parsed.steps)
+ print("answer: ", message.parsed.final_answer)
+ ```
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "chat.completions.parse",
+ **(extra_headers or {}),
+ }
+
+ def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
+ return _parse_chat_completion(
+ response_format=response_format,
+ chat_completion=raw_completion,
+ input_tools=tools,
+ )
+
+ return await self._post(
+ "/chat/completions",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "model": model,
+ "audio": audio,
+ "frequency_penalty": frequency_penalty,
+ "function_call": function_call,
+ "functions": functions,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_completion_tokens": max_completion_tokens,
+ "max_tokens": max_tokens,
+ "metadata": metadata,
+ "modalities": modalities,
+ "n": n,
+ "parallel_tool_calls": parallel_tool_calls,
+ "prediction": prediction,
+ "presence_penalty": presence_penalty,
+ "reasoning_effort": reasoning_effort,
+ "response_format": _type_to_response_format(response_format),
+ "seed": seed,
+ "service_tier": service_tier,
+ "store": store,
+ "stop": stop,
+ "stream": False,
+ "stream_options": stream_options,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "user": user,
+ "web_search_options": web_search_options,
+ },
+ completion_create_params.CompletionCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=parser,
+ ),
+ # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
+ # in the `parser` function above
+ cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
+ stream=False,
+ )
+
@overload
async def create(
self,
@@ -1198,7 +1618,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1359,23 +1779,23 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -1386,6 +1806,8 @@ async def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream: If set to true, the model response data will be streamed to the client as it is
generated using
[server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
@@ -1468,7 +1890,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1637,23 +2059,23 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -1664,6 +2086,8 @@ async def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream_options: Options for streaming response. Only set this when you set `stream: true`.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1737,7 +2161,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1906,23 +2330,23 @@ async def create(
should refer to the `system_fingerprint` response parameter to monitor changes
in the backend.
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
stop: Not supported with latest reasoning models `o3` and `o4-mini`.
@@ -1933,6 +2357,8 @@ async def create(
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
+
stream_options: Options for streaming response. Only set this when you set `stream: true`.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -2005,7 +2431,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -2253,11 +2679,126 @@ async def delete(
cast_to=ChatCompletionDeleted,
)
+ def stream(
+ self,
+ *,
+ messages: Iterable[ChatCompletionMessageParam],
+ model: Union[str, ChatModel],
+ audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
+ response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
+ frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
+ functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+ logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+ logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+ max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
+ n: Optional[int] | NotGiven = NOT_GIVEN,
+ parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
+ prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
+ presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+ reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
+ seed: Optional[int] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+ store: Optional[bool] | NotGiven = NOT_GIVEN,
+ stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
+ temperature: Optional[float] | NotGiven = NOT_GIVEN,
+ tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
+ tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+ top_p: Optional[float] | NotGiven = NOT_GIVEN,
+ user: str | NotGiven = NOT_GIVEN,
+ web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
+ """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
+ and automatic accumulation of each delta.
+
+ This also supports all of the parsing utilities that `.parse()` does.
+
+ Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:
+
+ ```py
+ async with client.chat.completions.stream(
+ model="gpt-4o-2024-08-06",
+ messages=[...],
+ ) as stream:
+ async for event in stream:
+ if event.type == "content.delta":
+ print(event.delta, flush=True, end="")
+ ```
+
+ When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).
+
+ When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
+ the context manager.
+ """
+ _validate_input_tools(tools)
+
+ extra_headers = {
+ "X-Stainless-Helper-Method": "chat.completions.stream",
+ **(extra_headers or {}),
+ }
+
+ api_request = self.create(
+ messages=messages,
+ model=model,
+ audio=audio,
+ stream=True,
+ response_format=_type_to_response_format(response_format),
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ metadata=metadata,
+ modalities=modalities,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ prediction=prediction,
+ presence_penalty=presence_penalty,
+ reasoning_effort=reasoning_effort,
+ seed=seed,
+ service_tier=service_tier,
+ stop=stop,
+ store=store,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ web_search_options=web_search_options,
+ )
+ return AsyncChatCompletionStreamManager(
+ api_request,
+ response_format=response_format,
+ input_tools=tools,
+ )
+
class CompletionsWithRawResponse:
def __init__(self, completions: Completions) -> None:
self._completions = completions
+ self.parse = _legacy_response.to_raw_response_wrapper(
+ completions.parse,
+ )
self.create = _legacy_response.to_raw_response_wrapper(
completions.create,
)
@@ -2283,6 +2824,9 @@ class AsyncCompletionsWithRawResponse:
def __init__(self, completions: AsyncCompletions) -> None:
self._completions = completions
+ self.parse = _legacy_response.async_to_raw_response_wrapper(
+ completions.parse,
+ )
self.create = _legacy_response.async_to_raw_response_wrapper(
completions.create,
)
@@ -2308,6 +2852,9 @@ class CompletionsWithStreamingResponse:
def __init__(self, completions: Completions) -> None:
self._completions = completions
+ self.parse = to_streamed_response_wrapper(
+ completions.parse,
+ )
self.create = to_streamed_response_wrapper(
completions.create,
)
@@ -2333,6 +2880,9 @@ class AsyncCompletionsWithStreamingResponse:
def __init__(self, completions: AsyncCompletions) -> None:
self._completions = completions
+ self.parse = async_to_streamed_response_wrapper(
+ completions.parse,
+ )
self.create = async_to_streamed_response_wrapper(
completions.create,
)
@@ -2357,5 +2907,5 @@ def messages(self) -> AsyncMessagesWithStreamingResponse:
def validate_response_format(response_format: object) -> None:
if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
raise TypeError(
- "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `beta.chat.completions.parse()` instead"
+ "You tried to pass a `BaseModel` class to `chat.completions.create()`; You must use `chat.completions.parse()` instead"
)
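The hunks above promote `parse()` and `stream()` from `client.beta.chat.completions` onto `client.chat.completions` itself. Below is a minimal usage sketch assembled from the docstring examples in this diff; the model name and Pydantic schema are illustrative, not prescribed by the change.

```py
from typing import List

from pydantic import BaseModel
from openai import OpenAI


class Step(BaseModel):
    explanation: str
    output: str


class MathResponse(BaseModel):
    steps: List[Step]
    final_answer: str


client = OpenAI()

# Structured parsing, now available without the `.beta` prefix.
completion = client.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are a helpful math tutor."},
        {"role": "user", "content": "solve 8x + 31 = 2"},
    ],
    response_format=MathResponse,
)
message = completion.choices[0].message
if message.parsed:
    print(message.parsed.final_answer)

# Granular streaming events; `.stream()` must be used as a context manager.
with client.chat.completions.stream(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            print(event.delta, flush=True, end="")
```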
diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py
index 841d198a5b..aaf2088f38 100644
--- a/src/openai/resources/responses/responses.py
+++ b/src/openai/resources/responses/responses.py
@@ -81,19 +81,21 @@ def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -125,18 +127,19 @@ def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -158,6 +161,11 @@ def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -185,23 +193,23 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -242,6 +250,9 @@ def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -280,18 +291,20 @@ def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -330,18 +343,19 @@ def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -363,6 +377,11 @@ def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -390,23 +409,23 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -440,6 +459,9 @@ def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -478,18 +500,20 @@ def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -528,18 +552,19 @@ def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -561,6 +586,11 @@ def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -588,23 +618,23 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -638,6 +668,9 @@ def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -674,19 +707,21 @@ def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -706,6 +741,7 @@ def create(
"input": input,
"instructions": instructions,
"max_output_tokens": max_output_tokens,
+ "max_tool_calls": max_tool_calls,
"metadata": metadata,
"model": model,
"parallel_tool_calls": parallel_tool_calls,
@@ -719,6 +755,7 @@ def create(
"text": text,
"tool_choice": tool_choice,
"tools": tools,
+ "top_logprobs": top_logprobs,
"top_p": top_p,
"truncation": truncation,
"user": user,
@@ -1299,19 +1336,21 @@ async def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -1343,18 +1382,19 @@ async def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -1376,6 +1416,11 @@ async def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -1403,23 +1448,23 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -1460,6 +1505,9 @@ async def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -1498,18 +1546,20 @@ async def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -1548,18 +1598,19 @@ async def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -1581,6 +1632,11 @@ async def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -1608,23 +1664,23 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -1658,6 +1714,9 @@ async def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -1696,18 +1755,20 @@ async def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -1746,18 +1807,19 @@ async def create(
include: Specify additional output data to include in the model response. Currently
supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
input: Text, image, or file inputs to the model, used to generate a response.
@@ -1779,6 +1841,11 @@ async def create(
including visible output tokens and
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+ max_tool_calls: The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
querying for objects via API or the dashboard.
@@ -1806,23 +1873,23 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
- service_tier: Specifies the latency tier to use for processing the request. This parameter is
- relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
store: Whether to store the generated model response for later retrieval via API.
@@ -1856,6 +1923,9 @@ async def create(
the model to call your own code. Learn more about
[function calling](https://platform.openai.com/docs/guides/function-calling).
+ top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+
top_p: An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1
means only the tokens comprising the top 10% probability mass are considered.
@@ -1892,19 +1962,21 @@ async def create(
input: Union[str, ResponseInputParam] | NotGiven = NOT_GIVEN,
instructions: Optional[str] | NotGiven = NOT_GIVEN,
max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+ max_tool_calls: Optional[int] | NotGiven = NOT_GIVEN,
metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
model: ResponsesModel | NotGiven = NOT_GIVEN,
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
prompt: Optional[ResponsePromptParam] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN,
tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+ top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -1924,6 +1996,7 @@ async def create(
"input": input,
"instructions": instructions,
"max_output_tokens": max_output_tokens,
+ "max_tool_calls": max_tool_calls,
"metadata": metadata,
"model": model,
"parallel_tool_calls": parallel_tool_calls,
@@ -1937,6 +2010,7 @@ async def create(
"text": text,
"tool_choice": tool_choice,
"tools": tools,
+ "top_logprobs": top_logprobs,
"top_p": top_p,
"truncation": truncation,
"user": user,
diff --git a/src/openai/resources/webhooks.py b/src/openai/resources/webhooks.py
new file mode 100644
index 0000000000..3e13d3faae
--- /dev/null
+++ b/src/openai/resources/webhooks.py
@@ -0,0 +1,210 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import hmac
+import json
+import time
+import base64
+import hashlib
+from typing import cast
+
+from .._types import HeadersLike
+from .._utils import get_required_header
+from .._models import construct_type
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._exceptions import InvalidWebhookSignatureError
+from ..types.webhooks.unwrap_webhook_event import UnwrapWebhookEvent
+
+__all__ = ["Webhooks", "AsyncWebhooks"]
+
+
+class Webhooks(SyncAPIResource):
+ def unwrap(
+ self,
+ payload: str | bytes,
+ headers: HeadersLike,
+ *,
+ secret: str | None = None,
+ ) -> UnwrapWebhookEvent:
+ """Validates that the given payload was sent by OpenAI and parses the payload."""
+ if secret is None:
+ secret = self._client.webhook_secret
+
+ self.verify_signature(payload=payload, headers=headers, secret=secret)
+
+ return cast(
+ UnwrapWebhookEvent,
+ construct_type(
+ type_=UnwrapWebhookEvent,
+ value=json.loads(payload),
+ ),
+ )
+
+ def verify_signature(
+ self,
+ payload: str | bytes,
+ headers: HeadersLike,
+ *,
+ secret: str | None = None,
+ tolerance: int = 300,
+ ) -> None:
+ """Validates whether or not the webhook payload was sent by OpenAI.
+
+ Args:
+ payload: The webhook payload
+ headers: The webhook headers
+ secret: The webhook secret (optional, will use client secret if not provided)
+ tolerance: Maximum age of the webhook in seconds (default: 300 = 5 minutes)
+ """
+ if secret is None:
+ secret = self._client.webhook_secret
+
+ if secret is None:
+ raise ValueError(
+ "The webhook secret must either be set using the env var, OPENAI_WEBHOOK_SECRET, "
+ "on the client class, OpenAI(webhook_secret='123'), or passed to this function"
+ )
+
+ signature_header = get_required_header(headers, "webhook-signature")
+ timestamp = get_required_header(headers, "webhook-timestamp")
+ webhook_id = get_required_header(headers, "webhook-id")
+
+ # Validate timestamp to prevent replay attacks
+ try:
+ timestamp_seconds = int(timestamp)
+ except ValueError:
+ raise InvalidWebhookSignatureError("Invalid webhook timestamp format") from None
+
+ now = int(time.time())
+
+ if now - timestamp_seconds > tolerance:
+ raise InvalidWebhookSignatureError("Webhook timestamp is too old") from None
+
+ if timestamp_seconds > now + tolerance:
+ raise InvalidWebhookSignatureError("Webhook timestamp is too new") from None
+
+ # Extract signatures from the v1,<signature> format
+ # The signature header can have multiple values, separated by spaces.
+ # Each value is in the format v1,<signature>. We should accept if any match.
+ signatures: list[str] = []
+ for part in signature_header.split():
+ if part.startswith("v1,"):
+ signatures.append(part[3:])
+ else:
+ signatures.append(part)
+
+ # Decode the secret if it starts with whsec_
+ if secret.startswith("whsec_"):
+ decoded_secret = base64.b64decode(secret[6:])
+ else:
+ decoded_secret = secret.encode()
+
+ body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+
+ # Prepare the signed payload (OpenAI uses webhookId.timestamp.payload format)
+ signed_payload = f"{webhook_id}.{timestamp}.{body}"
+ expected_signature = base64.b64encode(
+ hmac.new(decoded_secret, signed_payload.encode(), hashlib.sha256).digest()
+ ).decode()
+
+ # Accept if any signature matches
+ if not any(hmac.compare_digest(expected_signature, sig) for sig in signatures):
+ raise InvalidWebhookSignatureError(
+ "The given webhook signature does not match the expected signature"
+ ) from None
+
+
+class AsyncWebhooks(AsyncAPIResource):
+ def unwrap(
+ self,
+ payload: str | bytes,
+ headers: HeadersLike,
+ *,
+ secret: str | None = None,
+ ) -> UnwrapWebhookEvent:
+ """Validates that the given payload was sent by OpenAI and parses the payload."""
+ if secret is None:
+ secret = self._client.webhook_secret
+
+ self.verify_signature(payload=payload, headers=headers, secret=secret)
+
+ body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+ return cast(
+ UnwrapWebhookEvent,
+ construct_type(
+ type_=UnwrapWebhookEvent,
+ value=json.loads(body),
+ ),
+ )
+
+ def verify_signature(
+ self,
+ payload: str | bytes,
+ headers: HeadersLike,
+ *,
+ secret: str | None = None,
+ tolerance: int = 300,
+ ) -> None:
+ """Validates whether or not the webhook payload was sent by OpenAI.
+
+ Args:
+ payload: The webhook payload
+ headers: The webhook headers
+ secret: The webhook secret (optional, will use client secret if not provided)
+ tolerance: Maximum age of the webhook in seconds (default: 300 = 5 minutes)
+ """
+ if secret is None:
+ secret = self._client.webhook_secret
+
+ if secret is None:
+ raise ValueError(
+ "The webhook secret must either be set using the env var, OPENAI_WEBHOOK_SECRET, "
+ "on the client class, OpenAI(webhook_secret='123'), or passed to this function"
+ ) from None
+
+ signature_header = get_required_header(headers, "webhook-signature")
+ timestamp = get_required_header(headers, "webhook-timestamp")
+ webhook_id = get_required_header(headers, "webhook-id")
+
+ # Validate timestamp to prevent replay attacks
+ try:
+ timestamp_seconds = int(timestamp)
+ except ValueError:
+ raise InvalidWebhookSignatureError("Invalid webhook timestamp format") from None
+
+ now = int(time.time())
+
+ if now - timestamp_seconds > tolerance:
+ raise InvalidWebhookSignatureError("Webhook timestamp is too old") from None
+
+ if timestamp_seconds > now + tolerance:
+ raise InvalidWebhookSignatureError("Webhook timestamp is too new") from None
+
+ # Extract signatures from the v1,<signature> format
+ # The signature header can have multiple values, separated by spaces.
+ # Each value is in the format v1,<signature>. We should accept if any match.
+ signatures: list[str] = []
+ for part in signature_header.split():
+ if part.startswith("v1,"):
+ signatures.append(part[3:])
+ else:
+ signatures.append(part)
+
+ # Decode the secret if it starts with whsec_
+ if secret.startswith("whsec_"):
+ decoded_secret = base64.b64decode(secret[6:])
+ else:
+ decoded_secret = secret.encode()
+
+ body = payload.decode("utf-8") if isinstance(payload, bytes) else payload
+
+ # Prepare the signed payload (OpenAI uses webhookId.timestamp.payload format)
+ signed_payload = f"{webhook_id}.{timestamp}.{body}"
+ expected_signature = base64.b64encode(
+ hmac.new(decoded_secret, signed_payload.encode(), hashlib.sha256).digest()
+ ).decode()
+
+ # Accept if any signature matches
+ if not any(hmac.compare_digest(expected_signature, sig) for sig in signatures):
+ raise InvalidWebhookSignatureError("The given webhook signature does not match the expected signature")
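A sketch of how the new `Webhooks` resource might be used from a request handler. FastAPI is only an assumed host framework here, and importing `InvalidWebhookSignatureError` from the package root assumes it is re-exported like the SDK's other exceptions; only `client.webhooks.unwrap()` itself comes from the code above.

```python
# Sketch only: FastAPI is an assumed host framework, not part of this SDK change.
from fastapi import FastAPI, HTTPException, Request

from openai import OpenAI, InvalidWebhookSignatureError  # assumes top-level re-export

app = FastAPI()
client = OpenAI()  # can pick up OPENAI_WEBHOOK_SECRET for verification


@app.post("/openai/webhooks")
async def handle_webhook(request: Request):
    raw_body = await request.body()  # must be the raw bytes, not parsed JSON
    try:
        # Verifies the webhook-signature / webhook-timestamp / webhook-id headers,
        # then parses the payload into a typed event.
        event = client.webhooks.unwrap(raw_body, request.headers)
    except InvalidWebhookSignatureError:
        raise HTTPException(status_code=400, detail="Invalid webhook signature")
    print(event.type)
    return {"ok": True}
```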
diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py
index 863cc2e81a..afc23e3f3d 100644
--- a/src/openai/types/chat/chat_completion.py
+++ b/src/openai/types/chat/chat_completion.py
@@ -59,25 +59,24 @@ class ChatCompletion(BaseModel):
object: Literal["chat.completion"]
"""The object type, which is always `chat.completion`."""
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
- """Specifies the latency tier to use for processing the request.
-
- This parameter is relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+ """Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
"""
system_fingerprint: Optional[str] = None
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 3d3d68602a..da6e315830 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -128,25 +128,24 @@ class ChatCompletionChunk(BaseModel):
object: Literal["chat.completion.chunk"]
"""The object type, which is always `chat.completion.chunk`."""
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
- """Specifies the latency tier to use for processing the request.
-
- This parameter is relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+ """Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
"""
system_fingerprint: Optional[str] = None
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index f1ed444b79..44ea853041 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -208,25 +208,24 @@ class CompletionCreateParamsBase(TypedDict, total=False):
in the backend.
"""
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
- """Specifies the latency tier to use for processing the request.
-
- This parameter is relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]]
+ """Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
"""
stop: Union[Optional[str], List[str], None]
@@ -241,6 +240,8 @@ class CompletionCreateParamsBase(TypedDict, total=False):
Whether or not to store the output of this chat completion request for use in
our [model distillation](https://platform.openai.com/docs/guides/distillation)
or [evals](https://platform.openai.com/docs/guides/evals) products.
+
+ Supports text and image inputs. Note: image inputs over 10MB will be dropped.
"""
stream_options: Optional[ChatCompletionStreamOptionsParam]
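The same `service_tier` literals apply to Chat Completions; a brief sketch with a placeholder model and message.

```python
from openai import OpenAI

client = OpenAI()

# Placeholder model/messages; the tier values are the ones documented above.
completion = client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "Hello"}],
    service_tier="flex",
)

# Reflects the tier actually used to serve the request; may differ from the parameter.
print(completion.service_tier)
```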
diff --git a/src/openai/types/images_response.py b/src/openai/types/images_response.py
index df454afa4d..2a8ca728ab 100644
--- a/src/openai/types/images_response.py
+++ b/src/openai/types/images_response.py
@@ -1,6 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List, Optional
+from typing_extensions import Literal
from .image import Image
from .._models import BaseModel
@@ -34,8 +35,26 @@ class ImagesResponse(BaseModel):
created: int
"""The Unix timestamp (in seconds) of when the image was created."""
+ background: Optional[Literal["transparent", "opaque"]] = None
+ """The background parameter used for the image generation.
+
+ Either `transparent` or `opaque`.
+ """
+
data: Optional[List[Image]] = None
"""The list of generated images."""
+ output_format: Optional[Literal["png", "webp", "jpeg"]] = None
+ """The output format of the image generation. Either `png`, `webp`, or `jpeg`."""
+
+ quality: Optional[Literal["low", "medium", "high"]] = None
+ """The quality of the image generated. Either `low`, `medium`, or `high`."""
+
+ size: Optional[Literal["1024x1024", "1024x1536", "1536x1024"]] = None
+ """The size of the image generated.
+
+ Either `1024x1024`, `1024x1536`, or `1536x1024`.
+ """
+
usage: Optional[Usage] = None
"""For `gpt-image-1` only, the token usage information for the image generation."""
diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py
index ba257eabc2..4316e47730 100644
--- a/src/openai/types/responses/__init__.py
+++ b/src/openai/types/responses/__init__.py
@@ -20,6 +20,7 @@
)
from .response_prompt import ResponsePrompt as ResponsePrompt
from .response_status import ResponseStatus as ResponseStatus
+from .tool_choice_mcp import ToolChoiceMcp as ToolChoiceMcp
from .web_search_tool import WebSearchTool as WebSearchTool
from .file_search_tool import FileSearchTool as FileSearchTool
from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
@@ -43,6 +44,7 @@
from .response_prompt_param import ResponsePromptParam as ResponsePromptParam
from .response_queued_event import ResponseQueuedEvent as ResponseQueuedEvent
from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent
+from .tool_choice_mcp_param import ToolChoiceMcpParam as ToolChoiceMcpParam
from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam
from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam
from .input_item_list_params import InputItemListParams as InputItemListParams
diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py
index 75d1c5e3df..db85d87f4e 100644
--- a/src/openai/types/responses/response.py
+++ b/src/openai/types/responses/response.py
@@ -9,6 +9,7 @@
from .response_usage import ResponseUsage
from .response_prompt import ResponsePrompt
from .response_status import ResponseStatus
+from .tool_choice_mcp import ToolChoiceMcp
from ..shared.metadata import Metadata
from ..shared.reasoning import Reasoning
from .tool_choice_types import ToolChoiceTypes
@@ -27,7 +28,7 @@ class IncompleteDetails(BaseModel):
"""The reason why the response is incomplete."""
-ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction]
+ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction, ToolChoiceMcp]
class Response(BaseModel):
@@ -141,6 +142,14 @@ class Response(BaseModel):
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
"""
+ max_tool_calls: Optional[int] = None
+ """
+ The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+ """
+
previous_response_id: Optional[str] = None
"""The unique ID of the previous response to the model.
@@ -161,25 +170,24 @@ class Response(BaseModel):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]] = None
- """Specifies the latency tier to use for processing the request.
-
- This parameter is relevant for customers subscribed to the scale tier service:
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]] = None
+ """Specifies the processing type used for serving the request.
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
"""
status: Optional[ResponseStatus] = None
@@ -198,6 +206,12 @@ class Response(BaseModel):
- [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
"""
+ top_logprobs: Optional[int] = None
+ """
+ An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ """
+
truncation: Optional[Literal["auto", "disabled"]] = None
"""The truncation strategy to use for the model response.
diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py
index 22acd6f653..0187e1fda8 100644
--- a/src/openai/types/responses/response_create_params.py
+++ b/src/openai/types/responses/response_create_params.py
@@ -10,6 +10,7 @@
from .tool_choice_options import ToolChoiceOptions
from .response_input_param import ResponseInputParam
from .response_prompt_param import ResponsePromptParam
+from .tool_choice_mcp_param import ToolChoiceMcpParam
from ..shared_params.metadata import Metadata
from .tool_choice_types_param import ToolChoiceTypesParam
from ..shared_params.reasoning import Reasoning
@@ -37,18 +38,19 @@ class ResponseCreateParamsBase(TypedDict, total=False):
Currently supported values are:
+ - `code_interpreter_call.outputs`: Includes the outputs of python code execution
+ in code interpreter tool call items.
+ - `computer_call_output.output.image_url`: Include image urls from the computer
+ call output.
- `file_search_call.results`: Include the search results of the file search tool
call.
- `message.input_image.image_url`: Include image urls from the input message.
- - `computer_call_output.output.image_url`: Include image urls from the computer
- call output.
+ - `message.output_text.logprobs`: Include logprobs with assistant messages.
- `reasoning.encrypted_content`: Includes an encrypted version of reasoning
tokens in reasoning item outputs. This enables reasoning items to be used in
multi-turn conversations when using the Responses API statelessly (like when
the `store` parameter is set to `false`, or when an organization is enrolled
in the zero data retention program).
- - `code_interpreter_call.outputs`: Includes the outputs of python code execution
- in code interpreter tool call items.
"""
input: Union[str, ResponseInputParam]
@@ -78,6 +80,14 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
"""
+ max_tool_calls: Optional[int]
+ """
+ The maximum number of total calls to built-in tools that can be processed in a
+ response. This maximum number applies across all built-in tool calls, not per
+ individual tool. Any further attempts to call a tool by the model will be
+ ignored.
+ """
+
metadata: Optional[Metadata]
"""Set of 16 key-value pairs that can be attached to an object.
@@ -120,25 +130,24 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
- service_tier: Optional[Literal["auto", "default", "flex", "scale"]]
- """Specifies the latency tier to use for processing the request.
-
- This parameter is relevant for customers subscribed to the scale tier service:
-
- - If set to 'auto', and the Project is Scale tier enabled, the system will
- utilize scale tier credits until they are exhausted.
- - If set to 'auto', and the Project is not Scale tier enabled, the request will
- be processed using the default service tier with a lower uptime SLA and no
- latency guarantee.
- - If set to 'default', the request will be processed using the default service
- tier with a lower uptime SLA and no latency guarantee.
- - If set to 'flex', the request will be processed with the Flex Processing
- service tier.
- [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ service_tier: Optional[Literal["auto", "default", "flex", "scale", "priority"]]
+ """Specifies the processing type used for serving the request.
+
+ - If set to 'auto', then the request will be processed with the service tier
+ configured in the Project settings. Unless otherwise configured, the Project
+ will use 'default'.
+ - If set to 'default', then the request will be processed with the standard
+ pricing and performance for the selected model.
+ - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or
+ 'priority', then the request will be processed with the corresponding service
+ tier. [Contact sales](https://openai.com/contact-sales) to learn more about
+ Priority processing.
- When not set, the default behavior is 'auto'.
- When this parameter is set, the response body will include the `service_tier`
- utilized.
+ When the `service_tier` parameter is set, the response body will include the
+ `service_tier` value based on the processing mode actually used to serve the
+ request. This response value may be different from the value set in the
+ parameter.
"""
store: Optional[bool]
@@ -186,6 +195,12 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[function calling](https://platform.openai.com/docs/guides/function-calling).
"""
+ top_logprobs: Optional[int]
+ """
+ An integer between 0 and 20 specifying the number of most likely tokens to
+ return at each token position, each with an associated log probability.
+ """
+
top_p: Optional[float]
"""
An alternative to sampling with temperature, called nucleus sampling, where the
@@ -214,7 +229,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
"""
-ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam]
+ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam, ToolChoiceMcpParam]
class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False):
diff --git a/src/openai/types/responses/response_function_web_search.py b/src/openai/types/responses/response_function_web_search.py
index 44734b681f..164a1afdca 100644
--- a/src/openai/types/responses/response_function_web_search.py
+++ b/src/openai/types/responses/response_function_web_search.py
@@ -1,16 +1,57 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing_extensions import Literal
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+from ..._utils import PropertyInfo
from ..._models import BaseModel
-__all__ = ["ResponseFunctionWebSearch"]
+__all__ = ["ResponseFunctionWebSearch", "Action", "ActionSearch", "ActionOpenPage", "ActionFind"]
+
+
+class ActionSearch(BaseModel):
+ query: str
+ """The search query."""
+
+ type: Literal["search"]
+ """The action type."""
+
+ domains: Optional[List[str]] = None
+ """Domains to restrict the search or domains where results were found."""
+
+
+class ActionOpenPage(BaseModel):
+ type: Literal["open_page"]
+ """The action type."""
+
+ url: str
+ """The URL opened by the model."""
+
+
+class ActionFind(BaseModel):
+ pattern: str
+ """The pattern or text to search for within the page."""
+
+ type: Literal["find"]
+ """The action type."""
+
+ url: str
+ """The URL of the page searched for the pattern."""
+
+
+Action: TypeAlias = Annotated[Union[ActionSearch, ActionOpenPage, ActionFind], PropertyInfo(discriminator="type")]
class ResponseFunctionWebSearch(BaseModel):
id: str
"""The unique ID of the web search tool call."""
+ action: Action
+ """
+ An object describing the specific action taken in this web search call. Includes
+ details on how the model used the web (search, open_page, find).
+ """
+
status: Literal["in_progress", "searching", "completed", "failed"]
"""The status of the web search tool call."""
diff --git a/src/openai/types/responses/response_function_web_search_param.py b/src/openai/types/responses/response_function_web_search_param.py
index d413e60b12..04d8a5884b 100644
--- a/src/openai/types/responses/response_function_web_search_param.py
+++ b/src/openai/types/responses/response_function_web_search_param.py
@@ -2,15 +2,55 @@
from __future__ import annotations
-from typing_extensions import Literal, Required, TypedDict
+from typing import List, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
-__all__ = ["ResponseFunctionWebSearchParam"]
+__all__ = ["ResponseFunctionWebSearchParam", "Action", "ActionSearch", "ActionOpenPage", "ActionFind"]
+
+
+class ActionSearch(TypedDict, total=False):
+ query: Required[str]
+ """The search query."""
+
+ type: Required[Literal["search"]]
+ """The action type."""
+
+ domains: List[str]
+ """Domains to restrict the search or domains where results were found."""
+
+
+class ActionOpenPage(TypedDict, total=False):
+ type: Required[Literal["open_page"]]
+ """The action type."""
+
+ url: Required[str]
+ """The URL opened by the model."""
+
+
+class ActionFind(TypedDict, total=False):
+ pattern: Required[str]
+ """The pattern or text to search for within the page."""
+
+ type: Required[Literal["find"]]
+ """The action type."""
+
+ url: Required[str]
+ """The URL of the page searched for the pattern."""
+
+
+Action: TypeAlias = Union[ActionSearch, ActionOpenPage, ActionFind]
class ResponseFunctionWebSearchParam(TypedDict, total=False):
id: Required[str]
"""The unique ID of the web search tool call."""
+ action: Required[Action]
+ """
+ An object describing the specific action taken in this web search call. Includes
+ details on how the model used the web (search, open_page, find).
+ """
+
status: Required[Literal["in_progress", "searching", "completed", "failed"]]
"""The status of the web search tool call."""
diff --git a/src/openai/types/responses/response_includable.py b/src/openai/types/responses/response_includable.py
index 28869832b0..c17a02560f 100644
--- a/src/openai/types/responses/response_includable.py
+++ b/src/openai/types/responses/response_includable.py
@@ -5,9 +5,10 @@
__all__ = ["ResponseIncludable"]
ResponseIncludable: TypeAlias = Literal[
+ "code_interpreter_call.outputs",
+ "computer_call_output.output.image_url",
"file_search_call.results",
"message.input_image.image_url",
- "computer_call_output.output.image_url",
+ "message.output_text.logprobs",
"reasoning.encrypted_content",
- "code_interpreter_call.outputs",
]
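
The reordered literal also gains `message.output_text.logprobs`. A hedged sketch pairing it with the new `top_logprobs` request parameter exercised in the tests further down; the model and prompt are placeholders:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-4o",
    input="Reply with one short sentence.",
    include=["message.output_text.logprobs"],  # newly includable value
    top_logprobs=2,  # new request parameter
)
print(response.output_text)
```
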
diff --git a/src/openai/types/responses/tool_choice_mcp.py b/src/openai/types/responses/tool_choice_mcp.py
new file mode 100644
index 0000000000..8763d81635
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_mcp.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ToolChoiceMcp"]
+
+
+class ToolChoiceMcp(BaseModel):
+ server_label: str
+ """The label of the MCP server to use."""
+
+ type: Literal["mcp"]
+ """For MCP tools, the type is always `mcp`."""
+
+ name: Optional[str] = None
+ """The name of the tool to call on the server."""
diff --git a/src/openai/types/responses/tool_choice_mcp_param.py b/src/openai/types/responses/tool_choice_mcp_param.py
new file mode 100644
index 0000000000..afcceb8cc5
--- /dev/null
+++ b/src/openai/types/responses/tool_choice_mcp_param.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ToolChoiceMcpParam"]
+
+
+class ToolChoiceMcpParam(TypedDict, total=False):
+ server_label: Required[str]
+ """The label of the MCP server to use."""
+
+ type: Required[Literal["mcp"]]
+ """For MCP tools, the type is always `mcp`."""
+
+ name: Optional[str]
+ """The name of the tool to call on the server."""
diff --git a/src/openai/types/responses/tool_choice_types.py b/src/openai/types/responses/tool_choice_types.py
index b968324383..b31a826051 100644
--- a/src/openai/types/responses/tool_choice_types.py
+++ b/src/openai/types/responses/tool_choice_types.py
@@ -15,7 +15,6 @@ class ToolChoiceTypes(BaseModel):
"web_search_preview_2025_03_11",
"image_generation",
"code_interpreter",
- "mcp",
]
"""The type of hosted tool the model should to use.
@@ -28,6 +27,5 @@ class ToolChoiceTypes(BaseModel):
- `web_search_preview`
- `computer_use_preview`
- `code_interpreter`
- - `mcp`
- `image_generation`
"""
diff --git a/src/openai/types/responses/tool_choice_types_param.py b/src/openai/types/responses/tool_choice_types_param.py
index 175900750c..15e0357471 100644
--- a/src/openai/types/responses/tool_choice_types_param.py
+++ b/src/openai/types/responses/tool_choice_types_param.py
@@ -16,7 +16,6 @@ class ToolChoiceTypesParam(TypedDict, total=False):
"web_search_preview_2025_03_11",
"image_generation",
"code_interpreter",
- "mcp",
]
]
"""The type of hosted tool the model should to use.
@@ -30,6 +29,5 @@ class ToolChoiceTypesParam(TypedDict, total=False):
- `web_search_preview`
- `computer_use_preview`
- `code_interpreter`
- - `mcp`
- `image_generation`
"""
diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py
index fae8c4c8ff..828f3b5669 100644
--- a/src/openai/types/shared/all_models.py
+++ b/src/openai/types/shared/all_models.py
@@ -15,6 +15,10 @@
"o1-pro-2025-03-19",
"o3-pro",
"o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
"computer-use-preview",
"computer-use-preview-2025-03-11",
],
diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py
index 790c1212f6..4d35356806 100644
--- a/src/openai/types/shared/responses_model.py
+++ b/src/openai/types/shared/responses_model.py
@@ -15,6 +15,10 @@
"o1-pro-2025-03-19",
"o3-pro",
"o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
"computer-use-preview",
"computer-use-preview-2025-03-11",
],
diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py
index ca526b8f15..adfcecf1e5 100644
--- a/src/openai/types/shared_params/responses_model.py
+++ b/src/openai/types/shared_params/responses_model.py
@@ -17,6 +17,10 @@
"o1-pro-2025-03-19",
"o3-pro",
"o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
"computer-use-preview",
"computer-use-preview-2025-03-11",
],
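
The `ResponsesModel`/`AllModels` unions now list the deep research snapshots. A hedged sketch of calling one through the Responses API; background mode and the `web_search_preview` tool are assumptions drawn from the deep research guidance, not from this diff:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="o4-mini-deep-research-2025-06-26",
    input="Survey recent peer-reviewed work on battery recycling.",
    background=True,  # deep research runs tend to be long-lived
    tools=[{"type": "web_search_preview"}],
)
print(response.id, response.status)
```
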
diff --git a/src/openai/types/webhooks/__init__.py b/src/openai/types/webhooks/__init__.py
new file mode 100644
index 0000000000..9caad38c82
--- /dev/null
+++ b/src/openai/types/webhooks/__init__.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .unwrap_webhook_event import UnwrapWebhookEvent as UnwrapWebhookEvent
+from .batch_failed_webhook_event import BatchFailedWebhookEvent as BatchFailedWebhookEvent
+from .batch_expired_webhook_event import BatchExpiredWebhookEvent as BatchExpiredWebhookEvent
+from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent as BatchCancelledWebhookEvent
+from .batch_completed_webhook_event import BatchCompletedWebhookEvent as BatchCompletedWebhookEvent
+from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent as EvalRunFailedWebhookEvent
+from .response_failed_webhook_event import ResponseFailedWebhookEvent as ResponseFailedWebhookEvent
+from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent as EvalRunCanceledWebhookEvent
+from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent as EvalRunSucceededWebhookEvent
+from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent as ResponseCancelledWebhookEvent
+from .response_completed_webhook_event import ResponseCompletedWebhookEvent as ResponseCompletedWebhookEvent
+from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent as ResponseIncompleteWebhookEvent
+from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent as FineTuningJobFailedWebhookEvent
+from .fine_tuning_job_cancelled_webhook_event import (
+ FineTuningJobCancelledWebhookEvent as FineTuningJobCancelledWebhookEvent,
+)
+from .fine_tuning_job_succeeded_webhook_event import (
+ FineTuningJobSucceededWebhookEvent as FineTuningJobSucceededWebhookEvent,
+)
diff --git a/src/openai/types/webhooks/batch_cancelled_webhook_event.py b/src/openai/types/webhooks/batch_cancelled_webhook_event.py
new file mode 100644
index 0000000000..4bbd7307a5
--- /dev/null
+++ b/src/openai/types/webhooks/batch_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the batch API request."""
+
+
+class BatchCancelledWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the batch API request was cancelled."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["batch.cancelled"]
+ """The type of the event. Always `batch.cancelled`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/batch_completed_webhook_event.py b/src/openai/types/webhooks/batch_completed_webhook_event.py
new file mode 100644
index 0000000000..a47ca156fa
--- /dev/null
+++ b/src/openai/types/webhooks/batch_completed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchCompletedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the batch API request."""
+
+
+class BatchCompletedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the batch API request was completed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["batch.completed"]
+ """The type of the event. Always `batch.completed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/batch_expired_webhook_event.py b/src/openai/types/webhooks/batch_expired_webhook_event.py
new file mode 100644
index 0000000000..e91001e8d8
--- /dev/null
+++ b/src/openai/types/webhooks/batch_expired_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchExpiredWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the batch API request."""
+
+
+class BatchExpiredWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the batch API request expired."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["batch.expired"]
+ """The type of the event. Always `batch.expired`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/batch_failed_webhook_event.py b/src/openai/types/webhooks/batch_failed_webhook_event.py
new file mode 100644
index 0000000000..ef80863edb
--- /dev/null
+++ b/src/openai/types/webhooks/batch_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["BatchFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the batch API request."""
+
+
+class BatchFailedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the batch API request failed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["batch.failed"]
+ """The type of the event. Always `batch.failed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/eval_run_canceled_webhook_event.py b/src/openai/types/webhooks/eval_run_canceled_webhook_event.py
new file mode 100644
index 0000000000..855359f743
--- /dev/null
+++ b/src/openai/types/webhooks/eval_run_canceled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunCanceledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the eval run."""
+
+
+class EvalRunCanceledWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the eval run was canceled."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["eval.run.canceled"]
+ """The type of the event. Always `eval.run.canceled`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/eval_run_failed_webhook_event.py b/src/openai/types/webhooks/eval_run_failed_webhook_event.py
new file mode 100644
index 0000000000..7671680720
--- /dev/null
+++ b/src/openai/types/webhooks/eval_run_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the eval run."""
+
+
+class EvalRunFailedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the eval run failed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["eval.run.failed"]
+ """The type of the event. Always `eval.run.failed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py b/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py
new file mode 100644
index 0000000000..d0d1fc2b04
--- /dev/null
+++ b/src/openai/types/webhooks/eval_run_succeeded_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["EvalRunSucceededWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the eval run."""
+
+
+class EvalRunSucceededWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the eval run succeeded."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["eval.run.succeeded"]
+ """The type of the event. Always `eval.run.succeeded`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py
new file mode 100644
index 0000000000..1fe3c06096
--- /dev/null
+++ b/src/openai/types/webhooks/fine_tuning_job_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobCancelledWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the fine-tuning job was cancelled."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["fine_tuning.job.cancelled"]
+ """The type of the event. Always `fine_tuning.job.cancelled`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py
new file mode 100644
index 0000000000..71d899c8ef
--- /dev/null
+++ b/src/openai/types/webhooks/fine_tuning_job_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobFailedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the fine-tuning job failed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["fine_tuning.job.failed"]
+ """The type of the event. Always `fine_tuning.job.failed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py b/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py
new file mode 100644
index 0000000000..470f1fcfaa
--- /dev/null
+++ b/src/openai/types/webhooks/fine_tuning_job_succeeded_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FineTuningJobSucceededWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the fine-tuning job."""
+
+
+class FineTuningJobSucceededWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the fine-tuning job succeeded."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["fine_tuning.job.succeeded"]
+ """The type of the event. Always `fine_tuning.job.succeeded`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/response_cancelled_webhook_event.py b/src/openai/types/webhooks/response_cancelled_webhook_event.py
new file mode 100644
index 0000000000..443e360e90
--- /dev/null
+++ b/src/openai/types/webhooks/response_cancelled_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCancelledWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the model response."""
+
+
+class ResponseCancelledWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the model response was cancelled."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["response.cancelled"]
+ """The type of the event. Always `response.cancelled`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/response_completed_webhook_event.py b/src/openai/types/webhooks/response_completed_webhook_event.py
new file mode 100644
index 0000000000..ac1feff32b
--- /dev/null
+++ b/src/openai/types/webhooks/response_completed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompletedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the model response."""
+
+
+class ResponseCompletedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the model response was completed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["response.completed"]
+ """The type of the event. Always `response.completed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/response_failed_webhook_event.py b/src/openai/types/webhooks/response_failed_webhook_event.py
new file mode 100644
index 0000000000..5b4ba65e18
--- /dev/null
+++ b/src/openai/types/webhooks/response_failed_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseFailedWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the model response."""
+
+
+class ResponseFailedWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the model response failed."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["response.failed"]
+ """The type of the event. Always `response.failed`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/response_incomplete_webhook_event.py b/src/openai/types/webhooks/response_incomplete_webhook_event.py
new file mode 100644
index 0000000000..01609314e0
--- /dev/null
+++ b/src/openai/types/webhooks/response_incomplete_webhook_event.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseIncompleteWebhookEvent", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The unique ID of the model response."""
+
+
+class ResponseIncompleteWebhookEvent(BaseModel):
+ id: str
+ """The unique ID of the event."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) of when the model response was interrupted."""
+
+ data: Data
+ """Event data payload."""
+
+ type: Literal["response.incomplete"]
+ """The type of the event. Always `response.incomplete`."""
+
+ object: Optional[Literal["event"]] = None
+ """The object of the event. Always `event`."""
diff --git a/src/openai/types/webhooks/unwrap_webhook_event.py b/src/openai/types/webhooks/unwrap_webhook_event.py
new file mode 100644
index 0000000000..91091af32f
--- /dev/null
+++ b/src/openai/types/webhooks/unwrap_webhook_event.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from .batch_failed_webhook_event import BatchFailedWebhookEvent
+from .batch_expired_webhook_event import BatchExpiredWebhookEvent
+from .batch_cancelled_webhook_event import BatchCancelledWebhookEvent
+from .batch_completed_webhook_event import BatchCompletedWebhookEvent
+from .eval_run_failed_webhook_event import EvalRunFailedWebhookEvent
+from .response_failed_webhook_event import ResponseFailedWebhookEvent
+from .eval_run_canceled_webhook_event import EvalRunCanceledWebhookEvent
+from .eval_run_succeeded_webhook_event import EvalRunSucceededWebhookEvent
+from .response_cancelled_webhook_event import ResponseCancelledWebhookEvent
+from .response_completed_webhook_event import ResponseCompletedWebhookEvent
+from .response_incomplete_webhook_event import ResponseIncompleteWebhookEvent
+from .fine_tuning_job_failed_webhook_event import FineTuningJobFailedWebhookEvent
+from .fine_tuning_job_cancelled_webhook_event import FineTuningJobCancelledWebhookEvent
+from .fine_tuning_job_succeeded_webhook_event import FineTuningJobSucceededWebhookEvent
+
+__all__ = ["UnwrapWebhookEvent"]
+
+UnwrapWebhookEvent: TypeAlias = Annotated[
+ Union[
+ BatchCancelledWebhookEvent,
+ BatchCompletedWebhookEvent,
+ BatchExpiredWebhookEvent,
+ BatchFailedWebhookEvent,
+ EvalRunCanceledWebhookEvent,
+ EvalRunFailedWebhookEvent,
+ EvalRunSucceededWebhookEvent,
+ FineTuningJobCancelledWebhookEvent,
+ FineTuningJobFailedWebhookEvent,
+ FineTuningJobSucceededWebhookEvent,
+ ResponseCancelledWebhookEvent,
+ ResponseCompletedWebhookEvent,
+ ResponseFailedWebhookEvent,
+ ResponseIncompleteWebhookEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
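
`UnwrapWebhookEvent` is the type returned by `client.webhooks.unwrap()`, so handlers can dispatch on the concrete event classes. A minimal sketch; the secret value and handler name are placeholders:

```python
from openai import OpenAI
from openai.types.webhooks import (
    BatchCompletedWebhookEvent,
    ResponseCompletedWebhookEvent,
)

client = OpenAI(webhook_secret="whsec_placeholder")  # placeholder secret


def handle_webhook(raw_body: str, headers: dict[str, str]) -> None:
    event = client.webhooks.unwrap(raw_body, headers)  # -> UnwrapWebhookEvent
    if isinstance(event, ResponseCompletedWebhookEvent):
        print("response finished:", event.data.id)
    elif isinstance(event, BatchCompletedWebhookEvent):
        print("batch finished:", event.data.id)
    else:
        print("unhandled event:", event.type)
```
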
diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py
index b28f5638c5..e8e3893bad 100644
--- a/tests/api_resources/responses/test_input_items.py
+++ b/tests/api_resources/responses/test_input_items.py
@@ -31,7 +31,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None:
response_id="response_id",
after="after",
before="before",
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
limit=0,
order="asc",
)
@@ -87,7 +87,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N
response_id="response_id",
after="after",
before="before",
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
limit=0,
order="asc",
)
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index 5b7559655a..9c76928c8c 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -28,10 +28,11 @@ def test_method_create_overload_1(self, client: OpenAI) -> None:
def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
response = client.responses.create(
background=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
input="string",
instructions="instructions",
max_output_tokens=0,
+ max_tool_calls=0,
metadata={"foo": "string"},
model="gpt-4o",
parallel_tool_calls=True,
@@ -61,6 +62,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
"description": "description",
}
],
+ top_logprobs=0,
top_p=1,
truncation="auto",
user="user-1234",
@@ -99,10 +101,11 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
response_stream = client.responses.create(
stream=True,
background=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
input="string",
instructions="instructions",
max_output_tokens=0,
+ max_tool_calls=0,
metadata={"foo": "string"},
model="gpt-4o",
parallel_tool_calls=True,
@@ -131,6 +134,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
"description": "description",
}
],
+ top_logprobs=0,
top_p=1,
truncation="auto",
user="user-1234",
@@ -171,7 +175,7 @@ def test_method_retrieve_overload_1(self, client: OpenAI) -> None:
def test_method_retrieve_with_all_params_overload_1(self, client: OpenAI) -> None:
response = client.responses.retrieve(
response_id="resp_677efb5139a88190b512bc3fef8e535d",
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
starting_after=0,
stream=False,
)
@@ -221,7 +225,7 @@ def test_method_retrieve_with_all_params_overload_2(self, client: OpenAI) -> Non
response_stream = client.responses.retrieve(
response_id="resp_677efb5139a88190b512bc3fef8e535d",
stream=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
starting_after=0,
)
response_stream.response.close()
@@ -350,10 +354,11 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None
async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
response = await async_client.responses.create(
background=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
input="string",
instructions="instructions",
max_output_tokens=0,
+ max_tool_calls=0,
metadata={"foo": "string"},
model="gpt-4o",
parallel_tool_calls=True,
@@ -383,6 +388,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"description": "description",
}
],
+ top_logprobs=0,
top_p=1,
truncation="auto",
user="user-1234",
@@ -421,10 +427,11 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
response_stream = await async_client.responses.create(
stream=True,
background=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
input="string",
instructions="instructions",
max_output_tokens=0,
+ max_tool_calls=0,
metadata={"foo": "string"},
model="gpt-4o",
parallel_tool_calls=True,
@@ -453,6 +460,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"description": "description",
}
],
+ top_logprobs=0,
top_p=1,
truncation="auto",
user="user-1234",
@@ -493,7 +501,7 @@ async def test_method_retrieve_overload_1(self, async_client: AsyncOpenAI) -> No
async def test_method_retrieve_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
response = await async_client.responses.retrieve(
response_id="resp_677efb5139a88190b512bc3fef8e535d",
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
starting_after=0,
stream=False,
)
@@ -543,7 +551,7 @@ async def test_method_retrieve_with_all_params_overload_2(self, async_client: As
response_stream = await async_client.responses.retrieve(
response_id="resp_677efb5139a88190b512bc3fef8e535d",
stream=True,
- include=["file_search_call.results"],
+ include=["code_interpreter_call.outputs"],
starting_after=0,
)
await response_stream.response.aclose()
diff --git a/tests/api_resources/test_webhooks.py b/tests/api_resources/test_webhooks.py
new file mode 100644
index 0000000000..6b404998e1
--- /dev/null
+++ b/tests/api_resources/test_webhooks.py
@@ -0,0 +1,284 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from unittest import mock
+
+import pytest
+
+import openai
+from openai._exceptions import InvalidWebhookSignatureError
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+# Standardized test constants (matches TypeScript implementation)
+TEST_SECRET = "whsec_RdvaYFYUXuIFuEbvZHwMfYFhUf7aMYjYcmM24+Aj40c="
+TEST_PAYLOAD = '{"id": "evt_685c059ae3a481909bdc86819b066fb6", "object": "event", "created_at": 1750861210, "type": "response.completed", "data": {"id": "resp_123"}}'
+TEST_TIMESTAMP = 1750861210 # Fixed timestamp that matches our test signature
+TEST_WEBHOOK_ID = "wh_685c059ae39c8190af8c71ed1022a24d"
+TEST_SIGNATURE = "v1,gUAg4R2hWouRZqRQG4uJypNS8YK885G838+EHb4nKBY="
+
+
+def create_test_headers(
+ timestamp: int | None = None, signature: str | None = None, webhook_id: str | None = None
+) -> dict[str, str]:
+ """Helper function to create test headers"""
+ return {
+ "webhook-signature": signature or TEST_SIGNATURE,
+ "webhook-timestamp": str(timestamp or TEST_TIMESTAMP),
+ "webhook-id": webhook_id or TEST_WEBHOOK_ID,
+ }
+
+
+class TestWebhooks:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_unwrap_with_secret(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ unwrapped = client.webhooks.unwrap(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+ assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+ assert unwrapped.created_at == 1750861210
+
+ @parametrize
+ def test_unwrap_without_secret(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(ValueError, match="The webhook secret must either be set"):
+ client.webhooks.unwrap(TEST_PAYLOAD, headers)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_valid(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ # Should not raise - this is a truly valid signature for this timestamp
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ def test_verify_signature_invalid_secret_format(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(ValueError, match="The webhook secret must either be set"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=None)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_invalid(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret="invalid_secret")
+
+ @parametrize
+ def test_verify_signature_missing_webhook_signature_header(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers(signature=None)
+ del headers["webhook-signature"]
+ with pytest.raises(ValueError, match="Could not find webhook-signature header"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ def test_verify_signature_missing_webhook_timestamp_header(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ del headers["webhook-timestamp"]
+ with pytest.raises(ValueError, match="Could not find webhook-timestamp header"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ def test_verify_signature_missing_webhook_id_header(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ del headers["webhook-id"]
+ with pytest.raises(ValueError, match="Could not find webhook-id header"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_payload_bytes(self, client: openai.OpenAI) -> None:
+ headers = create_test_headers()
+ client.webhooks.verify_signature(TEST_PAYLOAD.encode("utf-8"), headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ def test_unwrap_with_client_secret(self) -> None:
+ test_client = openai.OpenAI(base_url=base_url, api_key="test-api-key", webhook_secret=TEST_SECRET)
+ headers = create_test_headers()
+
+ unwrapped = test_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+ assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+ assert unwrapped.created_at == 1750861210
+
+ @parametrize
+ def test_verify_signature_timestamp_too_old(self, client: openai.OpenAI) -> None:
+ # Use a timestamp that's older than 5 minutes from our test timestamp
+ old_timestamp = TEST_TIMESTAMP - 400 # 6 minutes 40 seconds ago
+ headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+ with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_timestamp_too_new(self, client: openai.OpenAI) -> None:
+ # Use a timestamp that's in the future beyond tolerance from our test timestamp
+ future_timestamp = TEST_TIMESTAMP + 400 # 6 minutes 40 seconds in the future
+ headers = create_test_headers(timestamp=future_timestamp, signature="v1,dummy_signature")
+
+ with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too new"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_custom_tolerance(self, client: openai.OpenAI) -> None:
+ # Use a timestamp that's older than default tolerance but within custom tolerance
+ old_timestamp = TEST_TIMESTAMP - 400 # 6 minutes 40 seconds ago from test timestamp
+ headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+ # Should fail with default tolerance
+ with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ # Should also fail with custom tolerance of 10 minutes (signature won't match)
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET, tolerance=600)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_recent_timestamp_succeeds(self, client: openai.OpenAI) -> None:
+ # Use a recent timestamp with dummy signature
+ headers = create_test_headers(signature="v1,dummy_signature")
+
+ # Should fail on signature verification (not timestamp validation)
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_multiple_signatures_one_valid(self, client: openai.OpenAI) -> None:
+ # Test multiple signatures: one invalid, one valid
+ multiple_signatures = f"v1,invalid_signature {TEST_SIGNATURE}"
+ headers = create_test_headers(signature=multiple_signatures)
+
+ # Should not raise when at least one signature is valid
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ def test_verify_signature_multiple_signatures_all_invalid(self, client: openai.OpenAI) -> None:
+ # Test multiple invalid signatures
+ multiple_invalid_signatures = "v1,invalid_signature1 v1,invalid_signature2"
+ headers = create_test_headers(signature=multiple_invalid_signatures)
+
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+
+class TestAsyncWebhooks:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_unwrap_with_secret(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ unwrapped = async_client.webhooks.unwrap(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+ assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+ assert unwrapped.created_at == 1750861210
+
+ @parametrize
+ async def test_unwrap_without_secret(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(ValueError, match="The webhook secret must either be set"):
+ async_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_valid(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ # Should not raise - this is a truly valid signature for this timestamp
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ async def test_verify_signature_invalid_secret_format(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(ValueError, match="The webhook secret must either be set"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=None)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_invalid(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret="invalid_secret")
+
+ @parametrize
+ async def test_verify_signature_missing_webhook_signature_header(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ del headers["webhook-signature"]
+ with pytest.raises(ValueError, match="Could not find webhook-signature header"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ async def test_verify_signature_missing_webhook_timestamp_header(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ del headers["webhook-timestamp"]
+ with pytest.raises(ValueError, match="Could not find webhook-timestamp header"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @parametrize
+ async def test_verify_signature_missing_webhook_id_header(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ del headers["webhook-id"]
+ with pytest.raises(ValueError, match="Could not find webhook-id header"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_payload_bytes(self, async_client: openai.AsyncOpenAI) -> None:
+ headers = create_test_headers()
+ async_client.webhooks.verify_signature(TEST_PAYLOAD.encode("utf-8"), headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ async def test_unwrap_with_client_secret(self) -> None:
+ test_async_client = openai.AsyncOpenAI(base_url=base_url, api_key="test-api-key", webhook_secret=TEST_SECRET)
+ headers = create_test_headers()
+
+ unwrapped = test_async_client.webhooks.unwrap(TEST_PAYLOAD, headers)
+ assert unwrapped.id == "evt_685c059ae3a481909bdc86819b066fb6"
+ assert unwrapped.created_at == 1750861210
+
+ @parametrize
+ async def test_verify_signature_timestamp_too_old(self, async_client: openai.AsyncOpenAI) -> None:
+ # Use a timestamp that's older than 5 minutes from our test timestamp
+ old_timestamp = TEST_TIMESTAMP - 400 # 6 minutes 40 seconds ago
+ headers = create_test_headers(timestamp=old_timestamp, signature="v1,dummy_signature")
+
+ with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too old"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_timestamp_too_new(self, async_client: openai.AsyncOpenAI) -> None:
+ # Use a timestamp that's in the future beyond tolerance from our test timestamp
+ future_timestamp = TEST_TIMESTAMP + 400 # 6 minutes 40 seconds in the future
+ headers = create_test_headers(timestamp=future_timestamp, signature="v1,dummy_signature")
+
+ with pytest.raises(InvalidWebhookSignatureError, match="Webhook timestamp is too new"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_multiple_signatures_one_valid(self, async_client: openai.AsyncOpenAI) -> None:
+ # Test multiple signatures: one invalid, one valid
+ multiple_signatures = f"v1,invalid_signature {TEST_SIGNATURE}"
+ headers = create_test_headers(signature=multiple_signatures)
+
+ # Should not raise when at least one signature is valid
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
+
+ @mock.patch("time.time", mock.MagicMock(return_value=TEST_TIMESTAMP))
+ @parametrize
+ async def test_verify_signature_multiple_signatures_all_invalid(self, async_client: openai.AsyncOpenAI) -> None:
+ # Test multiple invalid signatures
+ multiple_invalid_signatures = "v1,invalid_signature1 v1,invalid_signature2"
+ headers = create_test_headers(signature=multiple_invalid_signatures)
+
+ with pytest.raises(InvalidWebhookSignatureError, match="The given webhook signature does not match"):
+ async_client.webhooks.verify_signature(TEST_PAYLOAD, headers, secret=TEST_SECRET)
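
For context on what these fixtures encode: the `webhook-id`, `webhook-timestamp`, and `webhook-signature` headers follow the Standard Webhooks convention, and the signature appears to be an HMAC-SHA256 over `{webhook-id}.{webhook-timestamp}.{payload}` keyed with the base64-decoded secret. The sketch below is our reconstruction of that scheme for generating additional fixtures, not the SDK's own implementation:

```python
import base64
import hashlib
import hmac


def compute_signature(secret: str, webhook_id: str, timestamp: int, payload: str) -> str:
    # Assumed scheme: strip the "whsec_" prefix, base64-decode the key,
    # HMAC-SHA256 the dot-joined signed content, then base64-encode the
    # digest and prepend the "v1," version marker.
    key = base64.b64decode(secret.removeprefix("whsec_"))
    signed_content = f"{webhook_id}.{timestamp}.{payload}".encode()
    digest = hmac.new(key, signed_content, hashlib.sha256).digest()
    return "v1," + base64.b64encode(digest).decode()
```
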
diff --git a/tests/lib/chat/test_completions.py b/tests/lib/chat/test_completions.py
index 62fdd34c0a..e7143bbb68 100644
--- a/tests/lib/chat/test_completions.py
+++ b/tests/lib/chat/test_completions.py
@@ -33,7 +33,7 @@
@pytest.mark.respx(base_url=base_url)
def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -101,7 +101,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -171,7 +171,7 @@ class Location(BaseModel):
units: Optional[Literal["c", "f"]] = None
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -248,7 +248,7 @@ class ColorDetection(BaseModel):
ColorDetection.update_forward_refs(**locals()) # type: ignore
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "user", "content": "What color is a Coke can?"},
@@ -293,7 +293,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -376,7 +376,7 @@ class CalendarEvent:
participants: List[str]
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "Extract the event information."},
@@ -437,7 +437,7 @@ class CalendarEvent:
@pytest.mark.respx(base_url=base_url)
def test_pydantic_tool_model_all_types(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -522,7 +522,7 @@ class Location(BaseModel):
with pytest.raises(openai.LengthFinishReasonError):
_make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -549,7 +549,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -597,7 +597,7 @@ class GetWeatherArgs(BaseModel):
units: Literal["c", "f"] = "c"
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -663,7 +663,7 @@ class GetStockPrice(BaseModel):
exchange: str
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -734,7 +734,7 @@ class GetStockPrice(BaseModel):
@pytest.mark.respx(base_url=base_url)
def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
completion = _make_snapshot_request(
- lambda c: c.beta.chat.completions.parse(
+ lambda c: c.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -808,7 +808,7 @@ def test_parse_non_strict_tools(client: OpenAI) -> None:
with pytest.raises(
ValueError, match="`get_weather` is not strict. Only `strict` function tools can be auto-parsed"
):
- client.beta.chat.completions.parse(
+ client.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[],
tools=[
@@ -831,7 +831,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
response = _make_snapshot_request(
- lambda c: c.beta.chat.completions.with_raw_response.parse(
+ lambda c: c.chat.completions.with_raw_response.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -847,7 +847,7 @@ class Location(BaseModel):
mock_client=client,
respx_mock=respx_mock,
)
- assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse"
+ assert response.http_request.headers.get("x-stainless-helper-method") == "chat.completions.parse"
completion = response.parse()
message = completion.choices[0].message
@@ -907,7 +907,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
response = await _make_async_snapshot_request(
- lambda c: c.beta.chat.completions.with_raw_response.parse(
+ lambda c: c.chat.completions.with_raw_response.parse(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -923,7 +923,7 @@ class Location(BaseModel):
mock_client=async_client,
respx_mock=respx_mock,
)
- assert response.http_request.headers.get("x-stainless-helper-method") == "beta.chat.completions.parse"
+ assert response.http_request.headers.get("x-stainless-helper-method") == "chat.completions.parse"
completion = response.parse()
message = completion.choices[0].message
@@ -978,7 +978,7 @@ def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpe
assert_signatures_in_sync(
checking_client.chat.completions.create,
- checking_client.beta.chat.completions.parse,
+ checking_client.chat.completions.parse,
exclude_params={"response_format", "stream"},
)
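
These test updates reflect the helper moving out of beta: `parse` now lives directly on `client.chat.completions`. A short sketch mirroring the `Location` model used throughout the tests:

```python
from typing import Literal

from pydantic import BaseModel

from openai import OpenAI


class Location(BaseModel):
    city: str
    temperature: float
    units: Literal["c", "f"]


client = OpenAI()

completion = client.chat.completions.parse(  # no `beta.` prefix anymore
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "What's the weather like in SF?"}],
    response_format=Location,
)
print(completion.choices[0].message.parsed)
```
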
diff --git a/tests/lib/chat/test_completions_streaming.py b/tests/lib/chat/test_completions_streaming.py
index 5852c5a343..4680a73e3a 100644
--- a/tests/lib/chat/test_completions_streaming.py
+++ b/tests/lib/chat/test_completions_streaming.py
@@ -41,7 +41,7 @@
@pytest.mark.respx(base_url=base_url)
def test_parse_nothing(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -103,7 +103,7 @@ def on_event(stream: ChatCompletionStream[Location], event: ChatCompletionStream
done_snapshots.append(model_copy(stream.current_completion_snapshot, deep=True))
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -195,7 +195,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -374,7 +374,7 @@ class Location(BaseModel):
with pytest.raises(openai.LengthFinishReasonError):
_make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -399,7 +399,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -444,7 +444,7 @@ class Location(BaseModel):
@pytest.mark.respx(base_url=base_url)
def test_content_logprobs_events(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -523,7 +523,7 @@ class Location(BaseModel):
units: Literal["c", "f"]
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -635,7 +635,7 @@ class GetWeatherArgs(BaseModel):
units: Literal["c", "f"] = "c"
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -733,7 +733,7 @@ class GetStockPrice(BaseModel):
exchange: str
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -831,7 +831,7 @@ class GetStockPrice(BaseModel):
@pytest.mark.respx(base_url=base_url)
def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -903,7 +903,7 @@ def test_parse_strict_tools(client: OpenAI, respx_mock: MockRouter, monkeypatch:
@pytest.mark.respx(base_url=base_url)
def test_non_pydantic_response_format(client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch) -> None:
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[
{
@@ -951,7 +951,7 @@ def test_allows_non_strict_tools_but_no_parsing(
client: OpenAI, respx_mock: MockRouter, monkeypatch: pytest.MonkeyPatch
) -> None:
listener = _make_stream_snapshot_request(
- lambda c: c.beta.chat.completions.stream(
+ lambda c: c.chat.completions.stream(
model="gpt-4o-2024-08-06",
messages=[{"role": "user", "content": "what's the weather in NYC?"}],
tools=[
@@ -1069,7 +1069,7 @@ def test_stream_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOp
assert_signatures_in_sync(
checking_client.chat.completions.create,
- checking_client.beta.chat.completions.stream,
+ checking_client.chat.completions.stream,
exclude_params={"response_format", "stream"},
)
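
The streaming helper moves the same way. A brief sketch of the non-beta entry point; the prompt is a placeholder:

```python
from openai import OpenAI

client = OpenAI()

with client.chat.completions.stream(  # no `beta.` prefix anymore
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Tell me a one-line joke."}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            print(event.delta, end="", flush=True)
    print()
```
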
diff --git a/tests/test_client.py b/tests/test_client.py
index 3d08a0a601..988e5d994c 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -192,6 +192,7 @@ def test_copy_signature(self) -> None:
copy_param = copy_signature.parameters.get(name)
assert copy_param is not None, f"copy() signature is missing the {name} param"
+    @pytest.mark.skipif(sys.version_info >= (3, 12), reason="fails because of a memory leak that started from 3.12")
def test_copy_build_request(self) -> None:
options = FinalRequestOptions(method="get", url="/foo")
@@ -1074,6 +1075,7 @@ def test_copy_signature(self) -> None:
copy_param = copy_signature.parameters.get(name)
assert copy_param is not None, f"copy() signature is missing the {name} param"
+    @pytest.mark.skipif(sys.version_info >= (3, 12), reason="fails because of a memory leak that started from 3.12")
def test_copy_build_request(self) -> None:
options = FinalRequestOptions(method="get", url="/foo")
diff --git a/tests/test_module_client.py b/tests/test_module_client.py
index 6bab33a1d7..9c9a1addab 100644
--- a/tests/test_module_client.py
+++ b/tests/test_module_client.py
@@ -17,6 +17,7 @@ def reset_state() -> None:
openai.api_key = None or "My API Key"
openai.organization = None
openai.project = None
+ openai.webhook_secret = None
openai.base_url = None
openai.timeout = DEFAULT_TIMEOUT
openai.max_retries = DEFAULT_MAX_RETRIES
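
The reset above implies the module-level client also carries the new `webhook_secret` attribute alongside `api_key`, `organization`, and `project`. A hedged sketch; the secret is a placeholder, and module-level `openai.webhooks` access is assumed to mirror the other module-level resources:

```python
import openai

openai.webhook_secret = "whsec_placeholder"  # placeholder, not a real secret


def on_request(raw_body: str, headers: dict[str, str]) -> None:
    # Assumed to behave like client.webhooks.unwrap() on an OpenAI() instance.
    event = openai.webhooks.unwrap(raw_body, headers)
    print(event.type)
```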