@kyo-takano
Created April 16, 2024 09:31
Submitting an Asynchronous Large-Batch Request with the OpenAI Python SDK
#!/usr/bin/python
import json
import textwrap
import time
from openai import OpenAI
client = OpenAI()
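# The client reads the API key from the OPENAI_API_KEY environment variable by default.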
"""
Create a data file containing a batch of chat messages to complete
"""
filepath = "data.jsonl"
messages = [
    dict(
        messages=[
            dict(
                role="system",
                content="Improve the given code through clear and detailed thinking.",
            ),
            dict(
                role="user",
                content=textwrap.dedent("""\
                    ```python
                    from difflib import Differ
                    def get_diff(before, after):
                        differ = Differ()
                        diff = differ.compare(before.split(), after.split())
                        output = ""
                        for line in diff:
                            if line.startswith("  "):
                                output += line[2:] + " "
                            elif line.startswith("- "):
                                output += f"\033[91m{line[2:]}\033[0m"
                            elif line.startswith("+ "):
                                output += f"\033[92m{line[2:]}\033[0m "
                            else:
                                output += line
                        return output
                    ```"""),
            ),
        ],
    ),
    # More conversations here (to make it a batch)
]
with open(filepath, "w") as file:
    for i, message in enumerate(messages):
        request = dict(
            custom_id=f"request-{i}",
            method="POST",
            url="/v1/chat/completions",
            # `message` already carries the `messages` key; merge it into the body
            body=dict(model="gpt-4-turbo", **message),
        )  # https://platform.openai.com/docs/api-reference/batch/requestInput
        json.dump(request, file)
        file.write("\n")
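# Each line of data.jsonl now holds one request object of the form
# {"custom_id": "request-0", "method": "POST", "url": "/v1/chat/completions",
#  "body": {"model": "gpt-4-turbo", "messages": [...]}}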
"""
Upload to storage (the uploaded file will appear at https://platform.openai.com/storage/files)
"""
with open(filepath, "rb") as f:
    response = client.files.create(file=f, purpose="batch")
file_id = response.id
print(f"{file_id=}")
"""
Submit a batch job
"""
response = client.batches.create(
input_file_id=file_id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
batch_id = response.id
print(f"{batch_id=}")
"""
Wait for the job to complete (the completion window is 24 hours).
"""
time.sleep(24 * 60 * 60)
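# To be safe, poll until the job reaches a terminal state instead of assuming
# it finished (a sketch; terminal status values per the Batch object documentation):
while client.batches.retrieve(batch_id=batch_id).status not in (
    "completed",
    "failed",
    "expired",
    "cancelled",
):
    time.sleep(60)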
"""
Retrieve the generations
"""
response = client.batches.retrieve(batch_id=batch_id)
output_file_id = response.output_file_id
assert output_file_id is not None, "Learn to be patient!"
print(f"{output_file_id=}")
"""
Save to a local file.
"""
content = client.files.content(output_file_id)
content.write_to_file("output.jsonl")
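# Each output line is one result object. Per the batch output format, the
# completion text should sit at response.body.choices[0].message.content
# (treat this parsing as a sketch and adjust if the output schema differs):
with open("output.jsonl") as f:
    for line in f:
        result = json.loads(line)
        print(result["custom_id"])
        print(result["response"]["body"]["choices"][0]["message"]["content"])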
@kyo-takano
Author

Update

The official API Reference now has Python code examples for this feature:
https://platform.openai.com/docs/api-reference/batch/retrieve?lang=python
