Get Started

POST

chat

completions

curl --request POST \
  --url https://api.yutori.com/v1/chat/completions \
  --header "Authorization: Bearer <api_key>" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "n1.5-latest",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Click on the search bar and type Yutori."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
            }
          }
        ]
      }
    ]
  }'

from openai import OpenAI

client = OpenAI(
    base_url="https://api.yutori.com/v1",
    api_key="YOUR_API_KEY",
)

response = client.chat.completions.create(
    model="n1.5-latest",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Click on the search bar and type Yutori."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
                    }
                }
            ]
        }
    ]
)

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.yutori.com/v1",
  apiKey: "YOUR_API_KEY",
});

const response = await client.chat.completions.create({
  model: "n1.5-latest",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Click on the search bar and type Yutori.",
        },
        {
          type: "image_url",
          image_url: {
            url: "https://docs.yutori.com/assets/google_homepage_2024.jpg",
          },
        },
      ],
    },
  ],
});

{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "model": "n1.5-latest",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "I can see the Google homepage. I'll click on the search bar to begin searching for Yutori.",
        "tool_calls": [
          {
            "id": "chatcmpl-tool-abc123",
            "type": "function",
            "function": {
              "name": "left_click",
              "arguments": "{\"coordinates\": [640, 400]}"
            }
          }
        ]
      },
      "finish_reason": "tool_calls"
    }
  ],
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 56,
    "total_tokens": 1290
  },
  "request_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
}

curl --request POST \
  --url https://api.yutori.com/v1/chat/completions \
  --header "Authorization: Bearer <api_key>" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "n1.5-latest",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Click on the search bar and type Yutori."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
            }
          }
        ]
      }
    ]
  }'

from openai import OpenAI

client = OpenAI(
    base_url="https://api.yutori.com/v1",
    api_key="YOUR_API_KEY",
)

response = client.chat.completions.create(
    model="n1.5-latest",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Click on the search bar and type Yutori."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
                    }
                }
            ]
        }
    ]
)

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.yutori.com/v1",
  apiKey: "YOUR_API_KEY",
});

const response = await client.chat.completions.create({
  model: "n1.5-latest",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Click on the search bar and type Yutori.",
        },
        {
          type: "image_url",
          image_url: {
            url: "https://docs.yutori.com/assets/google_homepage_2024.jpg",
          },
        },
      ],
    },
  ],
});

{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "model": "n1.5-latest",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "I can see the Google homepage. I'll click on the search bar to begin searching for Yutori.",
        "tool_calls": [
          {
            "id": "chatcmpl-tool-abc123",
            "type": "function",
            "function": {
              "name": "left_click",
              "arguments": "{\"coordinates\": [640, 400]}"
            }
          }
        ]
      },
      "finish_reason": "tool_calls"
    }
  ],
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 56,
    "total_tokens": 1290
  },
  "request_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
}

Overview

The Navigator API provides a computer-use model family that predicts actions to interact with a browser. Given a task in natural language, the current screenshot, and the full action history, the model predicts the next action to take to accomplish the task. The API follows the OpenAI chat.completions format. The latest model is:

Model	API model id	Description
Navigator n1.5	`n1.5-latest`	Computer-use model with expanded action space, selectable tool sets, and JSON structured output. Details →

Want to try Navigator without writing code?

Navigator Browser Extension — test and explore in your own local browser
Yutori Local — Mac desktop app that gives our agents access to a browser on your computer, enabling login-required workflows and local browsing tasks

Screenshot Requirements

Screenshots should capture only the browser content itself. Do not include the operating system UI, window title bars, browser tabs, URL bars, or other chrome elements. For the best performance, render screenshots in WXGA (1280×800, 16:10). The model should generalize well to most other resolutions, but grounding accuracy may degrade with extreme aspect ratios. We recommend using the WebP format for screenshots, as it offers significantly better compression than PNG — especially for multi-step trajectories with many images. The Python SDK provides helpers that handle resizing, WebP conversion, and base64 encoding:

pip install yutori

from yutori.navigator import screenshot_to_data_url, playwright_screenshot_to_data_url

# From raw screenshot bytes (any format)
image_url = screenshot_to_data_url(screenshot_bytes)

# Or directly from a Playwright page
image_url = playwright_screenshot_to_data_url(page)

# Async variant also available
# image_url = await aplaywright_screenshot_to_data_url(page)

See yutori.navigator.images for full options (custom resolution, quality settings).

Coordinate System

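All Navigator models output normalized coordinates in a 1000×1000 space. Convert to absolute pixel coordinates before executing actions in your browser by scaling each coordinate by the viewport size divided by 1000 (for example, on a 1280×800 viewport, normalized [640, 400] corresponds to roughly pixel (819, 320)):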

from yutori.navigator import denormalize_coordinates

pixel_x, pixel_y = denormalize_coordinates(
    coordinates=[640, 400],
    width=viewport_width,
    height=viewport_height,
)

See yutori.navigator.coordinates for the inverse normalize_coordinates function.

Response Format

Actions are returned via the tool_calls field in the response message:

{
  "choices": [
    {
      "message": {
        "role": "assistant",
        "content": "I'll click on the search bar.",
        "tool_calls": [
          {
            "id": "chatcmpl-tool-abc123",
            "type": "function",
            "function": {
              "name": "left_click",
              "arguments": "{\"coordinates\": [640, 400]}"
            }
          }
        ]
      },
      "finish_reason": "tool_calls"
    }
  ]
}

The content field contains the model’s reasoning, and tool_calls contains the predicted action(s). The full response also includes a top-level request_id (omitted from the abbreviated example above), a unique identifier useful for debugging. When the model intends to stop, it returns a response with only content text and no tool_calls. This content field is the model’s final response to the task.

Multi-Turn Conversations

The model expects full chat history to best predict the next action. We do not recommend removing any messages when constructing requests. For longer trajectories, we suggest dropping only old screenshots while keeping every message intact. Always keep a minimum of the 2 most recent screenshots, and never send a request with no screenshot at all. Requests that carry only stale screenshots (or none) sharply degrade grounding and can cause the model to loop or hallucinate. The Python SDK provides create_trimmed / acreate_trimmed (in yutori.navigator) which strip older screenshots automatically while preserving recent ones and all text — by default keeping the 6 most recent screenshots (and always the latest), and only trimming when a request would otherwise exceed the size limit. Include the assistant’s previous response with its tool_calls, followed by tool results with the new screenshot:

response = client.chat.completions.create(
    model="n1.5-latest",
    messages=[
        # Initial user message with task and first screenshot
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Search for Yutori on Google."},
                {"type": "image_url", "image_url": {"url": f"data:image/webp;base64,{screenshot_1}"}}
            ]
        },
        # Assistant's response with tool call
        {
            "role": "assistant",
            "content": "I can see the Google homepage. I'll click on the search bar.",
            "tool_calls": [
                {
                    "id": "chatcmpl-tool-123",
                    "type": "function",
                    "function": {
                        "name": "left_click",
                        "arguments": "{\"coordinates\": [500, 465]}"
                    }
                }
            ]
        },
        # Tool result with current URL and new screenshot
        {
            "role": "tool",
            "tool_call_id": "chatcmpl-tool-123",
            "content": [
                {"type": "text", "text": "Clicked 1x with left\nCurrent URL: https://www.google.com/"},
                {"type": "image_url", "image_url": {"url": f"data:image/webp;base64,{screenshot_2}"}}
            ]
        }
    ]
)

The tool result can be a short description of what the tool did or whether it was successful. Including the current URL in the tool response is important for better attribution of information sources. If the tool extracts additional information (e.g. extract_elements, execute_js), the raw output of the tool can be provided as the tool result so that the model can also see the extracted information.

Custom Tools

You can provide additional tools alongside the built-in browser actions using the tools parameter. Custom tools are appended after the default tool set.

response = client.chat.completions.create(
    model="n1.5-latest",
    messages=[...],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "extract_content_and_links",
                "description": "Extracts page content and hyperlinks relevant to the user task. This operation is strictly read-only and never interacts with or alters the page.",
                "parameters": {"type": "object", "properties": {}, "required": []}
            }
        }
    ]
)

Results of custom tool calls should also be provided as tool responses so the model can use them for subsequent decisions:

{
    "role": "tool",
    "tool_call_id": "chatcmpl-tool-456",
    "content": [
        {
            "type": "text",
            "text": "Visible buttons and links:\n- About https://about.google/\n- Store https://store.google.com/\n- Gmail https://mail.google.com/\nCurrent page URL: https://www.google.com/"
        },
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/webp;base64,{screenshot}"}
        }
    ]
}

Tool Choice

Control whether tool calls are parsed into the tool_calls array:

"auto" (default): Parses and returns tool calls as a structured tool_calls list
"none": Returns the raw model response as content text (tool calls may still appear inside <tool_call> XML tags in content)

Prompting Guidance

We use a default system prompt when none is provided, and generally do not recommend providing custom system prompts — extra behavioral instructions may degrade results. Instead, place additional instructions in the first user message, after the main task description:

{
  "messages": [
    {
      "role": "user",
      "content": "Click the login button. Additional instructions: be careful with popups."
    }
  ]
}

We also recommend not interrupting trajectory execution with additional user messages, except to force the model to stop and summarize:

messages.append({
    "role": "user",
    "content": f"Stop here. Summarize your current progress and list all findings relevant to: {task}"
})

Structured Decoding

By default, the API uses a structural_tag response format to enforce valid tool call generation via guided decoding. You do not need to provide this yourself — the API generates it automatically based on the active model and tool set. If custom tools are included in your request, their schemas are automatically incorporated. We do not recommend overriding the response_format unless you also set tool_choice="none" to work with the raw model output directly.

Authorizations

Authorization

string

header

required

Use Authorization: Bearer <api_key>

Body

application/json

messages

(ChatCompletionDeveloperMessageParam · object | ChatCompletionSystemMessageParam · object | ChatCompletionUserMessageParam · object | ChatCompletionAssistantMessageParam · object | ChatCompletionToolMessageParam · object | ChatCompletionFunctionMessageParam · object | ChatCompletionToolImageMessageParam · object | ChatCompletionObservationMessageParam · object)[]

Show child attributes

model

enum<string>

Available options:

n1.5-latest,

n1.5-20260428

max_completion_tokens

integer

default:1572

temperature

number | null

default:0.3

top_p

number | null

repetition_penalty

number | null

Penalizes token repetition. 1.0 = no penalty, >1.0 = less repetition. Only supported by vLLM-backed models.

presence_penalty

number | null

frequency_penalty

number | null

tools

Tools · object[] | null

Additional tools to extend the default browser action tools. Tools are merged with the built-in browser actions (left_click, scroll, type, etc.).

tool_choice

default:auto

Controls whether tool calls are parsed from the response. The model always decides whether to call a tool. 'none' treats the response as text-only, although tool calls may still be present inside <tool_call> tags; 'auto' (default) parses tool calls automatically into the tool_calls list in the response.

response_format

Response Format · object | null

An object specifying the format that the model must output.

tool_set

string | null

Named tool set (n1.5+ models only). 'browser_tools_core-20260403' (default): coordinate-based tools. 'browser_tools_expanded-20260403': adds extract_elements, find, set_element_value, execute_js.

disable_tools

string[] | null

List of tool names to remove from the selected tool set (n1.5+ models only).

json_schema

Json Schema · object | null

JSON Schema for structured output (n1.5+ models only). The schema is appended to your task message. The model returns JSON in ```json fences, which is parsed and returned as 'parsed_json' in the response.

prev_request_id

string | null

The request_id returned by the previous Navigator call in this conversation. Echo it back on each subsequent call to link the calls into one conversation for usage reporting.

Response

Successful Response

Navigator n1.5

curl --request POST \
  --url https://api.yutori.com/v1/chat/completions \
  --header "Authorization: Bearer <api_key>" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "n1.5-latest",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Click on the search bar and type Yutori."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
            }
          }
        ]
      }
    ]
  }'

from openai import OpenAI

client = OpenAI(
    base_url="https://api.yutori.com/v1",
    api_key="YOUR_API_KEY",
)

response = client.chat.completions.create(
    model="n1.5-latest",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Click on the search bar and type Yutori."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://docs.yutori.com/assets/google_homepage_2024.jpg"
                    }
                }
            ]
        }
    ]
)

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.yutori.com/v1",
  apiKey: "YOUR_API_KEY",
});

const response = await client.chat.completions.create({
  model: "n1.5-latest",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Click on the search bar and type Yutori.",
        },
        {
          type: "image_url",
          image_url: {
            url: "https://docs.yutori.com/assets/google_homepage_2024.jpg",
          },
        },
      ],
    },
  ],
});

{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "model": "n1.5-latest",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "I can see the Google homepage. I'll click on the search bar to begin searching for Yutori.",
        "tool_calls": [
          {
            "id": "chatcmpl-tool-abc123",
            "type": "function",
            "function": {
              "name": "left_click",
              "arguments": "{\"coordinates\": [640, 400]}"
            }
          }
        ]
      },
      "finish_reason": "tool_calls"
    }
  ],
  "usage": {
    "prompt_tokens": 1234,
    "completion_tokens": 56,
    "total_tokens": 1290
  },
  "request_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
}

Getting Started

General

Navigator API

Browsing API

Research API

Scouting API

Webhooks

Overview

Screenshot Requirements

Coordinate System

Response Format

Multi-Turn Conversations

Custom Tools

Tool Choice

Prompting Guidance

Structured Decoding

Authorizations

Body

Response

Overview

Screenshot Requirements

Coordinate System

Response Format

Multi-Turn Conversations

Custom Tools

Tool Choice

Prompting Guidance

Structured Decoding

Authorizations

Body

Response

Overview

Screenshot Requirements

Coordinate System

Response Format

Multi-Turn Conversations

Custom Tools

Tool Choice

Prompting Guidance

Structured Decoding