网页读取

网页读取接口

# POST a sample request containing every documented field to the web_fetch endpoint.
# Replace <token> and the "<string>" selector placeholders before running.
curl --request POST \
  --url https://geekai.co/api/v1/web_fetch \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "https://www.example.com",
  "model": "jina-reader-v1",
  "engine": "direct",
  "response_format": "markdown",
  "timeout": 10,
  "budget_tokens": 200000,
  "target_selectors": [
    "<string>"
  ],
  "wait_for_selectors": [
    "<string>"
  ],
  "remove_selectors": [
    "<string>"
  ],
  "openai_compatible": false,
  "remove_images": false,
  "with_links": false,
  "with_images": false,
  "with_images_alt": false,
  "with_iframes": false
}
'

import requests

# Endpoint of the web-page reading API.
url = "https://geekai.co/api/v1/web_fetch"

# Request body; "<string>" entries are placeholders for real CSS selectors.
payload = {
    "url": "https://www.example.com",
    "model": "jina-reader-v1",
    "engine": "direct",
    "response_format": "markdown",
    "timeout": 10,  # server-side page-load timeout, in seconds
    "budget_tokens": 200000,
    "target_selectors": ["<string>"],
    "wait_for_selectors": ["<string>"],
    "remove_selectors": ["<string>"],
    "openai_compatible": False,
    "remove_images": False,
    "with_links": False,
    "with_images": False,
    "with_images_alt": False,
    "with_iframes": False
}
# Replace <token> with a real API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so set a client-side limit to keep
# the call from blocking forever; it is longer than the page-load timeout
# sent in the payload so the server can answer first.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON on both success and error).
print(response.text)

// Endpoint of the web-page reading API.
const endpoint = 'https://geekai.co/api/v1/web_fetch';

// Request body; '<string>' entries are placeholders for real CSS selectors.
const payload = {
  url: 'https://www.example.com',
  model: 'jina-reader-v1',
  engine: 'direct',
  response_format: 'markdown',
  timeout: 10,
  budget_tokens: 200000,
  target_selectors: ['<string>'],
  wait_for_selectors: ['<string>'],
  remove_selectors: ['<string>'],
  openai_compatible: false,
  remove_images: false,
  with_links: false,
  with_images: false,
  with_images_alt: false,
  with_iframes: false
};

// Fetch options: JSON POST authenticated with a bearer token.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply and log it; log any failure instead.
fetch(endpoint, options)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body; '<string>' entries are placeholders for real CSS selectors.
$payload = json_encode([
  'url' => 'https://www.example.com',
  'model' => 'jina-reader-v1',
  'engine' => 'direct',
  'response_format' => 'markdown',
  'timeout' => 10,
  'budget_tokens' => 200000,
  'target_selectors' => ['<string>'],
  'wait_for_selectors' => ['<string>'],
  'remove_selectors' => ['<string>'],
  'openai_compatible' => false,
  'remove_images' => false,
  'with_links' => false,
  'with_images' => false,
  'with_images_alt' => false,
  'with_iframes' => false
]);

// Bearer-token authentication plus the JSON content type.
$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://geekai.co/api/v1/web_fetch",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the transfer, capture any transport error, then release the handle.
$result = curl_exec($handle);
$error = curl_error($handle);

curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample web_fetch request and prints the response body.
//
// Failures while building the request, sending it, or reading the reply are
// reported and main returns early, so a failed call never dereferences a nil
// response.
func main() {
	url := "https://geekai.co/api/v1/web_fetch"

	// Request body; "<string>" entries are placeholders for real CSS selectors.
	payload := strings.NewReader(`{
  "url": "https://www.example.com",
  "model": "jina-reader-v1",
  "engine": "direct",
  "response_format": "markdown",
  "timeout": 10,
  "budget_tokens": 200000,
  "target_selectors": [
    "<string>"
  ],
  "wait_for_selectors": [
    "<string>"
  ],
  "remove_selectors": [
    "<string>"
  ],
  "openai_compatible": false,
  "remove_images": false,
  "with_links": false,
  "with_images": false,
  "with_images_alt": false,
  "with_iframes": false
}`)

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// Replace <token> with a real API token.
	req.Header.Set("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	// Report non-2xx statuses; the body is still printed below.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		fmt.Println("unexpected status:", res.Status)
	}

	fmt.Println(string(body))
}

// JSON request body; "<string>" entries are placeholders for real CSS selectors.
String requestBody = "{\n  \"url\": \"https://www.example.com\",\n  \"model\": \"jina-reader-v1\",\n  \"engine\": \"direct\",\n  \"response_format\": \"markdown\",\n  \"timeout\": 10,\n  \"budget_tokens\": 200000,\n  \"target_selectors\": [\n    \"<string>\"\n  ],\n  \"wait_for_selectors\": [\n    \"<string>\"\n  ],\n  \"remove_selectors\": [\n    \"<string>\"\n  ],\n  \"openai_compatible\": false,\n  \"remove_images\": false,\n  \"with_links\": false,\n  \"with_images\": false,\n  \"with_images_alt\": false,\n  \"with_iframes\": false\n}";

// POST the body to the web_fetch endpoint and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://geekai.co/api/v1/web_fetch")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the web-page reading API.
endpoint = URI("https://geekai.co/api/v1/web_fetch")

# HTTPS connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with bearer-token authentication and a JSON body;
# "<string>" entries are placeholders for real CSS selectors.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"url\": \"https://www.example.com\",\n  \"model\": \"jina-reader-v1\",\n  \"engine\": \"direct\",\n  \"response_format\": \"markdown\",\n  \"timeout\": 10,\n  \"budget_tokens\": 200000,\n  \"target_selectors\": [\n    \"<string>\"\n  ],\n  \"wait_for_selectors\": [\n    \"<string>\"\n  ],\n  \"remove_selectors\": [\n    \"<string>\"\n  ],\n  \"openai_compatible\": false,\n  \"remove_images\": false,\n  \"with_links\": false,\n  \"with_images\": false,\n  \"with_images_alt\": false,\n  \"with_iframes\": false\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "id": "<string>",
  "created": 123,
  "result": {
    "url": "<string>",
    "title": "<string>",
    "content": "<string>",
    "screenshot": "<string>",
    "links": {},
    "images": {},
    "metadata": {}
  },
  "usage": {
    "total_tokens": 1024,
    "completion_tokens": 1024
  }
}

{
  "code": "validation_error",
  "message": "参数验证失败",
  "details": {
    "field": "错误描述"
  }
}

{
  "code": "unauthorized",
  "message": "Invalid API key or token"
}

{
  "code": "invalid_request",
  "message": "请求参数不合法"
}

POST

web_fetch

网页读取接口

# POST a sample request containing every documented field to the web_fetch endpoint.
# Replace <token> and the "<string>" selector placeholders before running.
curl --request POST \
  --url https://geekai.co/api/v1/web_fetch \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "https://www.example.com",
  "model": "jina-reader-v1",
  "engine": "direct",
  "response_format": "markdown",
  "timeout": 10,
  "budget_tokens": 200000,
  "target_selectors": [
    "<string>"
  ],
  "wait_for_selectors": [
    "<string>"
  ],
  "remove_selectors": [
    "<string>"
  ],
  "openai_compatible": false,
  "remove_images": false,
  "with_links": false,
  "with_images": false,
  "with_images_alt": false,
  "with_iframes": false
}
'

import requests

# Endpoint of the web-page reading API.
url = "https://geekai.co/api/v1/web_fetch"

# Request body; "<string>" entries are placeholders for real CSS selectors.
payload = {
    "url": "https://www.example.com",
    "model": "jina-reader-v1",
    "engine": "direct",
    "response_format": "markdown",
    "timeout": 10,  # server-side page-load timeout, in seconds
    "budget_tokens": 200000,
    "target_selectors": ["<string>"],
    "wait_for_selectors": ["<string>"],
    "remove_selectors": ["<string>"],
    "openai_compatible": False,
    "remove_images": False,
    "with_links": False,
    "with_images": False,
    "with_images_alt": False,
    "with_iframes": False
}
# Replace <token> with a real API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so set a client-side limit to keep
# the call from blocking forever; it is longer than the page-load timeout
# sent in the payload so the server can answer first.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON on both success and error).
print(response.text)

// Endpoint of the web-page reading API.
const endpoint = 'https://geekai.co/api/v1/web_fetch';

// Request body; '<string>' entries are placeholders for real CSS selectors.
const payload = {
  url: 'https://www.example.com',
  model: 'jina-reader-v1',
  engine: 'direct',
  response_format: 'markdown',
  timeout: 10,
  budget_tokens: 200000,
  target_selectors: ['<string>'],
  wait_for_selectors: ['<string>'],
  remove_selectors: ['<string>'],
  openai_compatible: false,
  remove_images: false,
  with_links: false,
  with_images: false,
  with_images_alt: false,
  with_iframes: false
};

// Fetch options: JSON POST authenticated with a bearer token.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply and log it; log any failure instead.
fetch(endpoint, options)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body; '<string>' entries are placeholders for real CSS selectors.
$payload = json_encode([
  'url' => 'https://www.example.com',
  'model' => 'jina-reader-v1',
  'engine' => 'direct',
  'response_format' => 'markdown',
  'timeout' => 10,
  'budget_tokens' => 200000,
  'target_selectors' => ['<string>'],
  'wait_for_selectors' => ['<string>'],
  'remove_selectors' => ['<string>'],
  'openai_compatible' => false,
  'remove_images' => false,
  'with_links' => false,
  'with_images' => false,
  'with_images_alt' => false,
  'with_iframes' => false
]);

// Bearer-token authentication plus the JSON content type.
$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://geekai.co/api/v1/web_fetch",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the transfer, capture any transport error, then release the handle.
$result = curl_exec($handle);
$error = curl_error($handle);

curl_close($handle);

// Print the transport error if there was one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample web_fetch request and prints the response body.
//
// Failures while building the request, sending it, or reading the reply are
// reported and main returns early, so a failed call never dereferences a nil
// response.
func main() {
	url := "https://geekai.co/api/v1/web_fetch"

	// Request body; "<string>" entries are placeholders for real CSS selectors.
	payload := strings.NewReader(`{
  "url": "https://www.example.com",
  "model": "jina-reader-v1",
  "engine": "direct",
  "response_format": "markdown",
  "timeout": 10,
  "budget_tokens": 200000,
  "target_selectors": [
    "<string>"
  ],
  "wait_for_selectors": [
    "<string>"
  ],
  "remove_selectors": [
    "<string>"
  ],
  "openai_compatible": false,
  "remove_images": false,
  "with_links": false,
  "with_images": false,
  "with_images_alt": false,
  "with_iframes": false
}`)

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// Replace <token> with a real API token.
	req.Header.Set("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	// Report non-2xx statuses; the body is still printed below.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		fmt.Println("unexpected status:", res.Status)
	}

	fmt.Println(string(body))
}

// JSON request body; "<string>" entries are placeholders for real CSS selectors.
String requestBody = "{\n  \"url\": \"https://www.example.com\",\n  \"model\": \"jina-reader-v1\",\n  \"engine\": \"direct\",\n  \"response_format\": \"markdown\",\n  \"timeout\": 10,\n  \"budget_tokens\": 200000,\n  \"target_selectors\": [\n    \"<string>\"\n  ],\n  \"wait_for_selectors\": [\n    \"<string>\"\n  ],\n  \"remove_selectors\": [\n    \"<string>\"\n  ],\n  \"openai_compatible\": false,\n  \"remove_images\": false,\n  \"with_links\": false,\n  \"with_images\": false,\n  \"with_images_alt\": false,\n  \"with_iframes\": false\n}";

// POST the body to the web_fetch endpoint and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://geekai.co/api/v1/web_fetch")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the web-page reading API.
endpoint = URI("https://geekai.co/api/v1/web_fetch")

# HTTPS connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request with bearer-token authentication and a JSON body;
# "<string>" entries are placeholders for real CSS selectors.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"url\": \"https://www.example.com\",\n  \"model\": \"jina-reader-v1\",\n  \"engine\": \"direct\",\n  \"response_format\": \"markdown\",\n  \"timeout\": 10,\n  \"budget_tokens\": 200000,\n  \"target_selectors\": [\n    \"<string>\"\n  ],\n  \"wait_for_selectors\": [\n    \"<string>\"\n  ],\n  \"remove_selectors\": [\n    \"<string>\"\n  ],\n  \"openai_compatible\": false,\n  \"remove_images\": false,\n  \"with_links\": false,\n  \"with_images\": false,\n  \"with_images_alt\": false,\n  \"with_iframes\": false\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "id": "<string>",
  "created": 123,
  "result": {
    "url": "<string>",
    "title": "<string>",
    "content": "<string>",
    "screenshot": "<string>",
    "links": {},
    "images": {},
    "metadata": {}
  },
  "usage": {
    "total_tokens": 1024,
    "completion_tokens": 1024
  }
}

{
  "code": "validation_error",
  "message": "参数验证失败",
  "details": {
    "field": "错误描述"
  }
}

{
  "code": "unauthorized",
  "message": "Invalid API key or token"
}

{
  "code": "invalid_request",
  "message": "请求参数不合法"
}

网页读取 API 是一个专为大模型设计的网页读取工具，可以返回更适合大模型处理的结果（包含网页标题、内容、摘要等信息）。支持多个读取引擎以及多种内容格式输出。

授权

Authorization

string

header

必填

API认证token

请求体

application/json

url

string

必填

网页URL

示例:

"https://www.example.com"

model

string

默认值:jina-reader-v1

网页读取模型

engine

enum<string>

默认值:direct

读取引擎，不同平台支持的读取引擎可能不一样，direct 表示速度优先，browser 表示质量优先

可用选项:

direct,

browser

response_format

enum<string>

默认值:markdown

返回内容格式

可用选项:

markdown,

html,

text,

screenshot

timeout

integer

默认值:10

等待网页加载超时时间，单位为秒

budget_tokens

integer

默认值:200000

预算token数

target_selectors

string[]

仅提取指定 CSS 选择器中的内容

wait_for_selectors

string[]

等待指定 CSS 选择器加载后再进行内容提取（适合异步加载内容的网页）

remove_selectors

string[]

提取内容时排除指定 CSS 选择器对应的内容

openai_compatible

boolean

默认值:false

是否返回OpenAI兼容格式的内容，默认为false

remove_images

boolean

默认值:false

是否移除网页内容中的图片，默认为false

with_links

boolean

默认值:false

是否单独返回去重后的所有链接列表，默认为false

with_images

boolean

默认值:false

是否单独返回去重后的图片URL列表，默认为false

with_images_alt

boolean

默认值:false

是否返回内容中图片的alt文本，默认为false

with_iframes

boolean

默认值:false

是否返回网页嵌入的iframe内容，默认为false

响应

成功响应

id

string

必填

请求ID

created

integer<unix-timestamp>

必填

请求创建时间戳

result

object

必填

网页读取结果

Show child attributes

usage

object

必填

Show child attributes

联网搜索 图片生成

文本模型

画图模型

语音模型

视频模型

3D模型

模型列表

文件对话

OCR服务

令牌管理

账单信息

授权

请求体

响应