增加websocket支持

This commit is contained in:
2025-09-08 13:47:13 +08:00
parent 9caedd697d
commit 7e0fd53dd3
336 changed files with 9020 additions and 20356 deletions

View File

@@ -1,6 +1,3 @@
[submodule "cloudwego"]
[submodule "tools/asm2asm"]
path = tools/asm2asm
url = https://github.com/cloudwego/asm2asm.git
[submodule "tools/simde"]
path = tools/simde
url = https://github.com/simd-everywhere/simde.git
url = https://github.com/chenzhuoyu/asm2asm

View File

@@ -5,27 +5,18 @@ English | [中文](README_ZH_CN.md)
A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data).
## Requirement
- Go 1.16~1.22
- Linux / MacOS / Windows(need go1.17 above)
- Go 1.15~1.20
- Linux/MacOS/Windows
- Amd64 ARCH
## Features
- Runtime object binding without code generation
- Complete APIs for JSON value manipulation
- Fast, fast, fast!
## APIs
see [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
## Benchmarks
For **all sizes** of json and **all scenarios** of usage, **Sonic performs best**.
- [Medium](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13KB, 300+ key, 6 layers)
```powershell
goversion: 1.17.1
goos: darwin
@@ -85,21 +76,15 @@ BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.8
BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op
BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op
BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op
BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op
```
- [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers)
![small benchmarks](./docs/imgs/bench-small.png)
- [Large](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635KB, 10000+ key, 6 layers)
![large benchmarks](./docs/imgs/bench-large.png)
See [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) for benchmark codes.
See [bench.sh](https://github.com/bytedance/sonic/blob/main/bench.sh) for benchmark codes.
## How it works
See [INTRODUCTION.md](./docs/INTRODUCTION.md).
## Usage
@@ -107,7 +92,6 @@ See [INTRODUCTION.md](./docs/INTRODUCTION.md).
### Marshal/Unmarshal
Default behaviors are mostly consistent with `encoding/json`, except HTML escaping form (see [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) and `SortKeys` feature (optional support see [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)) that is **NOT** in conformity to [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
```go
import "github.com/bytedance/sonic"
@@ -119,11 +103,8 @@ err := sonic.Unmarshal(output, &data)
```
### Streaming IO
Sonic supports decoding json from `io.Reader` or encoding objects into `io.Writer`, aims at handling multiple values as well as reducing memory consumption.
Sonic supports decoding json from `io.Reader` or encoding objects into `io.`Writer`, aims at handling multiple values as well as reducing memory consumption.
- encoder
```go
var o1 = map[string]interface{}{
"a": "b",
@@ -138,9 +119,7 @@ fmt.Println(w.String())
// {"a":"b"}
// 1
```
- decoder
```go
var o = map[string]interface{}{}
var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
@@ -153,7 +132,6 @@ fmt.Printf("%+v", o)
```
### Use Number/Use Int64
```go
import "github.com/bytedance/sonic/decoder"
@@ -182,9 +160,7 @@ fm := root.Interface().(float64) // jn == jm
```
### Sort Keys
On account of the performance loss from sorting (roughly 10%), sonic doesn't enable this feature by default. If your component depends on it to work (like [zstd](https://github.com/facebook/zstd)), Use it like this:
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/encoder"
@@ -197,26 +173,19 @@ v, err := encoder.Encode(m, encoder.SortMapKeys)
var root := sonic.Get(JSON)
err := root.SortKeys()
```
### Escape HTML
On account of the performance loss (roughly 15%), sonic doesn't enable this feature by default. You can use `encoder.EscapeHTML` option to open this feature (align with `encoding/json.HTMLEscape`).
```go
import "github.com/bytedance/sonic"
v := map[string]string{"&&":"<>"}
ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e"}}`
```
### Compact Format
Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except marshaling `json.RawMessage` or `json.Marshaler`: sonic ensures validating their output JSON but **DONOT** compacting them for performance concerns. We provide the option `encoder.CompactMarshaler` to add compacting process.
### Print Error
If there invalid syntax in input JSON, sonic will return `decoder.SyntaxError`, which supports pretty-printing of error position
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
@@ -242,9 +211,7 @@ if err != nil {
```
#### Mismatched Types [Sonic v1.6.0]
If there a **mismatch-typed** value for a given key, sonic will report `decoder.MismatchTypeError` (if there are many, report the last one), but still skip wrong the value and keep decoding next JSON.
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
@@ -257,15 +224,10 @@ err := UnmarshalString(`{"A":"1","B":1}`, &data)
println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
fmt.Printf("%+v", data) // {A:0 B:1}
```
### Ast.Node
Sonic/ast.Node is a completely self-contained AST for JSON. It implements serialization and deserialization both and provides robust APIs for obtaining and modification of generic data.
#### Get/Index
Search partial JSON by given paths, which must be non-negative integer or string, or nil
```go
import "github.com/bytedance/sonic"
@@ -279,13 +241,10 @@ raw := root.Raw() // == string(input)
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
**Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. And sonic also provides another API `IndexOrGet()` to underlying use offset as well as ensure the key is matched.
#### Set/Unset
Modify the json content by Set()/Unset()
```go
import "github.com/bytedance/sonic"
@@ -302,9 +261,7 @@ println(root.Get("key4").Check()) // "value not exist"
```
#### Serialize
To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer)
```go
import (
"encoding/json"
@@ -318,7 +275,6 @@ println(string(buf) == string(exp)) // true
```
#### APIs
- validation: `Check()`, `Error()`, `Valid()`, `Exist()`
- searching: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
- go-type casting: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
@@ -326,55 +282,13 @@ println(string(buf) == string(exp)) // true
- iteration: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
- modification: `Set()`, `SetByIndex()`, `Add()`
### Ast.Visitor
Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` not `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`). For example, you might have the following types which are like `interface{}` but actually not `interface{}`:
```go
type UserNode interface {}
// the following types implement the UserNode interface.
type (
UserNull struct{}
UserBool struct{ Value bool }
UserInt64 struct{ Value int64 }
UserFloat64 struct{ Value float64 }
UserString struct{ Value string }
UserObject struct{ Value map[string]UserNode }
UserArray struct{ Value []UserNode }
)
```
Sonic provides the following API to return **the preorder traversal of a JSON AST**. The `ast.Visitor` is a SAX style interface which is used in some C++ JSON library. You should implement `ast.Visitor` by yourself and pass it to `ast.Preorder()` method. In your visitor you can make your custom types to represent JSON values. There may be an O(n) space container (such as stack) in your visitor to record the object / array hierarchy.
```go
func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
type Visitor interface {
OnNull() error
OnBool(v bool) error
OnString(v string) error
OnInt64(v int64, n json.Number) error
OnFloat64(v float64, n json.Number) error
OnObjectBegin(capacity int) error
OnObjectKey(key string) error
OnObjectEnd() error
OnArrayBegin(capacity int) error
OnArrayEnd() error
}
```
See [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) for detailed usage. We also implement a demo visitor for `UserNode` in [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go).
## Compatibility
Sonic **DOES NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. For developers who use sonic to build their applications in different environments, we have the following suggestions:
- Developing on **Mac M1**: Make sure you have Rosetta 2 installed on your machine, and set `GOARCH=amd64` when building your application. Rosetta 2 can automatically translate x86 binaries to arm64 binaries and run x86 applications on Mac M1.
- Developing on **Linux arm64**: You can install qemu and use the `qemu-x86_64 -cpu max` command to convert x86 binaries to amr64 binaries for applications built with sonic. The qemu can achieve a similar transfer effect to Rosetta 2 on Mac M1.
For developers who want to use sonic on Linux arm64 without qemu, or those who want to handle JSON strictly consistent with `encoding/json`, we provide some compatible APIs as `sonic.API`
- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options like `SortKeys=false` will be invalid.
- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It will fall back to `encoding/json`.
- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options will be invalid.
@@ -382,9 +296,7 @@ For developers who want to use sonic on Linux arm64 without qemu, or those who w
## Tips
### Pretouch
Since Sonic uses [golang-asm](https://github.com/twitchyliquid64/golang-asm) as a JIT assembler, which is NOT very suitable for runtime compiling, first-hit running of a huge schema may cause request-timeout or even process-OOM. For better stability, we advise **using `Pretouch()` for huge-schema or compact-memory applications** before `Marshal()/Unmarshal()`.
```go
import (
"reflect"
@@ -399,7 +311,7 @@ func init() {
err := sonic.Pretouch(reflect.TypeOf(v))
// with more CompileOption...
err := sonic.Pretouch(reflect.TypeOf(v),
err := sonic.Pretouch(reflect.TypeOf(v),
// If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
// you can set compile recursive loops in Pretouch for better stability in JIT.
option.WithCompileRecursiveDepth(loop),
@@ -410,23 +322,17 @@ func init() {
```
### Copy string
When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up. - `Config.CopyString`/`decoder.CopyString()`: We provide the option for `Decode()` / `Unmarshal()` users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree.
- `GetFromStringNoCopy()`: For memory safety, `sonic.Get()` / `sonic.GetFromString()` now copies return JSON. If users want to get json more quickly and not care about memory usage, you can use `GetFromStringNoCopy()` to return a JSON directly referenced from source.
When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up. We provide the option `decoder.CopyString()` for users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree.
### Pass string or []byte?
For alignment to `encoding/json`, we provide API to pass `[]byte` as an argument, but the string-to-bytes copy is conducted at the same time considering safety, which may lose performance when the origin JSON is huge. Therefore, you can use `UnmarshalString()` and `GetFromString()` to pass a string, as long as your origin data is a string or **nocopy-cast** is safe for your []byte. We also provide API `MarshalString()` for convenient **nocopy-cast** of encoded JSON []byte, which is safe since sonic's output bytes is always duplicated and unique.
### Accelerate `encoding.TextMarshaler`
To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259).
### Better performance for generic data
In **fully-parsed** scenario, `Unmarshal()` performs better than `Get()`+`Node.Interface()`. But if you only have a part of the schema for specific json, you can combine `Get()` and `Unmarshal()` together:
```go
import "github.com/bytedance/sonic"
@@ -434,9 +340,7 @@ node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
var user User // your partial schema...
err = sonic.UnmarshalString(node.Raw(), &user)
```
Even if you don't have any schema, use `ast.Node` as the container of generic values instead of `map` or `interface`:
```go
import "github.com/bytedance/sonic"
@@ -447,25 +351,12 @@ err = user.Check()
// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
go someFunc(user)
```
Why? Because `ast.Node` stores its children using `array`:
- `Array`'s performance is **much better** than `Map` when Inserting (Deserialize) and Scanning (Serialize) data;
- **Hashing** (`map[x]`) is not as efficient as **Indexing** (`array[x]`), which `ast.Node` can conduct on **both array and object**;
- Using `Interface()`/`Map()` means Sonic must parse all the underlying values, while `ast.Node` can parse them **on demand**.
**CAUTION:** `ast.Node` **DOESN'T** ensure concurrent security directly, due to its **lazy-load** design. However, you can call `Node.Load()`/`Node.LoadAll()` to achieve that, which may bring performance reduction while it still works faster than converting to `map` or `interface{}`
### Ast.Node or Ast.Visitor?
For generic data, `ast.Node` should be enough for your needs in most cases.
However, `ast.Node` is designed for partially processing JSON string. It has some special designs such as lazy-load which might not be suitable for directly parsing the whole JSON string like `Unmarshal()`. Although `ast.Node` is better then `map` or `interface{}`, it's also a kind of intermediate representation after all if your final types are customized and you have to convert the above types to your custom types after parsing.
For better performance, in previous case the `ast.Visitor` will be the better choice. It performs JSON decoding like `Unmarshal()` and you can directly use your final types to represents a JSON AST without any intermediate representations.
But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API.
## Community
Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem.

View File

@@ -6,14 +6,10 @@
## 依赖
- Go 1.16~1.22
- Linux / MacOS / Windows(需要 Go1.17 以上)
- Go 1.15~1.20
- Linux/MacOS/Windows
- Amd64 架构
## 接口
详见 [go.dev](https://pkg.go.dev/github.com/bytedance/sonic)
## 特色
- 运行时对象绑定,无需代码生成
@@ -23,9 +19,7 @@
## 基准测试
对于**所有大小**的 json 和**所有使用场景** **Sonic 表现均为最佳**
- [中型](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13kB, 300+ 键, 6 层)
```powershell
goversion: 1.17.1
goos: darwin
@@ -85,18 +79,13 @@ BenchmarkSetOne_Jsoniter-16 79475 ns/op 163.8
BenchmarkSetOne_Parallel_Sonic-16 850.9 ns/op 15305.31 MB/s 1584 B/op 17 allocs/op
BenchmarkSetOne_Parallel_Sjson-16 18194 ns/op 715.77 MB/s 52247 B/op 9 allocs/op
BenchmarkSetOne_Parallel_Jsoniter-16 33560 ns/op 388.05 MB/s 45892 B/op 964 allocs/op
BenchmarkLoadNode/LoadAll()-16 11384 ns/op 1143.93 MB/s 6307 B/op 25 allocs/op
BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.68 MB/s 7145 B/op 25 allocs/op
BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op
BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op
```
- [小型](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 个键, 3 层)
![small benchmarks](./docs/imgs/bench-small.png)
- [大型](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635kB, 10000+ 个键, 6 层)
![large benchmarks](./docs/imgs/bench-large.png)
要查看基准测试代码,请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) 。
要查看基准测试代码,请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/bench.sh) 。
## 工作原理
@@ -107,7 +96,6 @@ BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.7
### 序列化/反序列化
默认的行为基本上与 `encoding/json` 相一致,除了 HTML 转义形式(参见 [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) 和 `SortKeys` 功能(参见 [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)**没有**遵循 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 。
```go
import "github.com/bytedance/sonic"
@@ -121,9 +109,7 @@ err := sonic.Unmarshal(output, &data)
### 流式输入输出
Sonic 支持解码 `io.Reader` 中输入的 json或将对象编码为 json 后输出至 `io.Writer`,以处理多个值并减少内存消耗。
- 编码器
```go
var o1 = map[string]interface{}{
"a": "b",
@@ -138,9 +124,7 @@ fmt.Println(w.String())
// {"a":"b"}
// 1
```
- 解码器
```go
var o = map[string]interface{}{}
var r = strings.NewReader(`{"a":"b"}{"1":"2"}`)
@@ -184,7 +168,6 @@ fm := root.Interface().(float64) // jn == jm
### 对键排序
考虑到排序带来的性能损失(约 10% sonic 默认不会启用这个功能。如果你的组件依赖这个行为(如 [zstd](https://github.com/facebook/zstd)) ,可以仿照下面的例子:
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/encoder"
@@ -201,7 +184,6 @@ err := root.SortKeys()
### HTML 转义
考虑到性能损失约15% sonic 默认不会启用这个功能。你可以使用 `encoder.EscapeHTML` 选项来开启(与 `encoding/json.HTMLEscape` 行为一致)。
```go
import "github.com/bytedance/sonic"
@@ -210,13 +192,11 @@ ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e"
```
### 紧凑格式
Sonic 默认将基本类型( `struct` `map` 等)编码为紧凑格式的 JSON ,除非使用 `json.RawMessage` or `json.Marshaler` 进行编码: sonic 确保输出的 JSON 合法,但出于性能考虑,**不会**加工成紧凑格式。我们提供选项 `encoder.CompactMarshaler` 来添加此过程,
### 打印错误
如果输入的 JSON 存在无效的语法sonic 将返回 `decoder.SyntaxError`,该错误支持错误位置的美化输出。
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
@@ -244,7 +224,6 @@ if err != nil {
#### 类型不匹配 [Sonic v1.6.0]
如果给定键中存在**类型不匹配**的值, sonic 会抛出 `decoder.MismatchTypeError` (如果有多个,只会报告最后一个),但仍会跳过错误的值并解码下一个 JSON 。
```go
import "github.com/bytedance/sonic"
import "github.com/bytedance/sonic/decoder"
@@ -257,7 +236,6 @@ err := UnmarshalString(`{"A":"1","B":1}`, &data)
println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n"
fmt.Printf("%+v", data) // {A:0 B:1}
```
### `Ast.Node`
Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改通用数据的鲁棒的 API。
@@ -265,7 +243,6 @@ Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列
#### 查找/索引
通过给定的路径搜索 JSON 片段,路径必须为非负整数,字符串或 `nil` 。
```go
import "github.com/bytedance/sonic"
@@ -279,13 +256,11 @@ raw := root.Raw() // == string(input)
root, err := sonic.Get(input, "key1", 1, "key2")
sub := root.Get("key3").Index(2).Int64() // == 3
```
**注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。
#### 修改
使用 `Set()` / `Unset()` 修改 json 的内容
使用 ` Set()` / `Unset()` 修改 json 的内容
```go
import "github.com/bytedance/sonic"
@@ -302,9 +277,7 @@ println(root.Get("key4").Check()) // "value not exist"
```
#### 序列化
要将 `ast.Node` 编码为 json ,使用 `MarshalJson()` 或者 `json.Marshal()` (必须传递指向节点的指针)
```go
import (
"encoding/json"
@@ -318,7 +291,6 @@ println(string(buf) == string(exp)) // true
```
#### APIs
- 合法性检查: `Check()`, `Error()`, `Valid()`, `Exist()`
- 索引: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()`
- 转换至 go 内置类型: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()`
@@ -326,55 +298,13 @@ println(string(buf) == string(exp)) // true
- 迭代: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
- 修改: `Set()`, `SetByIndex()`, `Add()`
### `Ast.Visitor`
Sonic 提供了一个高级的 API 用于直接全量解析 JSON 到非标准容器里 (既不是 `struct` 也不是 `map[string]interface{}`) 且不需要借助任何中间表示 (`ast.Node` 或 `interface{}`)。举个例子,你可能定义了下述的类型,它们看起来像 `interface{}`,但实际上并不是:
```go
type UserNode interface {}
// the following types implement the UserNode interface.
type (
UserNull struct{}
UserBool struct{ Value bool }
UserInt64 struct{ Value int64 }
UserFloat64 struct{ Value float64 }
UserString struct{ Value string }
UserObject struct{ Value map[string]UserNode }
UserArray struct{ Value []UserNode }
)
```
Sonic 提供了下述的 API 来返回 **“对 JSON AST 的前序遍历”**。`ast.Visitor` 是一个 SAX 风格的接口,这在某些 C++ 的 JSON 解析库中被使用到。你需要自己实现一个 `ast.Visitor`,将它传递给 `ast.Preorder()` 方法。在你的实现中你可以使用自定义的类型来表示 JSON 的值。在你的 `ast.Visitor` 中,可能需要有一个 O(n) 空间复杂度的容器(比如说栈)来记录 object / array 的层级。
```go
func Preorder(str string, visitor Visitor, opts *VisitorOptions) error
type Visitor interface {
OnNull() error
OnBool(v bool) error
OnString(v string) error
OnInt64(v int64, n json.Number) error
OnFloat64(v float64, n json.Number) error
OnObjectBegin(capacity int) error
OnObjectKey(key string) error
OnObjectEnd() error
OnArrayBegin(capacity int) error
OnArrayEnd() error
}
```
详细用法参看 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go),我们还为 `UserNode` 实现了一个示例 `ast.Visitor`,你可以在 [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go) 中找到它。
## 兼容性
由于开发高性能代码的困难性, Sonic **不**保证对所有环境的支持。对于在不同环境中使用 Sonic 构建应用程序的开发者,我们有以下建议:
- 在 **Mac M1** 上开发:确保在您的计算机上安装了 Rosetta 2并在构建时设置 `GOARCH=amd64` 。 Rosetta 2 可以自动将 x86 二进制文件转换为 arm64 二进制文件,并在 Mac M1 上运行 x86 应用程序。
- 在 **Linux arm64** 上开发:您可以安装 qemu 并使用 `qemu-x86_64 -cpu max` 命令来将 x86 二进制文件转换为 arm64 二进制文件。qemu可以实现与Mac M1上的Rosetta 2类似的转换效果。
对于希望在不使用 qemu 下使用 sonic 的开发者,或者希望处理 JSON 时与 `encoding/JSON` 严格保持一致的开发者,我们在 `sonic.API` 中提供了一些兼容性 API
- `ConfigDefault`: 在支持 sonic 的环境下 sonic 的默认配置(`EscapeHTML=false``SortKeys=false`等)。行为与具有相应配置的 `encoding/json` 一致,一些选项,如 `SortKeys=false` 将无效。
- `ConfigStd`: 在支持 sonic 的环境下与标准库兼容的配置(`EscapeHTML=true``SortKeys=true`等)。行为与 `encoding/json` 一致。
- `ConfigFastest`: 在支持 sonic 的环境下运行最快的配置(`NoQuoteTextMarshaler=true`)。行为与具有相应配置的 `encoding/json` 一致,某些选项将无效。
@@ -382,9 +312,7 @@ type Visitor interface {
## 注意事项
### 预热
由于 Sonic 使用 [golang-asm](https://github.com/twitchyliquid64/golang-asm) 作为 JIT 汇编器,这个库并不适用于运行时编译,第一次运行一个大型模式可能会导致请求超时甚至进程内存溢出。为了更好地稳定性,我们建议在运行大型模式或在内存有限的应用中,在使用 `Marshal()/Unmarshal()` 前运行 `Pretouch()`。
```go
import (
"reflect"
@@ -399,7 +327,7 @@ func init() {
err := sonic.Pretouch(reflect.TypeOf(v))
// with more CompileOption...
err := sonic.Pretouch(reflect.TypeOf(v),
err := sonic.Pretouch(reflect.TypeOf(v),
// If the type is too deep nesting (nesting depth > option.DefaultMaxInlineDepth),
// you can set compile recursive loops in Pretouch for better stability in JIT.
option.WithCompileRecursiveDepth(loop),
@@ -414,17 +342,16 @@ func init() {
当解码 **没有转义字符的字符串**时, sonic 会从原始的 JSON 缓冲区内引用而不是复制到新的一个缓冲区中。这对 CPU 的性能方面很有帮助,但是可能因此在解码后对象仍在使用的时候将整个 JSON 缓冲区保留在内存中。实践中我们发现,通过引用 JSON 缓冲区引入的额外内存通常是解码后对象的 20% 至 80% ,一旦应用长期保留这些对象(如缓存以备重用),服务器所使用的内存可能会增加。我们提供了选项 `decoder.CopyString()` 供用户选择,不引用 JSON 缓冲区。这可能在一定程度上降低 CPU 性能。
### 传递字符串还是字节数组?
为了和 `encoding/json` 保持一致,我们提供了传递 `[]byte` 作为参数的 API ,但考虑到安全性,字符串到字节的复制是同时进行的,这在原始 JSON 非常大时可能会导致性能损失。因此,你可以使用 `UnmarshalString()` 和 `GetFromString()` 来传递字符串,只要你的原始数据是字符串,或**零拷贝类型转换**对于你的字节数组是安全的。我们也提供了 `MarshalString()` 的 API ,以便对编码的 JSON 字节数组进行**零拷贝类型转换**,因为 sonic 输出的字节始终是重复并且唯一的,所以这样是安全的。
### 加速 `encoding.TextMarshaler`
为了保证数据安全性, `sonic.Encoder` 默认会对来自 `encoding.TextMarshaler` 接口的字符串进行引用和转义,如果大部分数据都是这种形式那可能会导致很大的性能损失。我们提供了 `encoder.NoQuoteTextMarshaler` 选项来跳过这些操作,但你**必须**保证他们的输出字符串依照 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 进行了转义和引用。
### 泛型的性能优化
在 **完全解析**的场景下, `Unmarshal()` 表现得比 `Get()`+`Node.Interface()` 更好。但是如果你只有特定 JSON 的部分模式,你可以将 `Get()` 和 `Unmarshal()` 结合使用:
```go
import "github.com/bytedance/sonic"
@@ -432,9 +359,7 @@ node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user")
var user User // your partial schema...
err = sonic.UnmarshalString(node.Raw(), &user)
```
甚至如果你没有任何模式,可以用 `ast.Node` 代替 `map` 或 `interface` 作为泛型的容器:
```go
import "github.com/bytedance/sonic"
@@ -445,25 +370,13 @@ err = user.Check()
// err = user.LoadAll() // only call this when you want to use 'user' concurrently...
go someFunc(user)
```
为什么?因为 `ast.Node` 使用 `array` 来存储其子节点:
- 在插入(反序列化)和扫描(序列化)数据时,`Array` 的性能比 `Map` **好得多**
- **哈希**`map[x]`)的效率不如**索引**`array[x]`)高效,而 `ast.Node` 可以在数组和对象上使用索引;
- 使用 `Interface()` / `Map()` 意味着 sonic 必须解析所有的底层值,而 `ast.Node` 可以**按需解析**它们。
**注意**:由于 `ast.Node` 的惰性加载设计,其**不能**直接保证并发安全性,但你可以调用 `Node.Load()` / `Node.LoadAll()` 来实现并发安全。尽管可能会带来性能损失,但仍比转换成 `map` 或 `interface{}` 更为高效。
### 使用 `ast.Node` 还是 `ast.Visitor`
对于泛型数据的解析,`ast.Node` 在大多数场景上应该能够满足你的需求。
然而,`ast.Node` 是一种针对部分解析 JSON 而设计的泛型容器,它包含一些特殊设计,比如惰性加载,如果你希望像 `Unmarshal()` 那样直接解析整个 JSON这些设计可能并不合适。尽管 `ast.Node` 相较于 `map` 或 `interface{}` 来说是更好的一种泛型容器,但它毕竟也是一种中间表示,如果你的最终类型是自定义的,你还得在解析完成后将上述类型转化成你自定义的类型。
在上述场景中,如果想要有更极致的性能,`ast.Visitor` 会是更好的选择。它采用和 `Unmarshal()` 类似的形式解析 JSON并且你可以直接使用你的最终类型去表示 JSON AST而不需要经过额外的任何中间表示。
但是,`ast.Visitor` 并不是一个很易用的 API。你可能需要写大量的代码去实现自己的 `ast.Visitor`,并且需要在解析过程中仔细维护树的层级。如果你决定要使用这个 API请先仔细阅读 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) 中的注释。
## 社区
Sonic 是 [CloudWeGo](https://www.cloudwego.io/) 下的一个子项目。我们致力于构建云原生生态系统。

View File

@@ -20,7 +20,6 @@ import (
`io`
`github.com/bytedance/sonic/ast`
`github.com/bytedance/sonic/internal/rt`
)
// Config is a combination of sonic/encoder.Options and sonic/decoder.Options
@@ -69,18 +68,11 @@ type Config struct {
// ValidateString indicates decoder and encoder to valid string values: decoder will return errors
// when unescaped control chars(\u0000-\u001f) in the string value of JSON.
ValidateString bool
// NoValidateJSONMarshaler indicates that the encoder should not validate the output string
// after encoding the JSONMarshaler to JSON.
NoValidateJSONMarshaler bool
// NoEncoderNewline indicates that the encoder should not add a newline after every message
NoEncoderNewline bool
ValidateString bool
}
var (
// ConfigDefault is the default config of APIs, aiming at efficiency and safety.
// ConfigDefault is the default config of APIs, aiming at efficiency and safty.
ConfigDefault = Config{}.Froze()
// ConfigStd is the standard config of APIs, aiming at being compatible with encoding/json.
@@ -95,7 +87,6 @@ var (
// ConfigFastest is the fastest config of APIs, aiming at speed.
ConfigFastest = Config{
NoQuoteTextMarshaler: true,
NoValidateJSONMarshaler: true,
}.Froze()
)
@@ -118,7 +109,7 @@ type API interface {
NewEncoder(writer io.Writer) Encoder
// NewDecoder create a Decoder holding reader
NewDecoder(reader io.Reader) Decoder
// Valid validates the JSON-encoded bytes and reports if it is valid
// Valid validates the JSON-encoded bytes and reportes if it is valid
Valid(data []byte) bool
}
@@ -174,41 +165,22 @@ func UnmarshalString(buf string, val interface{}) error {
return ConfigDefault.UnmarshalFromString(buf, val)
}
// Get searches and locates the given path from src json,
// and returns a ast.Node representing the partially json.
// Get searches the given path from json,
// and returns its representing ast.Node.
//
// Each path arg must be integer or string:
// - Integer is target index(>=0), means searching current node as array.
// - String is target key, means searching current node as object.
//
//
// Notice: It expects the src json is **Well-formed** and **Immutable** when calling,
// otherwise it may return unexpected result.
// Considering memory safety, the returned JSON is **Copied** from the input
// Note, the api expects the json is well-formed at least,
// otherwise it may return unexpected result.
func Get(src []byte, path ...interface{}) (ast.Node, error) {
return GetCopyFromString(rt.Mem2Str(src), path...)
return GetFromString(string(src), path...)
}
// GetFromString is same with Get except src is string.
//
// WARNING: The returned JSON is **Referenced** from the input.
// Caching or long-time holding the returned node may cause OOM.
// If your src is big, consider use GetFromStringCopy().
// GetFromString is same with Get except src is string,
// which can reduce unnecessary memory copy.
func GetFromString(src string, path ...interface{}) (ast.Node, error) {
return ast.NewSearcher(src).GetByPath(path...)
}
// GetCopyFromString is same with Get except src is string
func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) {
return ast.NewSearcher(src).GetByPathCopy(path...)
}
// Valid reports whether data is a valid JSON encoding.
func Valid(data []byte) bool {
return ConfigDefault.Valid(data)
}
// Valid reports whether data is a valid JSON encoding.
func ValidString(data string) bool {
return ConfigDefault.Valid(rt.Str2Mem(data))
}
}

View File

@@ -1,40 +1,36 @@
// +build !amd64,!arm64 go1.23 !go1.16 arm64,!go1.20
// +build !amd64 go1.21
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`encoding/base64`
`encoding/json`
`unicode/utf8`
`fmt`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
func init() {
println("WARNING:(ast) sonic only supports Go1.16~1.22, but your environment is not suitable")
}
func quote(buf *[]byte, val string) {
quoteString(buf, val)
}
// unquote unescapes a internal JSON string (it doesn't count quotas at the begining and end)
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
@@ -44,9 +40,16 @@ func unquote(src string) (string, types.ParsingError) {
return rt.Mem2Str(out), 0
}
func decodeBase64(src string) ([]byte, error) {
return base64.StdEncoding.DecodeString(src)
}
func encodeBase64(src []byte) string {
return base64.StdEncoding.EncodeToString(src)
}
func (self *Parser) decodeValue() (val types.JsonState) {
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
e, v := decodeValue(self.s, self.p)
if e < 0 {
return v
}
@@ -81,34 +84,37 @@ func (self *Node) encodeInterface(buf *[]byte) error {
return nil
}
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
self.parser.p = 0
var err types.ParsingError
for _, p := range path {
if idx, ok := p.(int); ok && idx >= 0 {
if err := self.searchIndex(idx); err != 0 {
return self.p, err
if err = self.parser.searchIndex(idx); err != 0 {
return Node{}, self.parser.ExportError(err)
}
} else if key, ok := p.(string); ok {
if err := self.searchKey(key); err != 0 {
return self.p, err
if err = self.parser.searchKey(key); err != 0 {
return Node{}, self.parser.ExportError(err)
}
} else {
panic("path must be either int(>=0) or string")
}
}
var start int
var e types.ParsingError
if validate {
start, e = self.skip()
} else {
start, e = self.skipFast()
var start = self.parser.p
if start, err = self.parser.skip(); err != 0 {
return Node{}, self.parser.ExportError(err)
}
if e != 0 {
return self.p, e
ns := len(self.parser.s)
if self.parser.p > ns || start >= ns || start>=self.parser.p {
return Node{}, fmt.Errorf("skip %d char out of json boundary", start)
}
return start, 0
}
func validate_utf8(str string) bool {
return utf8.ValidString(str)
}
t := switchRawType(self.parser.s[start])
if t == _V_NONE {
return Node{}, self.parser.ExportError(err)
}
return newRawNode(self.parser.s[start:self.parser.p], t), nil
}

View File

@@ -220,7 +220,7 @@ func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
return ret, v, nil
}
func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
func decodeValue(src string, pos int) (ret int, v types.JsonState) {
pos = skipBlank(src, pos)
if pos < 0 {
return pos, types.JsonState{Vt: types.ValueType(pos)}
@@ -256,30 +256,20 @@ func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState)
}
return ret, types.JsonState{Vt: types.V_FALSE}
case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
if skipnum {
ret = skipNumber(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
} else {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
} else {
var iv int64
ret, iv, _ = decodeInt64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
} else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
var fv float64
ret, fv, _ = decodeFloat64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
} else {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
var iv int64
ret, iv, _ = decodeInt64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
} else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
var fv float64
ret, fv, _ = decodeFloat64(src, pos)
if ret >= 0 {
return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
} else {
return ret, types.JsonState{Vt: types.ValueType(ret)}
}
default:
return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
}
@@ -583,36 +573,3 @@ func skipArray(src string, pos int) (ret int, start int) {
pos++
}
}
// DecodeString decodes a JSON string from pos and return golang string.
// - needEsc indicates if to unescaped escaping chars
// - hasEsc tells if the returned string has escaping chars
// - validStr enables validating UTF8 charset
//
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
p := NewParserObj(src)
p.p = pos
switch val := p.decodeValue(); val.Vt {
case types.V_STRING:
str := p.s[val.Iv : p.p-1]
if validStr && !validate_utf8(str) {
return "", -int(types.ERR_INVALID_UTF8), false
}
/* fast path: no escape sequence */
if val.Ep == -1 {
return str, p.p, false
} else if !needEsc {
return str, p.p, true
}
/* unquote the string */
out, err := unquote(str)
/* check for errors */
if err != 0 {
return "", -int(err), true
} else {
return out, p.p, true
}
default:
return "", -int(_ERR_UNSUPPORT_TYPE), false
}
}

View File

@@ -19,6 +19,8 @@ package ast
import (
`sync`
`unicode/utf8`
`github.com/bytedance/sonic/internal/rt`
)
const (
@@ -163,18 +165,18 @@ func (self *Node) encodeFalse(buf *[]byte) error {
}
func (self *Node) encodeNumber(buf *[]byte) error {
str := self.toString()
str := rt.StrFrom(self.p, self.v)
*buf = append(*buf, str...)
return nil
}
func (self *Node) encodeString(buf *[]byte) error {
if self.l == 0 {
if self.v == 0 {
*buf = append(*buf, '"', '"')
return nil
}
quote(buf, self.toString())
quote(buf, rt.StrFrom(self.p, self.v))
return nil
}
@@ -193,17 +195,16 @@ func (self *Node) encodeArray(buf *[]byte) error {
*buf = append(*buf, '[')
var started bool
for i := 0; i < nb; i++ {
n := self.nodeAt(i)
if !n.Exists() {
continue
}
if started {
*buf = append(*buf, ',')
}
started = true
if err := n.encode(buf); err != nil {
var p = (*Node)(self.p)
err := p.encode(buf)
if err != nil {
return err
}
for i := 1; i < nb; i++ {
*buf = append(*buf, ',')
p = p.unsafe_next()
err := p.encode(buf)
if err != nil {
return err
}
}
@@ -239,21 +240,20 @@ func (self *Node) encodeObject(buf *[]byte) error {
*buf = append(*buf, '{')
var started bool
for i := 0; i < nb; i++ {
n := self.pairAt(i)
if n == nil || !n.Value.Exists() {
continue
}
if started {
*buf = append(*buf, ',')
}
started = true
if err := n.encode(buf); err != nil {
var p = (*Pair)(self.p)
err := p.encode(buf)
if err != nil {
return err
}
for i := 1; i < nb; i++ {
*buf = append(*buf, ',')
p = p.unsafe_next()
err := p.encode(buf)
if err != nil {
return err
}
}
*buf = append(*buf, '}')
return nil
}
}

View File

@@ -8,33 +8,6 @@ import (
`github.com/bytedance/sonic/internal/native/types`
)
func newError(err types.ParsingError, msg string) *Node {
return &Node{
t: V_ERROR,
l: uint(err),
p: unsafe.Pointer(&msg),
}
}
// Error returns error message if the node is invalid
func (self Node) Error() string {
if self.t != V_ERROR {
return ""
} else {
return *(*string)(self.p)
}
}
func newSyntaxError(err SyntaxError) *Node {
msg := err.Description()
return &Node{
t: V_ERROR,
l: uint(err.Code),
p: unsafe.Pointer(&msg),
}
}
func (self *Parser) syntaxError(err types.ParsingError) SyntaxError {
return SyntaxError{
Pos : self.p,
@@ -43,17 +16,12 @@ func (self *Parser) syntaxError(err types.ParsingError) SyntaxError {
}
}
func unwrapError(err error) *Node {
if se, ok := err.(*Node); ok {
return se
}else if sse, ok := err.(Node); ok {
return &sse
} else {
msg := err.Error()
return &Node{
t: V_ERROR,
p: unsafe.Pointer(&msg),
}
func newSyntaxError(err SyntaxError) *Node {
msg := err.Description()
return &Node{
t: V_ERROR,
v: int64(err.Code),
p: unsafe.Pointer(&msg),
}
}

View File

@@ -32,11 +32,7 @@ func (self *Node) Values() (ListIterator, error) {
if err := self.should(types.V_ARRAY, "an array"); err != nil {
return ListIterator{}, err
}
return self.values(), nil
}
func (self *Node) values() ListIterator {
return ListIterator{Iterator{p: self}}
return ListIterator{Iterator{p: self}}, nil
}
// Properties returns iterator for object's children traversal
@@ -44,11 +40,7 @@ func (self *Node) Properties() (ObjectIterator, error) {
if err := self.should(types.V_OBJECT, "an object"); err != nil {
return ObjectIterator{}, err
}
return self.properties(), nil
}
func (self *Node) properties() ObjectIterator {
return ObjectIterator{Iterator{p: self}}
return ObjectIterator{Iterator{p: self}}, nil
}
type Iterator struct {
@@ -90,54 +82,26 @@ type ObjectIterator struct {
Iterator
}
func (self *ListIterator) next() *Node {
next_start:
if !self.HasNext() {
return nil
} else {
n := self.p.nodeAt(self.i)
self.i++
if !n.Exists() {
goto next_start
}
return n
}
}
// Next scans through children of underlying V_ARRAY,
// copies each child to v, and returns .HasNext().
func (self *ListIterator) Next(v *Node) bool {
n := self.next()
if n == nil {
return false
}
*v = *n
return true
}
func (self *ObjectIterator) next() *Pair {
next_start:
if !self.HasNext() {
return nil
return false
} else {
n := self.p.pairAt(self.i)
self.i++
if n == nil || !n.Value.Exists() {
goto next_start
}
return n
*v, self.i = *self.p.nodeAt(self.i), self.i + 1
return true
}
}
// Next scans through children of underlying V_OBJECT,
// copies each child to v, and returns .HasNext().
func (self *ObjectIterator) Next(p *Pair) bool {
n := self.next()
if n == nil {
if !self.HasNext() {
return false
} else {
*p, self.i = *self.p.pairAt(self.i), self.i + 1
return true
}
*p = *n
return true
}
// Sequence represents scanning path of single-layer nodes.
@@ -165,39 +129,36 @@ type Scanner func(path Sequence, node *Node) bool
//
// Especailly, if the node is not V_ARRAY or V_OBJECT,
// the node itself will be returned and Sequence.Index == -1.
//
// NOTICE: A unsetted node WON'T trigger sc, but its index still counts into Path.Index
func (self *Node) ForEach(sc Scanner) error {
switch self.itype() {
case types.V_ARRAY:
iter, err := self.Values()
ns, err := self.UnsafeArray()
if err != nil {
return err
}
v := iter.next()
for v != nil {
if !sc(Sequence{iter.i-1, nil}, v) {
return nil
for i := range ns {
if !sc(Sequence{i, nil}, &ns[i]) {
return err
}
v = iter.next()
}
case types.V_OBJECT:
iter, err := self.Properties()
ns, err := self.UnsafeMap()
if err != nil {
return err
}
v := iter.next()
for v != nil {
if !sc(Sequence{iter.i-1, &v.Key}, &v.Value) {
return nil
for i := range ns {
if !sc(Sequence{i, &ns[i].Key}, &ns[i].Value) {
return err
}
v = iter.next()
}
default:
if self.Check() != nil {
return self
}
sc(Sequence{-1, nil}, self)
}
return nil
return self.Check()
}
type PairSlice []Pair
func (self PairSlice) Sort() {
radixQsort(self, 0, maxDepth(len(self)))
}

File diff suppressed because it is too large Load Diff

View File

@@ -18,15 +18,11 @@ package ast
import (
`fmt`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
const (
_DEFAULT_NODE_CAP int = 8
_APPEND_GROW_SHIFT = 1
)
const _DEFAULT_NODE_CAP int = 16
const (
_ERR_NOT_FOUND types.ParsingError = 33
@@ -34,10 +30,7 @@ const (
)
var (
// ErrNotExist means both key and value doesn't exist
ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
// ErrUnsupportType means API on the node is unsupported
ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
)
@@ -46,7 +39,6 @@ type Parser struct {
s string
noLazy bool
skipValue bool
dbuf *byte
}
/** Parser Private Methods **/
@@ -115,7 +107,7 @@ func (self *Parser) lspace(sp int) int {
return sp
}
func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) {
sp := self.p
ns := len(self.s)
@@ -127,7 +119,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
/* check for empty array */
if self.s[self.p] == ']' {
self.p++
return Node{t: types.V_ARRAY}, 0
return emptyArrayNode, 0
}
/* allocate array space and parse every element */
@@ -157,7 +149,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
}
/* add the value to result */
ret.Push(val)
ret = append(ret, val)
self.p = self.lspace(self.p)
/* check for EOF */
@@ -168,17 +160,17 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
/* check for the next character */
switch self.s[self.p] {
case ',' : self.p++
case ']' : self.p++; return newArray(ret), 0
default:
// if val.isLazy() {
// return newLazyArray(self, ret), 0
// }
return Node{}, types.ERR_INVALID_CHAR
case ']' : self.p++; return NewArray(ret), 0
default:
if val.isLazy() {
return newLazyArray(self, ret), 0
}
return Node{}, types.ERR_INVALID_CHAR
}
}
}
func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) {
sp := self.p
ns := len(self.s)
@@ -190,7 +182,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
/* check for empty object */
if self.s[self.p] == '}' {
self.p++
return Node{t: types.V_OBJECT}, 0
return emptyObjectNode, 0
}
/* decode each pair */
@@ -243,8 +235,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
}
/* add the value to result */
// FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
ret.Push(Pair{Key: key, Value: val})
ret = append(ret, Pair{Key: key, Value: val})
self.p = self.lspace(self.p)
/* check for EOF */
@@ -255,11 +246,11 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
/* check for the next character */
switch self.s[self.p] {
case ',' : self.p++
case '}' : self.p++; return newObject(ret), 0
case '}' : self.p++; return NewObject(ret), 0
default:
// if val.isLazy() {
// return newLazyObject(self, ret), 0
// }
if val.isLazy() {
return newLazyObject(self, ret), 0
}
return Node{}, types.ERR_INVALID_CHAR
}
}
@@ -299,23 +290,15 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
case types.V_FALSE : return falseNode, 0
case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
case types.V_ARRAY:
if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
self.p = p + 1
return Node{t: types.V_ARRAY}, 0
}
if self.noLazy {
return self.decodeArray(new(linkedNodes))
return self.decodeArray(make([]Node, 0, _DEFAULT_NODE_CAP))
}
return newLazyArray(self), 0
return newLazyArray(self, make([]Node, 0, _DEFAULT_NODE_CAP)), 0
case types.V_OBJECT:
if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
self.p = p + 1
return Node{t: types.V_OBJECT}, 0
}
if self.noLazy {
return self.decodeObject(new(linkedPairs))
return self.decodeObject(make([]Pair, 0, _DEFAULT_NODE_CAP))
}
return newLazyObject(self), 0
return newLazyObject(self, make([]Pair, 0, _DEFAULT_NODE_CAP)), 0
case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
default : return Node{}, types.ParsingError(-val.Vt)
@@ -446,7 +429,7 @@ func (self *Node) skipNextNode() *Node {
}
parser, stack := self.getParserAndArrayStack()
ret := &stack.v
ret := stack.v
sp := parser.p
ns := len(parser.s)
@@ -475,8 +458,7 @@ func (self *Node) skipNextNode() *Node {
}
/* add the value to result */
ret.Push(val)
self.l++
ret = append(ret, val)
parser.p = parser.lspace(parser.p)
/* check for EOF */
@@ -488,11 +470,12 @@ func (self *Node) skipNextNode() *Node {
switch parser.s[parser.p] {
case ',':
parser.p++
return ret.At(ret.Len()-1)
self.setLazyArray(parser, ret)
return &ret[len(ret)-1]
case ']':
parser.p++
self.setArray(ret)
return ret.At(ret.Len()-1)
return &ret[len(ret)-1]
default:
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
@@ -504,7 +487,7 @@ func (self *Node) skipNextPair() (*Pair) {
}
parser, stack := self.getParserAndObjectStack()
ret := &stack.v
ret := stack.v
sp := parser.p
ns := len(parser.s)
@@ -558,8 +541,7 @@ func (self *Node) skipNextPair() (*Pair) {
}
/* add the value to result */
ret.Push(Pair{Key: key, Value: val})
self.l++
ret = append(ret, Pair{Key: key, Value: val})
parser.p = parser.lspace(parser.p)
/* check for EOF */
@@ -571,11 +553,12 @@ func (self *Node) skipNextPair() (*Pair) {
switch parser.s[parser.p] {
case ',':
parser.p++
return ret.At(ret.Len()-1)
self.setLazyObject(parser, ret)
return &ret[len(ret)-1]
case '}':
parser.p++
self.setObject(ret)
return ret.At(ret.Len()-1)
return &ret[len(ret)-1]
default:
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
}
@@ -618,30 +601,10 @@ func LoadsUseNumber(src string) (int, interface{}, error) {
}
}
// NewParser returns pointer of new allocated parser
func NewParser(src string) *Parser {
return &Parser{s: src}
}
// NewParser returns new allocated parser
func NewParserObj(src string) Parser {
return Parser{s: src}
}
// decodeNumber controls if parser decodes the number values instead of skip them
// WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser
// otherwise the memory CANNOT be reused
func (self *Parser) decodeNumber(decode bool) {
if !decode && self.dbuf != nil {
types.FreeDbuf(self.dbuf)
self.dbuf = nil
return
}
if decode && self.dbuf == nil {
self.dbuf = types.NewDbuf()
}
}
// ExportError converts types.ParsingError to std Error
func (self *Parser) ExportError(err types.ParsingError) error {
if err == _ERR_NOT_FOUND {
@@ -652,9 +615,4 @@ func (self *Parser) ExportError(err types.ParsingError) error {
Src : self.s,
Code: err,
}.Description())
}
func backward(src string, i int) int {
for ; i>=0 && isSpace(src[i]); i-- {}
return i
}
}

View File

@@ -16,11 +16,6 @@
package ast
import (
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/native/types`
)
type Searcher struct {
parser Parser
}
@@ -33,106 +28,3 @@ func NewSearcher(str string) *Searcher {
},
}
}
// GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location
func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) {
return self.getByPath(true, true, path...)
}
// GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location
//
// WARN: this search directly refer partial json from top json, which has faster speed,
// may consumes more memory.
func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
return self.getByPath(false, true, path...)
}
func (self *Searcher) getByPath(copystring bool, validate bool, path ...interface{}) (Node, error) {
var err types.ParsingError
var start int
self.parser.p = 0
start, err = self.parser.getByPath(validate, path...)
if err != 0 {
// for compatibility with old version
if err == types.ERR_NOT_FOUND {
return Node{}, ErrNotExist
}
if err == types.ERR_UNSUPPORT_TYPE {
panic("path must be either int(>=0) or string")
}
return Node{}, self.parser.syntaxError(err)
}
t := switchRawType(self.parser.s[start])
if t == _V_NONE {
return Node{}, self.parser.ExportError(err)
}
// copy string to reducing memory usage
var raw string
if copystring {
raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p]))
} else {
raw = self.parser.s[start:self.parser.p]
}
return newRawNode(raw, t), nil
}
// GetByPath searches a path and returns relaction and types of target
func _GetByPath(src string, path ...interface{}) (start int, end int, typ int, err error) {
p := NewParserObj(src)
s, e := p.getByPath(false, path...)
if e != 0 {
// for compatibility with old version
if e == types.ERR_NOT_FOUND {
return -1, -1, 0, ErrNotExist
}
if e == types.ERR_UNSUPPORT_TYPE {
panic("path must be either int(>=0) or string")
}
return -1, -1, 0, p.syntaxError(e)
}
t := switchRawType(p.s[s])
if t == _V_NONE {
return -1, -1, 0, ErrNotExist
}
if t == _V_NUMBER {
p.p = 1 + backward(p.s, p.p-1)
}
return s, p.p, int(t), nil
}
// ValidSyntax check if a json has a valid JSON syntax,
// while not validate UTF-8 charset
func _ValidSyntax(json string) bool {
p := NewParserObj(json)
_, e := p.skip()
if e != 0 {
return false
}
if skipBlank(p.s, p.p) != -int(types.ERR_EOF) {
return false
}
return true
}
// SkipFast skip a json value in fast-skip algs,
// while not strictly validate JSON syntax and UTF-8 charset.
func _SkipFast(src string, i int) (int, int, error) {
p := NewParserObj(src)
p.p = i
s, e := p.skipFast()
if e != 0 {
return -1, -1, p.ExportError(e)
}
t := switchRawType(p.s[s])
if t == _V_NONE {
return -1, -1, ErrNotExist
}
if t == _V_NUMBER {
p.p = 1 + backward(p.s, p.p-1)
}
return s, p.p, nil
}

View File

@@ -1,4 +1,4 @@
// +build !amd64 !go1.16 go1.23
// +build !amd64 go1.21
/*
* Copyright 2021 ByteDance Inc.

View File

@@ -1,4 +1,4 @@
// +build !amd64 !go1.16 go1.23
// +build !amd64 go1.21
/*
* Copyright 2023 ByteDance Inc.
@@ -14,34 +14,28 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
*/
package decoder
import (
`bytes`
`encoding/json`
`io`
`reflect`
`unsafe`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/option`
`encoding/json`
`bytes`
`reflect`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/option`
`io`
)
func init() {
println("WARNING: sonic only supports Go1.16~1.22 && CPU amd64, but your environment is not suitable")
}
const (
_F_use_int64 = 0
_F_disable_urc = 2
_F_disable_unknown = 3
_F_copy_string = 4
_F_use_number = types.B_USE_NUMBER
_F_validate_string = types.B_VALIDATE_STRING
_F_allow_control = types.B_ALLOW_CONTROL
_F_use_int64 = iota
_F_use_number
_F_disable_urc
_F_disable_unknown
_F_copy_string
_F_validate_string
_F_allow_control = 31
)
type Options uint64
@@ -112,10 +106,10 @@ func (self *Decoder) CheckTrailings() error {
func (self *Decoder) Decode(val interface{}) error {
r := bytes.NewBufferString(self.s)
dec := json.NewDecoder(r)
if (self.f & uint64(OptionUseNumber)) != 0 {
if (self.f | uint64(OptionUseNumber)) != 0 {
dec.UseNumber()
}
if (self.f & uint64(OptionDisableUnknown)) != 0 {
if (self.f | uint64(OptionDisableUnknown)) != 0 {
dec.DisallowUnknownFields()
}
return dec.Decode(val)
@@ -169,26 +163,34 @@ func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
return nil
}
type StreamDecoder = json.Decoder
type StreamDecoder struct {
r io.Reader
buf []byte
scanp int
scanned int64
err error
Decoder
}
// NewStreamDecoder adapts to encoding/json.NewDecoder API.
//
// NewStreamDecoder returns a new decoder that reads from r.
func NewStreamDecoder(r io.Reader) *StreamDecoder {
return json.NewDecoder(r)
return &StreamDecoder{r : r}
}
// SyntaxError represents json syntax error
type SyntaxError json.SyntaxError
// Description
func (s SyntaxError) Description() string {
return (*json.SyntaxError)(unsafe.Pointer(&s)).Error()
}
// Error
func (s SyntaxError) Error() string {
return (*json.SyntaxError)(unsafe.Pointer(&s)).Error()
// Decode decodes input stream into val with corresponding data.
// Redundantly bytes may be read and left in its buffer, and can be used at next call.
// Either io error from underlying io.Reader (except io.EOF)
// or syntax error from data will be recorded and stop subsequently decoding.
func (self *StreamDecoder) Decode(val interface{}) (err error) {
dec := json.NewDecoder(self.r)
if (self.f | uint64(OptionUseNumber)) != 0 {
dec.UseNumber()
}
if (self.f | uint64(OptionDisableUnknown)) != 0 {
dec.DisallowUnknownFields()
}
return dec.Decode(val)
}
// MismatchTypeError represents dismatching between json and object
type MismatchTypeError json.UnmarshalTypeError

View File

@@ -1,4 +1,4 @@
// +build !amd64 !go1.16 go1.23
// +build !amd64 go1.21
/*
* Copyright 2023 ByteDance Inc.
@@ -27,13 +27,6 @@ import (
`github.com/bytedance/sonic/option`
)
func init() {
println("WARNING:(encoder) sonic only supports Go1.16~1.22 && CPU amd64, but your environment is not suitable")
}
// EnableFallback indicates if encoder use fallback
const EnableFallback = true
// Options is a set of encoding options.
type Options uint64
@@ -44,8 +37,6 @@ const (
bitNoQuoteTextMarshaler
bitNoNullSliceOrMap
bitValidateString
bitNoValidateJSONMarshaler
bitNoEncoderNewline
// used for recursive compile
bitPointerValue = 63
@@ -77,13 +68,6 @@ const (
// ValidateString indicates that encoder should validate the input string
// before encoding it into JSON.
ValidateString Options = 1 << bitValidateString
// NoValidateJSONMarshaler indicates that the encoder should not validate the output string
// after encoding the JSONMarshaler to JSON.
NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler
// NoEncoderNewline indicates that the encoder should not add a newline after every message
NoEncoderNewline Options = 1 << bitNoEncoderNewline
// CompatibleWithStd is used to be compatible with std encoder.
CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
@@ -128,24 +112,6 @@ func (self *Encoder) SetValidateString(f bool) {
}
}
// SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
if f {
self.Opts |= NoValidateJSONMarshaler
} else {
self.Opts &= ^NoValidateJSONMarshaler
}
}
// SetNoEncoderNewline specifies if option NoEncoderNewline opens
func (self *Encoder) SetNoEncoderNewline(f bool) {
if f {
self.Opts |= NoEncoderNewline
} else {
self.Opts &= ^NoEncoderNewline
}
}
// SetCompactMarshaler specifies if option CompactMarshaler opens
func (self *Encoder) SetCompactMarshaler(f bool) {
if f {
@@ -191,19 +157,15 @@ func Encode(val interface{}, opts Options) ([]byte, error) {
// EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
// a new one.
func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
if buf == nil {
panic("user-supplied buffer buf is nil")
}
w := bytes.NewBuffer(*buf)
enc := json.NewEncoder(w)
enc.SetEscapeHTML((opts & EscapeHTML) != 0)
err := enc.Encode(val)
*buf = w.Bytes()
l := len(*buf)
if l > 0 && (opts & NoEncoderNewline != 0) && (*buf)[l-1] == '\n' {
*buf = (*buf)[:l-1]
}
return err
if buf == nil {
panic("user-supplied buffer buf is nil")
}
w := bytes.NewBuffer(*buf)
enc := json.NewEncoder(w)
enc.SetEscapeHTML((opts & EscapeHTML) != 0)
err := enc.Encode(val)
*buf = w.Bytes()
return err
}
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
@@ -250,12 +212,23 @@ func Valid(data []byte) (ok bool, start int) {
}
// StreamEncoder uses io.Writer as
type StreamEncoder = json.Encoder
type StreamEncoder struct {
w io.Writer
Encoder
}
// NewStreamEncoder adapts to encoding/json.NewDecoder API.
//
// NewStreamEncoder returns a new encoder that write to w.
func NewStreamEncoder(w io.Writer) *StreamEncoder {
return json.NewEncoder(w)
return &StreamEncoder{w: w}
}
// Encode encodes interface{} as JSON to io.Writer
func (enc *StreamEncoder) Encode(val interface{}) (err error) {
jenc := json.NewEncoder(enc.w)
jenc.SetEscapeHTML((enc.Opts & EscapeHTML) != 0)
jenc.SetIndent(enc.prefix, enc.indent)
err = jenc.Encode(val)
return err
}

View File

@@ -831,7 +831,7 @@ func (self *_Compiler) compileStructFieldZero(p *_Program, vt reflect.Type) {
case reflect.Float32 : p.add(_OP_is_zero_4)
case reflect.Float64 : p.add(_OP_is_zero_8)
case reflect.String : p.add(_OP_is_nil_p1)
case reflect.Interface : p.add(_OP_is_nil)
case reflect.Interface : p.add(_OP_is_nil_p1)
case reflect.Map : p.add(_OP_is_zero_map)
case reflect.Ptr : p.add(_OP_is_nil)
case reflect.Slice : p.add(_OP_is_nil_p1)

View File

@@ -40,8 +40,6 @@ const (
bitNoQuoteTextMarshaler
bitNoNullSliceOrMap
bitValidateString
bitNoValidateJSONMarshaler
bitNoEncoderNewline
// used for recursive compile
bitPointerValue = 63
@@ -73,13 +71,6 @@ const (
// ValidateString indicates that encoder should validate the input string
// before encoding it into JSON.
ValidateString Options = 1 << bitValidateString
// NoValidateJSONMarshaler indicates that the encoder should not validate the output string
// after encoding the JSONMarshaler to JSON.
NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler
// NoEncoderNewline indicates that the encoder should not add a newline after every message
NoEncoderNewline Options = 1 << bitNoEncoderNewline
// CompatibleWithStd is used to be compatible with std encoder.
CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
@@ -124,25 +115,6 @@ func (self *Encoder) SetValidateString(f bool) {
}
}
// SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
if f {
self.Opts |= NoValidateJSONMarshaler
} else {
self.Opts &= ^NoValidateJSONMarshaler
}
}
// SetNoEncoderNewline specifies if option NoEncoderNewline opens
func (self *Encoder) SetNoEncoderNewline(f bool) {
if f {
self.Opts |= NoEncoderNewline
} else {
self.Opts &= ^NoEncoderNewline
}
}
// SetCompactMarshaler specifies if option CompactMarshaler opens
func (self *Encoder) SetCompactMarshaler(f bool) {
if f {
@@ -320,6 +292,7 @@ func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
cfg := option.DefaultCompileOptions()
for _, opt := range opts {
opt(&cfg)
break
}
return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
}
@@ -337,7 +310,7 @@ func Valid(data []byte) (ok bool, start int) {
s := rt.Mem2Str(data)
p := 0
m := types.NewStateMachine()
ret := native.ValidateOne(&s, &p, m, types.F_VALIDATE_STRING)
ret := native.ValidateOne(&s, &p, m)
types.FreeStateMachine(m)
if ret < 0 {
@@ -352,4 +325,4 @@ func Valid(data []byte) (ok bool, start int) {
}
return true, ret
}
}

View File

@@ -36,8 +36,7 @@ func NewStreamEncoder(w io.Writer) *StreamEncoder {
// Encode encodes interface{} as JSON to io.Writer
func (enc *StreamEncoder) Encode(val interface{}) (err error) {
buf := newBytes()
out := buf
out := newBytes()
/* encode into the buffer */
err = EncodeInto(&out, val, enc.Opts)
@@ -55,9 +54,7 @@ func (enc *StreamEncoder) Encode(val interface{}) (err error) {
}
// according to standard library, terminate each value with a newline...
if enc.Opts & NoEncoderNewline == 0 {
buf.WriteByte('\n')
}
buf.WriteByte('\n')
/* copy into io.Writer */
_, err = io.Copy(enc.w, buf)
@@ -78,12 +75,10 @@ func (enc *StreamEncoder) Encode(val interface{}) (err error) {
}
// according to standard library, terminate each value with a newline...
if enc.Opts & NoEncoderNewline == 0 {
enc.w.Write([]byte{'\n'})
}
enc.w.Write([]byte{'\n'})
}
free_bytes:
freeBytes(buf)
freeBytes(out)
return err
}
}

View File

@@ -72,6 +72,18 @@ func (self *BaseAssembler) NOPn(n int) {
}
}
func (self *BaseAssembler) StorePtr(ptr int64, to obj.Addr, tmp obj.Addr) {
if (to.Type != obj.TYPE_MEM) || (tmp.Type != obj.TYPE_REG) {
panic("must store imm to memory, tmp must be register")
}
if (ptr >> 32) != 0 {
self.Emit("MOVQ", Imm(ptr), tmp)
self.Emit("MOVQ", tmp, to)
} else {
self.Emit("MOVQ", Imm(ptr), to);
}
}
func (self *BaseAssembler) Byte(v ...byte) {
for ; len(v) >= 8; v = v[8:] { self.From("QUAD", Imm(rt.Get64(v))) }
for ; len(v) >= 4; v = v[4:] { self.From("LONG", Imm(int64(rt.Get32(v)))) }

View File

@@ -24,6 +24,11 @@ import (
`github.com/twitchyliquid64/golang-asm/obj`
)
//go:noescape
//go:linkname getitab runtime.getitab
//goland:noinspection ALL
func getitab(inter *rt.GoType, typ *rt.GoType, canfail bool) *rt.GoItab
func Func(f interface{}) obj.Addr {
if p := rt.UnpackEface(f); p.Type.Kind() != reflect.Func {
panic("f is not a function")
@@ -37,7 +42,7 @@ func Type(t reflect.Type) obj.Addr {
}
func Itab(i *rt.GoType, t reflect.Type) obj.Addr {
return Imm(int64(uintptr(unsafe.Pointer(rt.Getitab(rt.IfaceType(i), rt.UnpackType(t), false)))))
return Imm(int64(uintptr(unsafe.Pointer(getitab(i, rt.UnpackType(t), false)))))
}
func Gitab(i *rt.GoItab) obj.Addr {

View File

@@ -24,10 +24,12 @@ import (
`github.com/bytedance/sonic/internal/native/avx2`
`github.com/bytedance/sonic/internal/native/sse`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)
const MaxFrameSize uintptr = 400
const (
MaxFrameSize uintptr = 400
BufPaddingSize int = 64
)
var (
S_f64toa uintptr
@@ -59,221 +61,142 @@ var (
S_skip_number uintptr
)
var (
__Quote func(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn unsafe.Pointer, flags uint64) int
__Unquote func(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep unsafe.Pointer, flags uint64) int
__HTMLEscape func(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn unsafe.Pointer) int
__Value func(s unsafe.Pointer, n int, p int, v unsafe.Pointer, flags uint64) int
__SkipOne func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) int
__SkipOneFast func(s unsafe.Pointer, p unsafe.Pointer) int
__GetByPath func(s unsafe.Pointer, p unsafe.Pointer, path unsafe.Pointer, m unsafe.Pointer) int
__ValidateOne func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer, flags uint64) int
__I64toa func(out unsafe.Pointer, val int64) (ret int)
__U64toa func(out unsafe.Pointer, val uint64) (ret int)
__F64toa func(out unsafe.Pointer, val float64) (ret int)
__F32toa func(out unsafe.Pointer, val float32) (ret int)
__ValidateUTF8 func(s unsafe.Pointer, p unsafe.Pointer, m unsafe.Pointer) (ret int)
__ValidateUTF8Fast func(s unsafe.Pointer) (ret int)
)
//go:nosplit
//go:noescape
//goland:noinspection GoUnusedParameter
func Quote(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) int
//go:nosplit
func Quote(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int, flags uint64) int {
return __Quote(rt.NoEscape(unsafe.Pointer(s)), nb, rt.NoEscape(unsafe.Pointer(dp)), rt.NoEscape(unsafe.Pointer(dn)), flags)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int
//go:nosplit
func Unquote(s unsafe.Pointer, nb int, dp unsafe.Pointer, ep *int, flags uint64) int {
return __Unquote(rt.NoEscape(unsafe.Pointer(s)), nb, rt.NoEscape(unsafe.Pointer(dp)), rt.NoEscape(unsafe.Pointer(ep)), flags)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func HTMLEscape(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int) int
//go:nosplit
func HTMLEscape(s unsafe.Pointer, nb int, dp unsafe.Pointer, dn *int) int {
return __HTMLEscape(rt.NoEscape(unsafe.Pointer(s)), nb, rt.NoEscape(unsafe.Pointer(dp)), rt.NoEscape(unsafe.Pointer(dn)))
}
//go:noescape
//goland:noinspection GoUnusedParameter
func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) int
//go:nosplit
func Value(s unsafe.Pointer, n int, p int, v *types.JsonState, flags uint64) int {
return __Value(rt.NoEscape(unsafe.Pointer(s)), n, p, rt.NoEscape(unsafe.Pointer(v)), flags)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func SkipOne(s *string, p *int, m *types.StateMachine, flags uint64) int
//go:nosplit
func SkipOne(s *string, p *int, m *types.StateMachine, flags uint64) int {
return __SkipOne(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func SkipOneFast(s *string, p *int) int
//go:nosplit
func SkipOneFast(s *string, p *int) int {
return __SkipOneFast(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)))
}
//go:noescape
//goland:noinspection GoUnusedParameter
func GetByPath(s *string, p *int, path *[]interface{}, m *types.StateMachine) int
//go:nosplit
func GetByPath(s *string, p *int, path *[]interface{}, m *types.StateMachine) int {
return __GetByPath(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(path)), rt.NoEscape(unsafe.Pointer(m)))
}
//go:noescape
//goland:noinspection GoUnusedParameter
func ValidateOne(s *string, p *int, m *types.StateMachine) int
//go:nosplit
func ValidateOne(s *string, p *int, m *types.StateMachine, flags uint64) int {
return __ValidateOne(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)), flags)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func I64toa(out *byte, val int64) (ret int)
//go:nosplit
func I64toa(out *byte, val int64) (ret int) {
return __I64toa(rt.NoEscape(unsafe.Pointer(out)), val)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func U64toa(out *byte, val uint64) (ret int)
//go:nosplit
func U64toa(out *byte, val uint64) (ret int) {
return __U64toa(rt.NoEscape(unsafe.Pointer(out)), val)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func F64toa(out *byte, val float64) (ret int)
//go:nosplit
func F64toa(out *byte, val float64) (ret int) {
return __F64toa(rt.NoEscape(unsafe.Pointer(out)), val)
}
//go:noescape
//goland:noinspection GoUnusedParameter
func ValidateUTF8(s *string, p *int, m *types.StateMachine) (ret int)
//go:nosplit
func F32toa(out *byte, val float32) (ret int) {
return __F32toa(rt.NoEscape(unsafe.Pointer(out)), val)
}
//go:nosplit
func ValidateUTF8(s *string, p *int, m *types.StateMachine) (ret int) {
return __ValidateUTF8(rt.NoEscape(unsafe.Pointer(s)), rt.NoEscape(unsafe.Pointer(p)), rt.NoEscape(unsafe.Pointer(m)))
}
//go:nosplit
func ValidateUTF8Fast(s *string) (ret int) {
return __ValidateUTF8Fast(rt.NoEscape(unsafe.Pointer(s)))
}
func useSSE() {
sse.Use()
S_f64toa = sse.S_f64toa
__F64toa = sse.F_f64toa
S_f32toa = sse.S_f32toa
__F64toa = sse.F_f64toa
S_i64toa = sse.S_i64toa
__I64toa = sse.F_i64toa
S_u64toa = sse.S_u64toa
__U64toa = sse.F_u64toa
S_lspace = sse.S_lspace
S_quote = sse.S_quote
__Quote = sse.F_quote
S_unquote = sse.S_unquote
__Unquote = sse.F_unquote
S_value = sse.S_value
__Value = sse.F_value
S_vstring = sse.S_vstring
S_vnumber = sse.S_vnumber
S_vsigned = sse.S_vsigned
S_vunsigned = sse.S_vunsigned
S_skip_one = sse.S_skip_one
__SkipOne = sse.F_skip_one
__SkipOneFast = sse.F_skip_one_fast
S_skip_array = sse.S_skip_array
S_skip_object = sse.S_skip_object
S_skip_number = sse.S_skip_number
S_get_by_path = sse.S_get_by_path
__GetByPath = sse.F_get_by_path
__HTMLEscape = sse.F_html_escape
__ValidateOne = sse.F_validate_one
__ValidateUTF8= sse.F_validate_utf8
__ValidateUTF8Fast = sse.F_validate_utf8_fast
}
//go:noescape
//goland:noinspection GoUnusedParameter
func ValidateUTF8Fast(s *string) (ret int)
func useAVX() {
avx.Use()
S_f64toa = avx.S_f64toa
__F64toa = avx.F_f64toa
S_f32toa = avx.S_f32toa
__F64toa = avx.F_f64toa
S_i64toa = avx.S_i64toa
__I64toa = avx.F_i64toa
S_u64toa = avx.S_u64toa
__U64toa = avx.F_u64toa
S_lspace = avx.S_lspace
S_quote = avx.S_quote
__Quote = avx.F_quote
S_unquote = avx.S_unquote
__Unquote = avx.F_unquote
S_value = avx.S_value
__Value = avx.F_value
S_vstring = avx.S_vstring
S_vnumber = avx.S_vnumber
S_vsigned = avx.S_vsigned
S_vunsigned = avx.S_vunsigned
S_skip_one = avx.S_skip_one
__SkipOne = avx.F_skip_one
__SkipOneFast = avx.F_skip_one_fast
S_skip_one_fast = avx.S_skip_one_fast
S_skip_array = avx.S_skip_array
S_skip_object = avx.S_skip_object
S_skip_number = avx.S_skip_number
S_get_by_path = avx.S_get_by_path
__GetByPath = avx.F_get_by_path
__HTMLEscape = avx.F_html_escape
__ValidateOne = avx.F_validate_one
__ValidateUTF8= avx.F_validate_utf8
__ValidateUTF8Fast = avx.F_validate_utf8_fast
}
func useAVX2() {
avx2.Use()
S_f64toa = avx2.S_f64toa
__F64toa = avx2.F_f64toa
S_f32toa = avx2.S_f32toa
__F64toa = avx2.F_f64toa
S_i64toa = avx2.S_i64toa
__I64toa = avx2.F_i64toa
S_u64toa = avx2.S_u64toa
__U64toa = avx2.F_u64toa
S_lspace = avx2.S_lspace
S_quote = avx2.S_quote
__Quote = avx2.F_quote
S_unquote = avx2.S_unquote
__Unquote = avx2.F_unquote
S_value = avx2.S_value
__Value = avx2.F_value
S_vstring = avx2.S_vstring
S_vnumber = avx2.S_vnumber
S_vsigned = avx2.S_vsigned
S_vunsigned = avx2.S_vunsigned
S_skip_one = avx2.S_skip_one
__SkipOne = avx2.F_skip_one
__SkipOneFast = avx2.F_skip_one_fast
S_skip_one_fast = avx2.S_skip_one_fast
S_skip_array = avx2.S_skip_array
S_skip_object = avx2.S_skip_object
S_skip_number = avx2.S_skip_number
S_get_by_path = avx2.S_get_by_path
__GetByPath = avx2.F_get_by_path
__HTMLEscape = avx2.F_html_escape
__ValidateOne = avx2.F_validate_one
__ValidateUTF8= avx2.F_validate_utf8
__ValidateUTF8Fast = avx2.F_validate_utf8_fast
}
func useSSE() {
S_f64toa = sse.S_f64toa
S_f32toa = sse.S_f32toa
S_i64toa = sse.S_i64toa
S_u64toa = sse.S_u64toa
S_lspace = sse.S_lspace
S_quote = sse.S_quote
S_unquote = sse.S_unquote
S_value = sse.S_value
S_vstring = sse.S_vstring
S_vnumber = sse.S_vnumber
S_vsigned = sse.S_vsigned
S_vunsigned = sse.S_vunsigned
S_skip_one = sse.S_skip_one
S_skip_one_fast = sse.S_skip_one_fast
S_skip_array = sse.S_skip_array
S_skip_object = sse.S_skip_object
S_skip_number = sse.S_skip_number
S_get_by_path = sse.S_get_by_path
}
func init() {
if cpu.HasAVX2 {
useAVX2()
} else if cpu.HasAVX {
useAVX()
} else if cpu.HasSSE {
useSSE()
} else {
panic("Unsupported CPU, maybe it's too old to run Sonic.")
}
if cpu.HasAVX2 {
useAVX2()
} else if cpu.HasAVX {
useAVX()
} else if cpu.HasSSE {
useSSE()
} else {
panic("Unsupported CPU, maybe it's too old to run Sonic.")
}
}

View File

@@ -19,7 +19,6 @@ package types
import (
`fmt`
`sync`
`unsafe`
)
type ValueType int
@@ -29,8 +28,6 @@ type SearchingError uint
// NOTE: !NOT MODIFIED ONLY.
// This definitions are followed in native/types.h.
const BufPaddingSize int = 64
const (
V_EOF ValueType = 1
V_NULL ValueType = 2
@@ -49,23 +46,15 @@ const (
)
const (
// for native.Unquote() flags
B_DOUBLE_UNQUOTE = 0
B_UNICODE_REPLACE = 1
// for native.Value() flags
B_USE_NUMBER = 1
B_VALIDATE_STRING = 5
B_ALLOW_CONTROL = 31
)
const (
F_DOUBLE_UNQUOTE = 1 << B_DOUBLE_UNQUOTE
F_UNICODE_REPLACE = 1 << B_UNICODE_REPLACE
F_USE_NUMBER = 1 << B_USE_NUMBER
F_VALIDATE_STRING = 1 << B_VALIDATE_STRING
F_ALLOW_CONTROL = 1 << B_ALLOW_CONTROL
)
const (
@@ -147,18 +136,3 @@ func FreeStateMachine(fsm *StateMachine) {
stackPool.Put(fsm)
}
const MaxDigitNums = 800
var digitPool = sync.Pool{
New: func() interface{} {
return (*byte)(unsafe.Pointer(&[MaxDigitNums]byte{}))
},
}
func NewDbuf() *byte {
return digitPool.Get().(*byte)
}
func FreeDbuf(p *byte) {
digitPool.Put(p)
}

View File

@@ -1,4 +1,5 @@
// +build !noasm,amd64 !appengine,amd64
// +build !noasm !appengine
// Code generated by asm2asm, DO NOT EDIT·
#include "go_asm.h"
#include "funcdata.h"

View File

@@ -66,16 +66,15 @@ func FuncAddr(f interface{}) unsafe.Pointer {
}
}
//go:nocheckptr
func IndexChar(src string, index int) unsafe.Pointer {
return unsafe.Pointer(uintptr((*GoString)(unsafe.Pointer(&src)).Ptr) + uintptr(index))
}
//go:nocheckptr
func IndexByte(ptr []byte, index int) unsafe.Pointer {
return unsafe.Pointer(uintptr((*GoSlice)(unsafe.Pointer(&ptr)).Ptr) + uintptr(index))
}
//go:nosplit
func GuardSlice(buf *[]byte, n int) {
c := cap(*buf)
l := len(*buf)
@@ -110,16 +109,4 @@ func StrFrom(p unsafe.Pointer, n int64) (s string) {
(*GoString)(unsafe.Pointer(&s)).Ptr = p
(*GoString)(unsafe.Pointer(&s)).Len = int(n)
return
}
// NoEscape hides a pointer from escape analysis. NoEscape is
// the identity function but escape analysis doesn't think the
// output depends on the input. NoEscape is inlined and currently
// compiles down to zero instructions.
// USE CAREFULLY!
//go:nosplit
//goland:noinspection GoVetUnsafePointer
func NoEscape(p unsafe.Pointer) unsafe.Pointer {
x := uintptr(p)
return unsafe.Pointer(x ^ 0)
}

View File

@@ -211,33 +211,3 @@ func findReflectRtypeItab() *GoItab {
v := reflect.TypeOf(struct{}{})
return (*GoIface)(unsafe.Pointer(&v)).Itab
}
func AssertI2I2(t *GoType, i GoIface) (r GoIface) {
inter := IfaceType(t)
tab := i.Itab
if tab == nil {
return
}
if (*GoInterfaceType)(tab.it) != inter {
tab = Getitab(inter, tab.Vt, true)
if tab == nil {
return
}
}
r.Itab = tab
r.Value = i.Value
return
}
//go:noescape
//go:linkname Getitab runtime.getitab
func Getitab(inter *GoInterfaceType, typ *GoType, canfail bool) *GoItab
func GetFuncPC(fn interface{}) uintptr {
ft := UnpackEface(fn)
if ft.Type.Kind() != reflect.Func {
panic("not a function")
}
return *(*uintptr)(ft.Value)
}

View File

@@ -17,12 +17,12 @@
package rt
const (
MinInt48 int64 = -(1 << 47)
MaxInt48 int64 = +(1 << 47) - 1
MinInt48 = -(1 << 47)
MaxInt48 = +(1 << 47) - 1
)
func PackInt(v int) uint64 {
if u := uint64(v); int64(v) < MinInt48 || int64(v) > MaxInt48 {
if u := uint64(v); v < MinInt48 || v > MaxInt48 {
panic("int48 out of range")
} else {
return ((u >> 63) << 47) | (u & 0x00007fffffffffff)

View File

@@ -42,13 +42,6 @@ const (
_SUB_BUCKETSIZE = _BUCKETSIZE / _SUBBUCKETS
)
// Note: This list must match the list in runtime/symtab.go.
const (
FuncFlag_TOPFRAME = 1 << iota
FuncFlag_SPWRITE
FuncFlag_ASM
)
// PCDATA and FUNCDATA table indexes.
//
// See funcdata.h and $GROOT/src/cmd/internal/objabi/funcdata.go.
@@ -148,98 +141,4 @@ func funcNameParts(name string) (string, string, string) {
return name, "", ""
}
return name[:i], "[...]", name[j+1:]
}
// func name table format:
// nameOff[0] -> namePartA namePartB namePartC \x00
// nameOff[1] -> namePartA namePartB namePartC \x00
// ...
func makeFuncnameTab(funcs []Func) (tab []byte, offs []int32) {
offs = make([]int32, len(funcs))
offset := 1
tab = []byte{0}
for i, f := range funcs {
offs[i] = int32(offset)
a, b, c := funcNameParts(f.Name)
tab = append(tab, a...)
tab = append(tab, b...)
tab = append(tab, c...)
tab = append(tab, 0)
offset += len(a) + len(b) + len(c) + 1
}
return
}
// CU table format:
// cuOffsets[0] -> filetabOffset[0] filetabOffset[1] ... filetabOffset[len(CUs[0].fileNames)-1]
// cuOffsets[1] -> filetabOffset[len(CUs[0].fileNames)] ... filetabOffset[len(CUs[0].fileNames) + len(CUs[1].fileNames)-1]
// ...
//
// file name table format:
// filetabOffset[0] -> CUs[0].fileNames[0] \x00
// ...
// filetabOffset[len(CUs[0]-1)] -> CUs[0].fileNames[len(CUs[0].fileNames)-1] \x00
// ...
// filetabOffset[SUM(CUs,fileNames)-1] -> CUs[len(CU)-1].fileNames[len(CUs[len(CU)-1].fileNames)-1] \x00
func makeFilenametab(cus []compilationUnit) (cutab []uint32, filetab []byte, cuOffsets []uint32) {
cuOffsets = make([]uint32, len(cus))
cuOffset := 0
fileOffset := 0
for i, cu := range cus {
cuOffsets[i] = uint32(cuOffset)
for _, name := range cu.fileNames {
cutab = append(cutab, uint32(fileOffset))
fileOffset += len(name) + 1
filetab = append(filetab, name...)
filetab = append(filetab, 0)
}
cuOffset += len(cu.fileNames)
}
return
}
func writeFuncdata(out *[]byte, funcs []Func) (fstart int, funcdataOffs [][]uint32) {
fstart = len(*out)
*out = append(*out, byte(0))
offs := uint32(1)
funcdataOffs = make([][]uint32, len(funcs))
for i, f := range funcs {
var writer = func(fd encoding.BinaryMarshaler) {
var ab []byte
var err error
if fd != nil {
ab, err = fd.MarshalBinary()
if err != nil {
panic(err)
}
funcdataOffs[i] = append(funcdataOffs[i], offs)
} else {
ab = []byte{0}
funcdataOffs[i] = append(funcdataOffs[i], _INVALID_FUNCDATA_OFFSET)
}
*out = append(*out, ab...)
offs += uint32(len(ab))
}
writer(f.ArgsPointerMaps)
writer(f.LocalsPointerMaps)
writer(f.StackObjects)
writer(f.InlTree)
writer(f.OpenCodedDeferInfo)
writer(f.ArgInfo)
writer(f.ArgLiveInfo)
writer(f.WrapInfo)
}
return
}
}

View File

@@ -1,5 +1,4 @@
// go:build go1.18 && !go1.20
//go:build go1.18 && !go1.20
// +build go1.18,!go1.20
/*
@@ -21,13 +20,32 @@
package loader
import (
`github.com/bytedance/sonic/loader/internal/rt`
`encoding`
`os`
`unsafe`
`github.com/bytedance/sonic/internal/rt`
)
const (
_Magic uint32 = 0xfffffff0
)
type pcHeader struct {
magic uint32 // 0xFFFFFFF0
pad1, pad2 uint8 // 0,0
minLC uint8 // min instruction size
ptrSize uint8 // size of a ptr in bytes
nfunc int // number of functions in the module
nfiles uint // number of entries in the file tab
textStart uintptr // base for function entry PC offsets in this module, equal to moduledata.text
funcnameOffset uintptr // offset to the funcnametab variable from pcHeader
cuOffset uintptr // offset to the cutab variable from pcHeader
filetabOffset uintptr // offset to the filetab variable from pcHeader
pctabOffset uintptr // offset to the pctab variable from pcHeader
pclnOffset uintptr // offset to the pclntab variable from pcHeader
}
type moduledata struct {
pcHeader *pcHeader
funcnametab []byte
@@ -111,3 +129,413 @@ type _func struct {
//
// funcdata [nfuncdata]uint32
}
type funcTab struct {
entry uint32
funcoff uint32
}
type bitVector struct {
n int32 // # of bits
bytedata *uint8
}
type ptabEntry struct {
name int32
typ int32
}
type textSection struct {
vaddr uintptr // prelinked section vaddr
end uintptr // vaddr + section length
baseaddr uintptr // relocated section address
}
type modulehash struct {
modulename string
linktimehash string
runtimehash *string
}
// findfuncbucket is an array of these structures.
// Each bucket represents 4096 bytes of the text segment.
// Each subbucket represents 256 bytes of the text segment.
// To find a function given a pc, locate the bucket and subbucket for
// that pc. Add together the idx and subbucket value to obtain a
// function index. Then scan the functab array starting at that
// index to find the target function.
// This table uses 20 bytes for every 4096 bytes of code, or ~0.5% overhead.
type findfuncbucket struct {
idx uint32
_SUBBUCKETS [16]byte
}
// func name table format:
// nameOff[0] -> namePartA namePartB namePartC \x00
// nameOff[1] -> namePartA namePartB namePartC \x00
// ...
func makeFuncnameTab(funcs []Func) (tab []byte, offs []int32) {
offs = make([]int32, len(funcs))
offset := 0
for i, f := range funcs {
offs[i] = int32(offset)
a, b, c := funcNameParts(f.Name)
tab = append(tab, a...)
tab = append(tab, b...)
tab = append(tab, c...)
tab = append(tab, 0)
offset += len(a) + len(b) + len(c) + 1
}
return
}
type compilationUnit struct {
fileNames []string
}
// CU table format:
// cuOffsets[0] -> filetabOffset[0] filetabOffset[1] ... filetabOffset[len(CUs[0].fileNames)-1]
// cuOffsets[1] -> filetabOffset[len(CUs[0].fileNames)] ... filetabOffset[len(CUs[0].fileNames) + len(CUs[1].fileNames)-1]
// ...
//
// file name table format:
// filetabOffset[0] -> CUs[0].fileNames[0] \x00
// ...
// filetabOffset[len(CUs[0]-1)] -> CUs[0].fileNames[len(CUs[0].fileNames)-1] \x00
// ...
// filetabOffset[SUM(CUs,fileNames)-1] -> CUs[len(CU)-1].fileNames[len(CUs[len(CU)-1].fileNames)-1] \x00
func makeFilenametab(cus []compilationUnit) (cutab []uint32, filetab []byte, cuOffsets []uint32) {
cuOffsets = make([]uint32, len(cus))
cuOffset := 0
fileOffset := 0
for i, cu := range cus {
cuOffsets[i] = uint32(cuOffset)
for _, name := range cu.fileNames {
cutab = append(cutab, uint32(fileOffset))
fileOffset += len(name) + 1
filetab = append(filetab, name...)
filetab = append(filetab, 0)
}
cuOffset += len(cu.fileNames)
}
return
}
func writeFuncdata(out *[]byte, funcs []Func) (fstart int, funcdataOffs [][]uint32) {
fstart = len(*out)
*out = append(*out, byte(0))
offs := uint32(1)
funcdataOffs = make([][]uint32, len(funcs))
for i, f := range funcs {
var writer = func(fd encoding.BinaryMarshaler) {
var ab []byte
var err error
if fd != nil {
ab, err = fd.MarshalBinary()
if err != nil {
panic(err)
}
funcdataOffs[i] = append(funcdataOffs[i], offs)
} else {
ab = []byte{0}
funcdataOffs[i] = append(funcdataOffs[i], _INVALID_FUNCDATA_OFFSET)
}
*out = append(*out, ab...)
offs += uint32(len(ab))
}
writer(f.ArgsPointerMaps)
writer(f.LocalsPointerMaps)
writer(f.StackObjects)
writer(f.InlTree)
writer(f.OpenCodedDeferInfo)
writer(f.ArgInfo)
writer(f.ArgLiveInfo)
writer(f.WrapInfo)
}
return
}
func makeFtab(funcs []_func, lastFuncSize uint32) (ftab []funcTab) {
// Allocate space for the pc->func table. This structure consists of a pc offset
// and an offset to the func structure. After that, we have a single pc
// value that marks the end of the last function in the binary.
var size int64 = int64(len(funcs)*2*4 + 4)
var startLocations = make([]uint32, len(funcs))
for i, f := range funcs {
size = rnd(size, int64(_PtrSize))
//writePCToFunc
startLocations[i] = uint32(size)
size += int64(uint8(_FUNC_SIZE)+f.nfuncdata*4+uint8(f.npcdata)*4)
}
ftab = make([]funcTab, 0, len(funcs)+1)
// write a map of pc->func info offsets
for i, f := range funcs {
ftab = append(ftab, funcTab{uint32(f.entryOff), uint32(startLocations[i])})
}
// Final entry of table is just end pc offset.
lastFunc := funcs[len(funcs)-1]
ftab = append(ftab, funcTab{uint32(lastFunc.entryOff + lastFuncSize), 0})
return
}
// Pcln table format: [...]funcTab + [...]_Func
func makePclntable(funcs []_func, lastFuncSize uint32, pcdataOffs [][]uint32, funcdataOffs [][]uint32) (pclntab []byte) {
// Allocate space for the pc->func table. This structure consists of a pc offset
// and an offset to the func structure. After that, we have a single pc
// value that marks the end of the last function in the binary.
var size int64 = int64(len(funcs)*2*4 + 4)
var startLocations = make([]uint32, len(funcs))
for i := range funcs {
size = rnd(size, int64(_PtrSize))
//writePCToFunc
startLocations[i] = uint32(size)
size += int64(int(_FUNC_SIZE)+len(funcdataOffs[i])*4+len(pcdataOffs[i])*4)
}
pclntab = make([]byte, size, size)
// write a map of pc->func info offsets
offs := 0
for i, f := range funcs {
byteOrder.PutUint32(pclntab[offs:offs+4], uint32(f.entryOff))
byteOrder.PutUint32(pclntab[offs+4:offs+8], uint32(startLocations[i]))
offs += 8
}
// Final entry of table is just end pc offset.
lastFunc := funcs[len(funcs)-1]
byteOrder.PutUint32(pclntab[offs:offs+4], uint32(lastFunc.entryOff+lastFuncSize))
// write func info table
for i, f := range funcs {
off := startLocations[i]
// write _func structure to pclntab
fb := rt.BytesFrom(unsafe.Pointer(&f), int(_FUNC_SIZE), int(_FUNC_SIZE))
copy(pclntab[off:off+uint32(_FUNC_SIZE)], fb)
off += uint32(_FUNC_SIZE)
// NOTICE: _func.pcdata always starts from PcUnsafePoint, which is index 3
for j := 3; j < len(pcdataOffs[i]); j++ {
byteOrder.PutUint32(pclntab[off:off+4], uint32(pcdataOffs[i][j]))
off += 4
}
// funcdata refs as offsets from gofunc
for _, funcdata := range funcdataOffs[i] {
byteOrder.PutUint32(pclntab[off:off+4], uint32(funcdata))
off += 4
}
}
return
}
// findfunc table used to map pc to belonging func,
// returns the index in the func table.
//
// All text section are divided into buckets sized _BUCKETSIZE(4K):
// every bucket is divided into _SUBBUCKETS sized _SUB_BUCKETSIZE(64),
// and it has a base idx to plus the offset stored in jth subbucket.
// see findfunc() in runtime/symtab.go
func writeFindfunctab(out *[]byte, ftab []funcTab) (start int) {
start = len(*out)
max := ftab[len(ftab)-1].entry
min := ftab[0].entry
nbuckets := (max - min + _BUCKETSIZE - 1) / _BUCKETSIZE
n := (max - min + _SUB_BUCKETSIZE - 1) / _SUB_BUCKETSIZE
tab := make([]findfuncbucket, 0, nbuckets)
var s, e = 0, 0
for i := 0; i<int(nbuckets); i++ {
var pc = min + uint32((i+1)*_BUCKETSIZE)
// find the end func of the bucket
for ; e < len(ftab)-1 && ftab[e+1].entry <= pc; e++ {}
// store the start func of the bucket
var fb = findfuncbucket{idx: uint32(s)}
for j := 0; j<_SUBBUCKETS && (i*_SUBBUCKETS+j)<int(n); j++ {
pc = min + uint32(i*_BUCKETSIZE) + uint32((j+1)*_SUB_BUCKETSIZE)
var ss = s
// find the end func of the subbucket
for ; ss < len(ftab)-1 && ftab[ss+1].entry <= pc; ss++ {}
// store the start func of the subbucket
fb._SUBBUCKETS[j] = byte(uint32(s) - fb.idx)
s = ss
}
s = e
tab = append(tab, fb)
}
// write findfuncbucket
if len(tab) > 0 {
size := int(unsafe.Sizeof(findfuncbucket{}))*len(tab)
*out = append(*out, rt.BytesFrom(unsafe.Pointer(&tab[0]), size, size)...)
}
return
}
func makeModuledata(name string, filenames []string, funcs []Func, text []byte) (mod *moduledata) {
mod = new(moduledata)
mod.modulename = name
// make filename table
cu := make([]string, 0, len(filenames))
for _, f := range filenames {
cu = append(cu, f)
}
cutab, filetab, cuOffs := makeFilenametab([]compilationUnit{{cu}})
mod.cutab = cutab
mod.filetab = filetab
// make funcname table
funcnametab, nameOffs := makeFuncnameTab(funcs)
mod.funcnametab = funcnametab
// make pcdata table
// NOTICE: _func only use offset to index pcdata, thus no need mmap() pcdata
pctab, pcdataOffs, _funcs := makePctab(funcs, cuOffs, nameOffs)
mod.pctab = pctab
// write func data
// NOTICE: _func use mod.gofunc+offset to directly point funcdata, thus need cache funcdata
// TODO: estimate accurate capacity
cache := make([]byte, 0, len(funcs)*int(_PtrSize))
fstart, funcdataOffs := writeFuncdata(&cache, funcs)
// make pc->func (binary search) func table
lastFuncsize := funcs[len(funcs)-1].TextSize
ftab := makeFtab(_funcs, lastFuncsize)
mod.ftab = ftab
// write pc->func (modmap) findfunc table
ffstart := writeFindfunctab(&cache, ftab)
// make pclnt table
pclntab := makePclntable(_funcs, lastFuncsize, pcdataOffs, funcdataOffs)
mod.pclntable = pclntab
// mmap() text and funcdata segements
p := os.Getpagesize()
size := int(rnd(int64(len(text)), int64(p)))
addr := mmap(size)
// copy the machine code
s := rt.BytesFrom(unsafe.Pointer(addr), len(text), size)
copy(s, text)
// make it executable
mprotect(addr, size)
// assign addresses
mod.text = addr
mod.etext = addr + uintptr(size)
mod.minpc = addr
mod.maxpc = addr + uintptr(len(text))
// cache funcdata and findfuncbucket
moduleCache.Lock()
moduleCache.m[mod] = cache
moduleCache.Unlock()
mod.gofunc = uintptr(unsafe.Pointer(&cache[fstart]))
mod.findfunctab = uintptr(unsafe.Pointer(&cache[ffstart]))
// make pc header
mod.pcHeader = &pcHeader {
magic : _Magic,
minLC : _MinLC,
ptrSize : _PtrSize,
nfunc : len(funcs),
nfiles: uint(len(cu)),
textStart: mod.text,
funcnameOffset: getOffsetOf(moduledata{}, "funcnametab"),
cuOffset: getOffsetOf(moduledata{}, "cutab"),
filetabOffset: getOffsetOf(moduledata{}, "filetab"),
pctabOffset: getOffsetOf(moduledata{}, "pctab"),
pclnOffset: getOffsetOf(moduledata{}, "pclntable"),
}
// sepecial case: gcdata and gcbss must by non-empty
mod.gcdata = uintptr(unsafe.Pointer(&emptyByte))
mod.gcbss = uintptr(unsafe.Pointer(&emptyByte))
return
}
// makePctab generates pcdelta->valuedelta tables for functions,
// and returns the table and the entry offset of every kind pcdata in the table.
func makePctab(funcs []Func, cuOffset []uint32, nameOffset []int32) (pctab []byte, pcdataOffs [][]uint32, _funcs []_func) {
_funcs = make([]_func, len(funcs))
// Pctab offsets of 0 are considered invalid in the runtime. We respect
// that by just padding a single byte at the beginning of runtime.pctab,
// that way no real offsets can be zero.
pctab = make([]byte, 1, 12*len(funcs)+1)
pcdataOffs = make([][]uint32, len(funcs))
for i, f := range funcs {
_f := &_funcs[i]
var writer = func(pc *Pcdata) {
var ab []byte
var err error
if pc != nil {
ab, err = pc.MarshalBinary()
if err != nil {
panic(err)
}
pcdataOffs[i] = append(pcdataOffs[i], uint32(len(pctab)))
} else {
ab = []byte{0}
pcdataOffs[i] = append(pcdataOffs[i], _PCDATA_INVALID_OFFSET)
}
pctab = append(pctab, ab...)
}
if f.Pcsp != nil {
_f.pcsp = uint32(len(pctab))
}
writer(f.Pcsp)
if f.Pcfile != nil {
_f.pcfile = uint32(len(pctab))
}
writer(f.Pcfile)
if f.Pcline != nil {
_f.pcln = uint32(len(pctab))
}
writer(f.Pcline)
writer(f.PcUnsafePoint)
writer(f.PcStackMapIndex)
writer(f.PcInlTreeIndex)
writer(f.PcArgLiveIndex)
_f.entryOff = f.EntryOff
_f.nameOff = nameOffset[i]
_f.args = f.ArgsSize
_f.deferreturn = f.DeferReturn
// NOTICE: _func.pcdata is always as [PCDATA_UnsafePoint(0) : PCDATA_ArgLiveIndex(3)]
_f.npcdata = uint32(_N_PCDATA)
_f.cuOffset = cuOffset[i]
_f.funcID = f.ID
_f.flag = f.Flag
_f.nfuncdata = uint8(_N_FUNCDATA)
}
return
}
func registerFunction(name string, pc uintptr, textSize uintptr, fp int, args int, size uintptr, argptrs uintptr, localptrs uintptr) {}

View File

@@ -20,7 +20,11 @@
package loader
import (
`github.com/bytedance/sonic/loader/internal/rt`
`encoding`
`os`
`unsafe`
`github.com/bytedance/sonic/internal/rt`
)
const (
@@ -47,6 +51,8 @@ type moduledata struct {
end, gcdata, gcbss uintptr
types, etypes uintptr
rodata uintptr
// TODO: generate funcinfo object to memory
gofunc uintptr // go.func.* is actual funcinfo object in image
textsectmap []textSection // see runtime/symtab.go: textAddr()
@@ -112,3 +118,428 @@ type _func struct {
//
// funcdata [nfuncdata]uint32
}
type funcTab struct {
entry uint32
funcoff uint32
}
type pcHeader struct {
magic uint32 // 0xFFFFFFF0
pad1, pad2 uint8 // 0,0
minLC uint8 // min instruction size
ptrSize uint8 // size of a ptr in bytes
nfunc int // number of functions in the module
nfiles uint // number of entries in the file tab
textStart uintptr // base for function entry PC offsets in this module, equal to moduledata.text
funcnameOffset uintptr // offset to the funcnametab variable from pcHeader
cuOffset uintptr // offset to the cutab variable from pcHeader
filetabOffset uintptr // offset to the filetab variable from pcHeader
pctabOffset uintptr // offset to the pctab variable from pcHeader
pclnOffset uintptr // offset to the pclntab variable from pcHeader
}
type bitVector struct {
n int32 // # of bits
bytedata *uint8
}
type ptabEntry struct {
name int32
typ int32
}
type textSection struct {
vaddr uintptr // prelinked section vaddr
end uintptr // vaddr + section length
baseaddr uintptr // relocated section address
}
type modulehash struct {
modulename string
linktimehash string
runtimehash *string
}
// findfuncbucket is an array of these structures.
// Each bucket represents 4096 bytes of the text segment.
// Each subbucket represents 256 bytes of the text segment.
// To find a function given a pc, locate the bucket and subbucket for
// that pc. Add together the idx and subbucket value to obtain a
// function index. Then scan the functab array starting at that
// index to find the target function.
// This table uses 20 bytes for every 4096 bytes of code, or ~0.5% overhead.
type findfuncbucket struct {
idx uint32
_SUBBUCKETS [16]byte
}
// func name table format:
// nameOff[0] -> namePartA namePartB namePartC \x00
// nameOff[1] -> namePartA namePartB namePartC \x00
// ...
func makeFuncnameTab(funcs []Func) (tab []byte, offs []int32) {
offs = make([]int32, len(funcs))
offset := 0
for i, f := range funcs {
offs[i] = int32(offset)
a, b, c := funcNameParts(f.Name)
tab = append(tab, a...)
tab = append(tab, b...)
tab = append(tab, c...)
tab = append(tab, 0)
offset += len(a) + len(b) + len(c) + 1
}
return
}
type compilationUnit struct {
fileNames []string
}
// CU table format:
// cuOffsets[0] -> filetabOffset[0] filetabOffset[1] ... filetabOffset[len(CUs[0].fileNames)-1]
// cuOffsets[1] -> filetabOffset[len(CUs[0].fileNames)] ... filetabOffset[len(CUs[0].fileNames) + len(CUs[1].fileNames)-1]
// ...
//
// file name table format:
// filetabOffset[0] -> CUs[0].fileNames[0] \x00
// ...
// filetabOffset[len(CUs[0]-1)] -> CUs[0].fileNames[len(CUs[0].fileNames)-1] \x00
// ...
// filetabOffset[SUM(CUs,fileNames)-1] -> CUs[len(CU)-1].fileNames[len(CUs[len(CU)-1].fileNames)-1] \x00
func makeFilenametab(cus []compilationUnit) (cutab []uint32, filetab []byte, cuOffsets []uint32) {
cuOffsets = make([]uint32, len(cus))
cuOffset := 0
fileOffset := 0
for i, cu := range cus {
cuOffsets[i] = uint32(cuOffset)
for _, name := range cu.fileNames {
cutab = append(cutab, uint32(fileOffset))
fileOffset += len(name) + 1
filetab = append(filetab, name...)
filetab = append(filetab, 0)
}
cuOffset += len(cu.fileNames)
}
return
}
func writeFuncdata(out *[]byte, funcs []Func) (fstart int, funcdataOffs [][]uint32) {
fstart = len(*out)
*out = append(*out, byte(0))
offs := uint32(1)
funcdataOffs = make([][]uint32, len(funcs))
for i, f := range funcs {
var writer = func(fd encoding.BinaryMarshaler) {
var ab []byte
var err error
if fd != nil {
ab, err = fd.MarshalBinary()
if err != nil {
panic(err)
}
funcdataOffs[i] = append(funcdataOffs[i], offs)
} else {
ab = []byte{0}
funcdataOffs[i] = append(funcdataOffs[i], _INVALID_FUNCDATA_OFFSET)
}
*out = append(*out, ab...)
offs += uint32(len(ab))
}
writer(f.ArgsPointerMaps)
writer(f.LocalsPointerMaps)
writer(f.StackObjects)
writer(f.InlTree)
writer(f.OpenCodedDeferInfo)
writer(f.ArgInfo)
writer(f.ArgLiveInfo)
writer(f.WrapInfo)
}
return
}
func makeFtab(funcs []_func, lastFuncSize uint32) (ftab []funcTab) {
// Allocate space for the pc->func table. This structure consists of a pc offset
// and an offset to the func structure. After that, we have a single pc
// value that marks the end of the last function in the binary.
var size int64 = int64(len(funcs)*2*4 + 4)
var startLocations = make([]uint32, len(funcs))
for i, f := range funcs {
size = rnd(size, int64(_PtrSize))
//writePCToFunc
startLocations[i] = uint32(size)
size += int64(uint8(_FUNC_SIZE)+f.nfuncdata*4+uint8(f.npcdata)*4)
}
ftab = make([]funcTab, 0, len(funcs)+1)
// write a map of pc->func info offsets
for i, f := range funcs {
ftab = append(ftab, funcTab{uint32(f.entryOff), uint32(startLocations[i])})
}
// Final entry of table is just end pc offset.
lastFunc := funcs[len(funcs)-1]
ftab = append(ftab, funcTab{uint32(lastFunc.entryOff + lastFuncSize), 0})
return
}
// Pcln table format: [...]funcTab + [...]_Func
func makePclntable(funcs []_func, lastFuncSize uint32, pcdataOffs [][]uint32, funcdataOffs [][]uint32) (pclntab []byte) {
// Allocate space for the pc->func table. This structure consists of a pc offset
// and an offset to the func structure. After that, we have a single pc
// value that marks the end of the last function in the binary.
var size int64 = int64(len(funcs)*2*4 + 4)
var startLocations = make([]uint32, len(funcs))
for i := range funcs {
size = rnd(size, int64(_PtrSize))
//writePCToFunc
startLocations[i] = uint32(size)
size += int64(int(_FUNC_SIZE)+len(funcdataOffs[i])*4+len(pcdataOffs[i])*4)
}
pclntab = make([]byte, size, size)
// write a map of pc->func info offsets
offs := 0
for i, f := range funcs {
byteOrder.PutUint32(pclntab[offs:offs+4], uint32(f.entryOff))
byteOrder.PutUint32(pclntab[offs+4:offs+8], uint32(startLocations[i]))
offs += 8
}
// Final entry of table is just end pc offset.
lastFunc := funcs[len(funcs)-1]
byteOrder.PutUint32(pclntab[offs:offs+4], uint32(lastFunc.entryOff+lastFuncSize))
// write func info table
for i, f := range funcs {
off := startLocations[i]
// write _func structure to pclntab
fb := rt.BytesFrom(unsafe.Pointer(&f), int(_FUNC_SIZE), int(_FUNC_SIZE))
copy(pclntab[off:off+uint32(_FUNC_SIZE)], fb)
off += uint32(_FUNC_SIZE)
// NOTICE: _func.pcdata always starts from PcUnsafePoint, which is index 3
for j := 3; j < len(pcdataOffs[i]); j++ {
byteOrder.PutUint32(pclntab[off:off+4], uint32(pcdataOffs[i][j]))
off += 4
}
// funcdata refs as offsets from gofunc
for _, funcdata := range funcdataOffs[i] {
byteOrder.PutUint32(pclntab[off:off+4], uint32(funcdata))
off += 4
}
}
return
}
// findfunc table used to map pc to belonging func,
// returns the index in the func table.
//
// All text section are divided into buckets sized _BUCKETSIZE(4K):
// every bucket is divided into _SUBBUCKETS sized _SUB_BUCKETSIZE(64),
// and it has a base idx to plus the offset stored in jth subbucket.
// see findfunc() in runtime/symtab.go
func writeFindfunctab(out *[]byte, ftab []funcTab) (start int) {
start = len(*out)
max := ftab[len(ftab)-1].entry
min := ftab[0].entry
nbuckets := (max - min + _BUCKETSIZE - 1) / _BUCKETSIZE
n := (max - min + _SUB_BUCKETSIZE - 1) / _SUB_BUCKETSIZE
tab := make([]findfuncbucket, 0, nbuckets)
var s, e = 0, 0
for i := 0; i<int(nbuckets); i++ {
var pc = min + uint32((i+1)*_BUCKETSIZE)
// find the end func of the bucket
for ; e < len(ftab)-1 && ftab[e+1].entry <= pc; e++ {}
// store the start func of the bucket
var fb = findfuncbucket{idx: uint32(s)}
for j := 0; j<_SUBBUCKETS && (i*_SUBBUCKETS+j)<int(n); j++ {
pc = min + uint32(i*_BUCKETSIZE) + uint32((j+1)*_SUB_BUCKETSIZE)
var ss = s
// find the end func of the subbucket
for ; ss < len(ftab)-1 && ftab[ss+1].entry <= pc; ss++ {}
// store the start func of the subbucket
fb._SUBBUCKETS[j] = byte(uint32(s) - fb.idx)
s = ss
}
s = e
tab = append(tab, fb)
}
// write findfuncbucket
if len(tab) > 0 {
size := int(unsafe.Sizeof(findfuncbucket{}))*len(tab)
*out = append(*out, rt.BytesFrom(unsafe.Pointer(&tab[0]), size, size)...)
}
return
}
func makeModuledata(name string, filenames []string, funcs []Func, text []byte) (mod *moduledata) {
mod = new(moduledata)
mod.modulename = name
// make filename table
cu := make([]string, 0, len(filenames))
for _, f := range filenames {
cu = append(cu, f)
}
cutab, filetab, cuOffs := makeFilenametab([]compilationUnit{{cu}})
mod.cutab = cutab
mod.filetab = filetab
// make funcname table
funcnametab, nameOffs := makeFuncnameTab(funcs)
mod.funcnametab = funcnametab
// make pcdata table
// NOTICE: _func only use offset to index pcdata, thus no need mmap() pcdata
pctab, pcdataOffs, _funcs := makePctab(funcs, cuOffs, nameOffs)
mod.pctab = pctab
// write func data
// NOTICE: _func use mod.gofunc+offset to directly point funcdata, thus need cache funcdata
// TODO: estimate accurate capacity
cache := make([]byte, 0, len(funcs)*int(_PtrSize))
fstart, funcdataOffs := writeFuncdata(&cache, funcs)
// make pc->func (binary search) func table
lastFuncsize := funcs[len(funcs)-1].TextSize
ftab := makeFtab(_funcs, lastFuncsize)
mod.ftab = ftab
// write pc->func (modmap) findfunc table
ffstart := writeFindfunctab(&cache, ftab)
// make pclnt table
pclntab := makePclntable(_funcs, lastFuncsize, pcdataOffs, funcdataOffs)
mod.pclntable = pclntab
// mmap() text and funcdata segements
p := os.Getpagesize()
size := int(rnd(int64(len(text)), int64(p)))
addr := mmap(size)
// copy the machine code
s := rt.BytesFrom(unsafe.Pointer(addr), len(text), size)
copy(s, text)
// make it executable
mprotect(addr, size)
// assign addresses
mod.text = addr
mod.etext = addr + uintptr(size)
mod.minpc = addr
mod.maxpc = addr + uintptr(len(text))
// cache funcdata and findfuncbucket
moduleCache.Lock()
moduleCache.m[mod] = cache
moduleCache.Unlock()
mod.gofunc = uintptr(unsafe.Pointer(&cache[fstart]))
mod.findfunctab = uintptr(unsafe.Pointer(&cache[ffstart]))
// make pc header
mod.pcHeader = &pcHeader {
magic : _Magic,
minLC : _MinLC,
ptrSize : _PtrSize,
nfunc : len(funcs),
nfiles: uint(len(cu)),
textStart: mod.text,
funcnameOffset: getOffsetOf(moduledata{}, "funcnametab"),
cuOffset: getOffsetOf(moduledata{}, "cutab"),
filetabOffset: getOffsetOf(moduledata{}, "filetab"),
pctabOffset: getOffsetOf(moduledata{}, "pctab"),
pclnOffset: getOffsetOf(moduledata{}, "pclntable"),
}
// sepecial case: gcdata and gcbss must by non-empty
mod.gcdata = uintptr(unsafe.Pointer(&emptyByte))
mod.gcbss = uintptr(unsafe.Pointer(&emptyByte))
return
}
// makePctab generates pcdelta->valuedelta tables for functions,
// and returns the table and the entry offset of every kind pcdata in the table.
func makePctab(funcs []Func, cuOffset []uint32, nameOffset []int32) (pctab []byte, pcdataOffs [][]uint32, _funcs []_func) {
_funcs = make([]_func, len(funcs))
// Pctab offsets of 0 are considered invalid in the runtime. We respect
// that by just padding a single byte at the beginning of runtime.pctab,
// that way no real offsets can be zero.
pctab = make([]byte, 1, 12*len(funcs)+1)
pcdataOffs = make([][]uint32, len(funcs))
for i, f := range funcs {
_f := &_funcs[i]
var writer = func(pc *Pcdata) {
var ab []byte
var err error
if pc != nil {
ab, err = pc.MarshalBinary()
if err != nil {
panic(err)
}
pcdataOffs[i] = append(pcdataOffs[i], uint32(len(pctab)))
} else {
ab = []byte{0}
pcdataOffs[i] = append(pcdataOffs[i], _PCDATA_INVALID_OFFSET)
}
pctab = append(pctab, ab...)
}
if f.Pcsp != nil {
_f.pcsp = uint32(len(pctab))
}
writer(f.Pcsp)
if f.Pcfile != nil {
_f.pcfile = uint32(len(pctab))
}
writer(f.Pcfile)
if f.Pcline != nil {
_f.pcln = uint32(len(pctab))
}
writer(f.Pcline)
writer(f.PcUnsafePoint)
writer(f.PcStackMapIndex)
writer(f.PcInlTreeIndex)
writer(f.PcArgLiveIndex)
_f.entryOff = f.EntryOff
_f.nameOff = nameOffset[i]
_f.args = f.ArgsSize
_f.deferreturn = f.DeferReturn
// NOTICE: _func.pcdata is always as [PCDATA_UnsafePoint(0) : PCDATA_ArgLiveIndex(3)]
_f.npcdata = uint32(_N_PCDATA)
_f.cuOffset = cuOffset[i]
_f.funcID = f.ID
_f.flag = f.Flag
_f.nfuncdata = uint8(_N_FUNCDATA)
}
return
}
func registerFunction(name string, pc uintptr, textSize uintptr, fp int, args int, size uintptr, argptrs uintptr, localptrs uintptr) {}

View File

@@ -34,4 +34,4 @@ type Loader struct {
Name string // module name
File string // file name
Options
}
}

View File

@@ -1,5 +1,5 @@
//go:build !windows
// +build !windows
//go:build darwin || linux
// +build darwin linux
/**
* Copyright 2023 ByteDance Inc.
@@ -42,4 +42,4 @@ func mprotect(p uintptr, nb int) {
if _, _, err := syscall.RawSyscall(syscall.SYS_MPROTECT, p, uintptr(nb), _RX); err != 0 {
panic(err)
}
}
}

View File

@@ -16,10 +16,6 @@
package loader
import (
`encoding/binary`
)
const (
_N_PCDATA = 4
@@ -53,16 +49,40 @@ const (
var emptyByte byte
// Pcvalue is the program count corresponding to the value Val
// WARN: we use relative value here (to function entry)
type Pcvalue struct {
PC uint32 // program count relative to function entry
Val int32 // value relative to the value in function entry
func encodeValue(v int) []byte {
return encodeVariant(toZigzag(v))
}
func toZigzag(v int) int {
return (v << 1) ^ (v >> 31)
}
func encodeVariant(v int) []byte {
var u int
var r []byte
/* split every 7 bits */
for v > 127 {
u = v & 0x7f
v = v >> 7
r = append(r, byte(u) | 0x80)
}
/* check for last one */
if v == 0 {
return r
}
/* add the last one */
r = append(r, byte(v))
return r
}
type Pcvalue struct {
PC uint32 // PC offset from func entry
Val int32
}
// Pcdata represents pc->value mapping table.
// WARN: we use ** [Pcdata[i].PC, Pcdata[i+1].PC) **
// as the range where the Pcdata[i].Val is effective.
type Pcdata []Pcvalue
// see https://docs.google.com/document/d/1lyPIbmsYbXnpNj57a261hgOYVpNRcgydurVQIyZOz_o/pub
@@ -70,24 +90,11 @@ func (self Pcdata) MarshalBinary() (data []byte, err error) {
// delta value always starts from -1
sv := int32(_PCDATA_START_VAL)
sp := uint32(0)
buf := make([]byte, binary.MaxVarintLen32)
for _, v := range self {
if v.PC < sp {
panic("PC must be in ascending order!")
}
dp := uint64(v.PC - sp)
dv := int64(v.Val - sv)
if dv == 0 || dp == 0 {
continue
}
n := binary.PutVarint(buf, dv)
data = append(data, buf[:n]...)
n2 := binary.PutUvarint(buf, dp)
data = append(data, buf[:n2]...)
data = append(data, encodeVariant(toZigzag(int(v.Val - sv)))...)
data = append(data, encodeVariant(int(v.PC - sp))...)
sp = v.PC
sv = v.Val
}
// put 0 to indicate ends
data = append(data, 0)
return
}
}

View File

@@ -17,8 +17,7 @@
package loader
import (
"sync/atomic"
"unsafe"
`sync`
_ `unsafe`
)
@@ -26,35 +25,16 @@ import (
//goland:noinspection GoUnusedGlobalVariable
var lastmoduledatap *moduledata
var moduledataMux sync.Mutex
func registerModule(mod *moduledata) {
registerModuleLockFree(&lastmoduledatap, mod)
moduledataMux.Lock()
lastmoduledatap.next = mod
lastmoduledatap = mod
moduledataMux.Unlock()
}
//go:linkname moduledataverify1 runtime.moduledataverify1
func moduledataverify1(_ *moduledata)
func registerModuleLockFree(tail **moduledata, mod *moduledata) {
for {
oldTail := loadModule(tail)
if casModule(tail, oldTail, mod) {
storeModule(&oldTail.next, mod)
break
}
}
}
func loadModule(p **moduledata) *moduledata {
return (*moduledata)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(p))))
}
func storeModule(p **moduledata, value *moduledata) {
atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(p)), unsafe.Pointer(value))
}
func casModule(p **moduledata, oldValue *moduledata, newValue *moduledata) bool {
return atomic.CompareAndSwapPointer(
(*unsafe.Pointer)(unsafe.Pointer(p)),
unsafe.Pointer(oldValue),
unsafe.Pointer(newValue),
)
}

View File

@@ -1,4 +1,4 @@
// +build amd64,go1.16,!go1.23
// +build amd64,go1.15,!go1.21
/*
* Copyright 2021 ByteDance Inc.
@@ -58,12 +58,6 @@ func (cfg Config) Froze() API {
if cfg.ValidateString {
api.encoderOpts |= encoder.ValidateString
}
if cfg.NoValidateJSONMarshaler {
api.encoderOpts |= encoder.NoValidateJSONMarshaler
}
if cfg.NoEncoderNewline {
api.encoderOpts |= encoder.NoEncoderNewline
}
// configure decoder options:
if cfg.UseInt64 {
@@ -145,23 +139,23 @@ func (cfg frozenConfig) Valid(data []byte) bool {
// Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
// a compile option to set the depth of recursive compile for the nested struct type.
func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
if err := encoder.Pretouch(vt, opts...); err != nil {
return err
}
if err := decoder.Pretouch(vt, opts...); err != nil {
return err
}
// to pretouch the corresponding pointer type as well
if vt.Kind() == reflect.Ptr {
vt = vt.Elem()
} else {
vt = reflect.PtrTo(vt)
}
if err := encoder.Pretouch(vt, opts...); err != nil {
return err
}
if err := decoder.Pretouch(vt, opts...); err != nil {
return err
}
return nil
if err := encoder.Pretouch(vt, opts...); err != nil {
return err
}
if err := decoder.Pretouch(vt, opts...); err != nil {
return err
}
// to pretouch the corresponding pointer type as well
if vt.Kind() == reflect.Ptr {
vt = vt.Elem()
} else {
vt = reflect.PtrTo(vt)
}
if err := encoder.Pretouch(vt, opts...); err != nil {
return err
}
if err := decoder.Pretouch(vt, opts...); err != nil {
return err
}
return nil
}

View File

@@ -25,45 +25,27 @@ import (
`github.com/bytedance/sonic/internal/rt`
)
// String unescapes a escaped string (not including `"` at begining and end)
// It validates invalid UTF8 and replace with `\ufffd`
func String(s string) (ret string, err types.ParsingError) {
mm := make([]byte, 0, len(s))
err = intoBytesUnsafe(s, &mm, true)
err = intoBytesUnsafe(s, &mm)
ret = rt.Mem2Str(mm)
return
}
// IntoBytes is same with String besides it output result into a buffer m
func IntoBytes(s string, m *[]byte) types.ParsingError {
if cap(*m) < len(s) {
return types.ERR_EOF
} else {
return intoBytesUnsafe(s, m, true)
return intoBytesUnsafe(s, m)
}
}
// String unescapes a escaped string (not including `"` at begining and end)
// - replace enables replacing invalid utf8 escaped char with `\uffd`
func _String(s string, replace bool) (ret string, err error) {
mm := make([]byte, 0, len(s))
err = intoBytesUnsafe(s, &mm, replace)
ret = rt.Mem2Str(mm)
return
}
func intoBytesUnsafe(s string, m *[]byte, replace bool) types.ParsingError {
func intoBytesUnsafe(s string, m *[]byte) types.ParsingError {
pos := -1
slv := (*rt.GoSlice)(unsafe.Pointer(m))
str := (*rt.GoString)(unsafe.Pointer(&s))
flags := uint64(0)
if replace {
/* unquote as the default configuration, replace invalid unicode with \ufffd */
flags |= types.F_UNICODE_REPLACE
}
ret := native.Unquote(str.Ptr, str.Len, slv.Ptr, &pos, flags)
/* unquote as the default configuration, replace invalid unicode with \ufffd */
ret := native.Unquote(str.Ptr, str.Len, slv.Ptr, &pos, types.F_UNICODE_REPLACE)
/* check for errors */
if ret < 0 {
@@ -75,6 +57,3 @@ func intoBytesUnsafe(s string, m *[]byte, replace bool) types.ParsingError {
runtime.KeepAlive(s)
return 0
}

View File

@@ -68,4 +68,4 @@ func Validate(src []byte) bool {
// ValidateString as Validate, but for string.
func ValidateString(src string) bool {
return native.ValidateUTF8Fast(&src) == 0
}
}