Clean up the mess left by haphazard AI-generated code

2025-09-07 20:36:02 +08:00
parent ba513e0827
commit c4522b974b
403 changed files with 22915 additions and 44424 deletions

View File

@@ -1,157 +0,0 @@
// +build amd64,go1.16,!go1.22
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`runtime`
`unsafe`
`github.com/bytedance/sonic/encoder`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
uq `github.com/bytedance/sonic/unquote`
`github.com/chenzhuoyu/base64x`
)
var typeByte = rt.UnpackEface(byte(0)).Type
//go:nocheckptr
func quote(buf *[]byte, val string) {
*buf = append(*buf, '"')
if len(val) == 0 {
*buf = append(*buf, '"')
return
}
sp := rt.IndexChar(val, 0)
nb := len(val)
b := (*rt.GoSlice)(unsafe.Pointer(buf))
// input buffer
for nb > 0 {
// output buffer
dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len))
dn := b.Cap - b.Len
// call native.Quote, dn is byte count it outputs
ret := native.Quote(sp, nb, dp, &dn, 0)
// update *buf length
b.Len += dn
// no need more output
if ret >= 0 {
break
}
// double buf size
*b = growslice(typeByte, *b, b.Cap*2)
// ret is the complement of consumed input
ret = ^ret
// update input buffer
nb -= ret
sp = unsafe.Pointer(uintptr(sp) + uintptr(ret))
}
runtime.KeepAlive(buf)
runtime.KeepAlive(sp)
*buf = append(*buf, '"')
}
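For reference, the control flow of the (now removed) quote() above is a grow-and-retry loop: write into the free tail of the output buffer, and when native.Quote runs out of room it returns the complement of the consumed input count, so the caller doubles the buffer and continues where it stopped. Below is a minimal, dependency-free sketch of the same pattern; encodeChunk and appendEncoded are hypothetical stand-ins, not the library's native.Quote or growslice.

// --- illustrative sketch, not part of this commit ---
package main

import "fmt"

// encodeChunk stands in for native.Quote: it writes as much of src into dst as
// fits, returns the number of bytes written, and reports whether all input was consumed.
func encodeChunk(src, dst []byte) (n int, done bool) {
	n = copy(dst, src)
	return n, n == len(src)
}

// appendEncoded mirrors the loop in quote(): encode into the free tail of buf,
// and on a short write grow the buffer and retry with the remaining input
// (quote() recovers the consumed count from the encoder as ^ret).
func appendEncoded(buf, src []byte) []byte {
	for len(src) > 0 {
		free := buf[len(buf):cap(buf)] // output window, dp/dn in quote()
		n, done := encodeChunk(src, free)
		buf = buf[:len(buf)+n] // commit what was written, b.Len += dn
		if done {
			return buf
		}
		src = src[n:] // advance the input, sp += ret / nb -= ret
		grown := make([]byte, len(buf), 2*cap(buf)+1) // double the capacity, like growslice
		copy(grown, buf)
		buf = grown
	}
	return buf
}

func main() {
	out := append(make([]byte, 0, 4), '"')
	out = appendEncoded(out, []byte(`hello "json"`)) // no escaping here, control flow only
	fmt.Println(string(append(out, '"')))
}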
func unquote(src string) (string, types.ParsingError) {
return uq.String(src)
}
func decodeBase64(src string) ([]byte, error) {
return base64x.StdEncoding.DecodeString(src)
}
func encodeBase64(src []byte) string {
return base64x.StdEncoding.EncodeToString(src)
}
func (self *Parser) decodeValue() (val types.JsonState) {
sv := (*rt.GoString)(unsafe.Pointer(&self.s))
flag := types.F_USE_NUMBER
if self.dbuf != nil {
flag = 0
val.Dbuf = self.dbuf
val.Dcap = types.MaxDigitNums
}
self.p = native.Value(sv.Ptr, sv.Len, self.p, &val, uint64(flag))
return
}
func (self *Parser) skip() (int, types.ParsingError) {
fsm := types.NewStateMachine()
start := native.SkipOne(&self.s, &self.p, fsm, 0)
types.FreeStateMachine(fsm)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}
func (self *Node) encodeInterface(buf *[]byte) error {
//WARN: NOT compatible with json.Encoder
return encoder.EncodeInto(buf, self.packAny(), 0)
}
func (self *Parser) skipFast() (int, types.ParsingError) {
start := native.SkipOneFast(&self.s, &self.p)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}
func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
fsm := types.NewStateMachine()
start := native.GetByPath(&self.s, &self.p, &path, fsm)
types.FreeStateMachine(fsm)
runtime.KeepAlive(path)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}
func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
var err types.ParsingError
var start int
self.parser.p = 0
start, err = self.parser.getByPath(path...)
if err != 0 {
// for compatibility with old version
if err == types.ERR_NOT_FOUND {
return Node{}, ErrNotExist
}
if err == types.ERR_UNSUPPORT_TYPE {
panic("path must be either int(>=0) or string")
}
return Node{}, self.parser.syntaxError(err)
}
t := switchRawType(self.parser.s[start])
if t == _V_NONE {
return Node{}, self.parser.ExportError(err)
}
return newRawNode(self.parser.s[start:self.parser.p], t), nil
}

View File

@@ -1,57 +1,39 @@
// +build !amd64 !go1.16 go1.22
// +build !amd64,!arm64 go1.26 !go1.17 arm64,!go1.20
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`encoding/base64`
`encoding/json`
`fmt`
`unicode/utf8`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/compat`
)
func init() {
println("WARNING: sonic only supports Go1.16~1.20 && CPU amd64, but your environment is not suitable")
compat.Warn("sonic/ast")
}
func quote(buf *[]byte, val string) {
quoteString(buf, val)
}
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
}
func decodeBase64(src string) ([]byte, error) {
return base64.StdEncoding.DecodeString(src)
}
func encodeBase64(src []byte) string {
return base64.StdEncoding.EncodeToString(src)
}
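Both build paths wrap a standard base64 codec (base64x on amd64, encoding/base64 in the fallback) with the same StdEncoding alphabet. A quick round trip with the standard library alone, for reference:

// --- illustrative sketch, standard library only, not part of this commit ---
package main

import (
	"encoding/base64"
	"fmt"
)

func main() {
	enc := base64.StdEncoding.EncodeToString([]byte("hello"))
	dec, err := base64.StdEncoding.DecodeString(enc)
	fmt.Println(enc, string(dec), err) // aGVsbG8= hello <nil>
}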
func (self *Parser) decodeValue() (val types.JsonState) {
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
if e < 0 {
@@ -88,37 +70,34 @@ func (self *Node) encodeInterface(buf *[]byte) error {
return nil
}
func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
self.parser.p = 0
var err types.ParsingError
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
for _, p := range path {
if idx, ok := p.(int); ok && idx >= 0 {
if err = self.parser.searchIndex(idx); err != 0 {
return Node{}, self.parser.ExportError(err)
if err := self.searchIndex(idx); err != 0 {
return self.p, err
}
} else if key, ok := p.(string); ok {
if err = self.parser.searchKey(key); err != 0 {
return Node{}, self.parser.ExportError(err)
if err := self.searchKey(key); err != 0 {
return self.p, err
}
} else {
panic("path must be either int(>=0) or string")
}
}
var start = self.parser.p
if start, err = self.parser.skip(); err != 0 {
return Node{}, self.parser.ExportError(err)
var start int
var e types.ParsingError
if validate {
start, e = self.skip()
} else {
start, e = self.skipFast()
}
ns := len(self.parser.s)
if self.parser.p > ns || start >= ns || start>=self.parser.p {
return Node{}, fmt.Errorf("skip %d char out of json boundary", start)
if e != 0 {
return self.p, e
}
return start, 0
}
t := switchRawType(self.parser.s[start])
if t == _V_NONE {
return Node{}, self.parser.ExportError(err)
}
return newRawNode(self.parser.s[start:self.parser.p], t), nil
}
func validate_utf8(str string) bool {
return utf8.ValidString(str)
}

View File

@@ -17,36 +17,35 @@
package ast
import (
`encoding/base64`
`runtime`
`strconv`
`unsafe`
"encoding/base64"
"runtime"
"strconv"
"unsafe"
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/internal/utils"
"github.com/bytedance/sonic/unquote"
)
const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
var bytesNull = []byte("null")
const (
bytesNull = "null"
strNull = "null"
bytesTrue = "true"
bytesFalse = "false"
bytesObject = "{}"
bytesArray = "[]"
)
func isSpace(c byte) bool {
return (int(1<<c) & _blankCharsMask) != 0
}
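isSpace above (now replaced by utils.IsSpace) tests a byte against a single integer mask holding the four JSON whitespace characters; for any byte of 64 or more the shift yields 0, so every other byte falls through. A small self-contained check of the same trick, assuming nothing beyond the constant shown above:

// --- illustrative sketch, not part of this commit ---
package main

import "fmt"

const blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

func isSpace(c byte) bool {
	// only 0x09, 0x0A, 0x0D and 0x20 hit a set bit; shifts by 64 or more yield 0
	return (int(1<<c) & blankCharsMask) != 0
}

func main() {
	for _, c := range []byte{' ', '\t', '\n', '\r', ',', 'a'} {
		fmt.Printf("%q -> %v\n", c, isSpace(c))
	}
}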
//go:nocheckptr
func skipBlank(src string, pos int) int {
se := uintptr(rt.IndexChar(src, len(src)))
sp := uintptr(rt.IndexChar(src, pos))
for sp < se {
if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
if !utils.IsSpace(*(*byte)(unsafe.Pointer(sp))) {
break
}
sp += 1
@@ -63,7 +62,7 @@ func decodeNull(src string, pos int) (ret int) {
if ret > len(src) {
return -int(types.ERR_EOF)
}
if src[pos:ret] == bytesNull {
if src[pos:ret] == strNull {
return ret
} else {
return -int(types.ERR_INVALID_CHAR)
@@ -103,13 +102,13 @@ func decodeString(src string, pos int) (ret int, v string) {
return ret, v
}
vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
if !ok {
result, err := unquote.String(src[pos:ret])
if err != 0 {
return -int(types.ERR_INVALID_CHAR), ""
}
runtime.KeepAlive(src)
return ret, rt.Mem2Str(vv)
return ret, result
}
func decodeBinary(src string, pos int) (ret int, v []byte) {
@@ -287,67 +286,7 @@ func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState)
//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
sp := uintptr(rt.IndexChar(src, pos))
se := uintptr(rt.IndexChar(src, len(src)))
if uintptr(sp) >= se {
return -int(types.ERR_EOF)
}
if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
ss := sp
var pointer bool
var exponent bool
var lastIsDigit bool
var nextNeedDigit = true
for ; sp < se; sp += uintptr(1) {
c := *(*byte)(unsafe.Pointer(sp))
if isDigit(c) {
lastIsDigit = true
nextNeedDigit = false
continue
} else if nextNeedDigit {
return -int(types.ERR_INVALID_CHAR)
} else if c == '.' {
if !lastIsDigit || pointer || exponent || sp == ss {
return -int(types.ERR_INVALID_CHAR)
}
pointer = true
lastIsDigit = false
nextNeedDigit = true
continue
} else if c == 'e' || c == 'E' {
if !lastIsDigit || exponent {
return -int(types.ERR_INVALID_CHAR)
}
if sp == se-1 {
return -int(types.ERR_EOF)
}
exponent = true
lastIsDigit = false
nextNeedDigit = false
continue
} else if c == '-' || c == '+' {
if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
return -int(types.ERR_INVALID_CHAR)
}
lastIsDigit = false
nextNeedDigit = true
continue
} else {
break
}
}
if nextNeedDigit {
return -int(types.ERR_EOF)
}
runtime.KeepAlive(src)
return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
return utils.SkipNumber(src, pos)
}
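The hand-rolled skipNumber above is replaced by utils.SkipNumber; both must enforce the JSON number grammar (optional minus, digits, optional fraction, optional signed exponent). Assuming that grammar matches encoding/json, a quick way to see which forms must pass and which must be rejected:

// --- illustrative sketch, standard library only, not part of this commit ---
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// the first three are valid JSON numbers; the rest must be rejected
	for _, s := range []string{"0", "-12.5", "1e+9", "1.", ".5", "1e", "+1", "01"} {
		fmt.Printf("%-6s valid=%v\n", s, json.Valid([]byte(s)))
	}
}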
//go:nocheckptr
@@ -583,3 +522,36 @@ func skipArray(src string, pos int) (ret int, start int) {
pos++
}
}
// _DecodeString decodes a JSON string starting at pos and returns a Go string.
// - needEsc indicates whether to unescape escaped chars
// - hasEsc tells whether the returned string contains escaped chars
// - validStr enables validating the UTF-8 charset
//
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
p := NewParserObj(src)
p.p = pos
switch val := p.decodeValue(); val.Vt {
case types.V_STRING:
str := p.s[val.Iv : p.p-1]
if validStr && !validate_utf8(str) {
return "", -int(types.ERR_INVALID_UTF8), false
}
/* fast path: no escape sequence */
if val.Ep == -1 {
return str, p.p, false
} else if !needEsc {
return str, p.p, true
}
/* unquote the string */
out, err := unquote.String(str)
/* check for errors */
if err != 0 {
return "", -int(err), true
} else {
return out, p.p, true
}
default:
return "", -int(_ERR_UNSUPPORT_TYPE), false
}
}

View File

@@ -17,12 +17,12 @@
package ast
import (
`sync`
`unicode/utf8`
)
"sync"
"unicode/utf8"
const (
_MaxBuffer = 1024 // 1KB buffer size
"github.com/bytedance/gopkg/lang/dirtmake"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/option"
)
func quoteString(e *[]byte, s string) {
@@ -30,7 +30,7 @@ func quoteString(e *[]byte, s string) {
start := 0
for i := 0; i < len(s); {
if b := s[i]; b < utf8.RuneSelf {
if safeSet[b] {
if rt.SafeSet[b] {
i++
continue
}
@@ -54,8 +54,8 @@ func quoteString(e *[]byte, s string) {
// user-controlled strings are rendered into JSON
// and served to some browsers.
*e = append(*e, `u00`...)
*e = append(*e, hex[b>>4])
*e = append(*e, hex[b&0xF])
*e = append(*e, rt.Hex[b>>4])
*e = append(*e, rt.Hex[b&0xF])
}
i++
start = i
@@ -76,7 +76,7 @@ func quoteString(e *[]byte, s string) {
*e = append(*e, s[start:i]...)
}
*e = append(*e, `\u202`...)
*e = append(*e, hex[c&0xF])
*e = append(*e, rt.Hex[c&0xF])
i += size
start = i
continue
@@ -92,16 +92,29 @@ func quoteString(e *[]byte, s string) {
var bytesPool = sync.Pool{}
func (self *Node) MarshalJSON() ([]byte, error) {
if self == nil {
return bytesNull, nil
}
// fast path for raw node
if self.isRaw() {
return rt.Str2Mem(self.toString()), nil
}
buf := newBuffer()
err := self.encode(buf)
if err != nil {
freeBuffer(buf)
return nil, err
}
ret := make([]byte, len(*buf))
copy(ret, *buf)
freeBuffer(buf)
var ret []byte
if !rt.CanSizeResue(cap(*buf)) {
ret = *buf
} else {
ret = dirtmake.Bytes(len(*buf), len(*buf))
copy(ret, *buf)
freeBuffer(buf)
}
return ret, err
}
@@ -109,21 +122,24 @@ func newBuffer() *[]byte {
if ret := bytesPool.Get(); ret != nil {
return ret.(*[]byte)
} else {
buf := make([]byte, 0, _MaxBuffer)
buf := make([]byte, 0, option.DefaultAstBufferSize)
return &buf
}
}
func freeBuffer(buf *[]byte) {
if !rt.CanSizeResue(cap(*buf)) {
return
}
*buf = (*buf)[:0]
bytesPool.Put(buf)
}
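newBuffer and freeBuffer implement a sync.Pool of byte buffers with a retention guard (rt.CanSizeResue), so buffers that grew too large are dropped instead of being kept alive by the pool. A dependency-free sketch of the same pattern; maxPooled is an assumed threshold, not the library's actual limit:

// --- illustrative sketch, not part of this commit ---
package main

import (
	"fmt"
	"sync"
)

const maxPooled = 1 << 20 // assumed retention cap, stands in for rt.CanSizeResue

var bufPool sync.Pool

func newBuffer() *[]byte {
	if v := bufPool.Get(); v != nil {
		return v.(*[]byte)
	}
	buf := make([]byte, 0, 1024)
	return &buf
}

func freeBuffer(buf *[]byte) {
	if cap(*buf) > maxPooled {
		return // oversized buffers are not pooled, letting the GC reclaim them
	}
	*buf = (*buf)[:0]
	bufPool.Put(buf)
}

func main() {
	b := newBuffer()
	*b = append(*b, `{"k":1}`...)
	fmt.Println(string(*b))
	freeBuffer(b)
}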
func (self *Node) encode(buf *[]byte) error {
if self.IsRaw() {
if self.isRaw() {
return self.encodeRaw(buf)
}
switch self.Type() {
switch int(self.itype()) {
case V_NONE : return ErrNotExist
case V_ERROR : return self.Check()
case V_NULL : return self.encodeNull(buf)
@@ -139,16 +155,21 @@ func (self *Node) encode(buf *[]byte) error {
}
func (self *Node) encodeRaw(buf *[]byte) error {
raw, err := self.Raw()
if err != nil {
return err
lock := self.rlock()
if !self.isRaw() {
self.runlock()
return self.encode(buf)
}
raw := self.toString()
if lock {
self.runlock()
}
*buf = append(*buf, raw...)
return nil
}
func (self *Node) encodeNull(buf *[]byte) error {
*buf = append(*buf, bytesNull...)
*buf = append(*buf, strNull...)
return nil
}
@@ -193,20 +214,9 @@ func (self *Node) encodeArray(buf *[]byte) error {
*buf = append(*buf, '[')
var s = (*linkedNodes)(self.p)
var started bool
if nb > 0 {
n := s.At(0)
if n.Exists() {
if err := n.encode(buf); err != nil {
return err
}
started = true
}
}
for i := 1; i < nb; i++ {
n := s.At(i)
for i := 0; i < nb; i++ {
n := self.nodeAt(i)
if !n.Exists() {
continue
}
@@ -250,21 +260,10 @@ func (self *Node) encodeObject(buf *[]byte) error {
*buf = append(*buf, '{')
var s = (*linkedPairs)(self.p)
var started bool
if nb > 0 {
n := s.At(0)
if n.Value.Exists() {
if err := n.encode(buf); err != nil {
return err
}
started = true
}
}
for i := 1; i < nb; i++ {
n := s.At(i)
if !n.Value.Exists() {
for i := 0; i < nb; i++ {
n := self.pairAt(i)
if n == nil || !n.Value.Exists() {
continue
}
if started {

View File

@@ -17,6 +17,10 @@ func newError(err types.ParsingError, msg string) *Node {
}
}
func newErrorPair(err SyntaxError) *Pair {
return &Pair{0, "", *newSyntaxError(err)}
}
// Error returns error message if the node is invalid
func (self Node) Error() string {
if self.t != V_ERROR {
@@ -79,7 +83,7 @@ func (self SyntaxError) description() string {
/* check for empty source */
if self.Src == "" {
return fmt.Sprintf("no sources available: %#v", self)
return fmt.Sprintf("no sources available, the input json is empty: %#v", self)
}
/* prevent slicing before the beginning */

View File

@@ -17,30 +17,48 @@
package ast
import (
`fmt`
"fmt"
`github.com/bytedance/sonic/internal/native/types`
"github.com/bytedance/sonic/internal/caching"
"github.com/bytedance/sonic/internal/native/types"
)
type Pair struct {
hash uint64
Key string
Value Node
}
func NewPair(key string, val Node) Pair {
return Pair{
hash: caching.StrHash(key),
Key: key,
Value: val,
}
}
// Values returns iterator for array's children traversal
func (self *Node) Values() (ListIterator, error) {
if err := self.should(types.V_ARRAY, "an array"); err != nil {
if err := self.should(types.V_ARRAY); err != nil {
return ListIterator{}, err
}
return ListIterator{Iterator{p: self}}, nil
return self.values(), nil
}
func (self *Node) values() ListIterator {
return ListIterator{Iterator{p: self}}
}
// Properties returns iterator for object's children traversal
func (self *Node) Properties() (ObjectIterator, error) {
if err := self.should(types.V_OBJECT, "an object"); err != nil {
if err := self.should(types.V_OBJECT); err != nil {
return ObjectIterator{}, err
}
return ObjectIterator{Iterator{p: self}}, nil
return self.properties(), nil
}
func (self *Node) properties() ObjectIterator {
return ObjectIterator{Iterator{p: self}}
}
type Iterator struct {
@@ -114,7 +132,7 @@ next_start:
} else {
n := self.p.pairAt(self.i)
self.i++
if !n.Value.Exists() {
if n == nil || !n.Value.Exists() {
goto next_start
}
return n
@@ -155,11 +173,14 @@ type Scanner func(path Sequence, node *Node) bool
// ForEach scans one V_OBJECT node's children from JSON head to tail,
// and pass the Sequence and Node of corresponding JSON value.
//
// Especailly, if the node is not V_ARRAY or V_OBJECT,
// Especially, if the node is not V_ARRAY or V_OBJECT,
// the node itself will be returned and Sequence.Index == -1.
//
// NOTICE: A unsetted node WON'T trigger sc, but its index still counts into Path.Index
// NOTICE: An unset node WON'T trigger sc, but its index still counts into Path.Index
func (self *Node) ForEach(sc Scanner) error {
if err := self.checkRaw(); err != nil {
return err
}
switch self.itype() {
case types.V_ARRAY:
iter, err := self.Values()
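ForEach walks the children of a V_OBJECT or V_ARRAY node and hands each one to a Scanner callback (type Scanner func(path Sequence, node *Node) bool, shown above); returning false stops the scan. A hedged usage sketch, assuming the public sonic/ast API:

// --- illustrative usage sketch, assumes the public sonic/ast API ---
package main

import (
	"fmt"

	"github.com/bytedance/sonic/ast"
)

func main() {
	// an empty path returns the whole document as a node
	root, err := ast.NewSearcher(`{"a":1,"b":[2,3]}`).GetByPath()
	if err != nil {
		panic(err)
	}
	_ = root.ForEach(func(_ ast.Sequence, node *ast.Node) bool {
		raw, _ := node.Raw()
		fmt.Println(raw) // prints 1, then [2,3]
		return true      // returning false would stop the scan
	})
}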

File diff suppressed because it is too large

View File

@@ -17,14 +17,18 @@
package ast
import (
`fmt`
"fmt"
"sync"
"sync/atomic"
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/internal/utils"
"github.com/bytedance/sonic/unquote"
)
const (
_DEFAULT_NODE_CAP int = 8
_DEFAULT_NODE_CAP int = 16
_APPEND_GROW_SHIFT = 1
)
@@ -45,6 +49,7 @@ type Parser struct {
p int
s string
noLazy bool
loadOnce bool
skipValue bool
dbuf *byte
}
@@ -60,7 +65,7 @@ func (self *Parser) delim() types.ParsingError {
return types.ERR_EOF
}
/* check for the delimtier */
/* check for the delimiter */
if self.s[p] != ':' {
return types.ERR_INVALID_CHAR
}
@@ -79,7 +84,7 @@ func (self *Parser) object() types.ParsingError {
return types.ERR_EOF
}
/* check for the delimtier */
/* check for the delimiter */
if self.s[p] != '{' {
return types.ERR_INVALID_CHAR
}
@@ -98,7 +103,7 @@ func (self *Parser) array() types.ParsingError {
return types.ERR_EOF
}
/* check for the delimtier */
/* check for the delimiter */
if self.s[p] != '[' {
return types.ERR_INVALID_CHAR
}
@@ -110,11 +115,15 @@ func (self *Parser) array() types.ParsingError {
func (self *Parser) lspace(sp int) int {
ns := len(self.s)
for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
for ; sp<ns && utils.IsSpace(self.s[sp]); sp+=1 {}
return sp
}
func (self *Parser) backward() {
for ; self.p >= 0 && utils.IsSpace(self.s[self.p]); self.p-=1 {}
}
func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
sp := self.p
ns := len(self.s)
@@ -148,7 +157,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
if t == _V_NONE {
return Node{}, types.ERR_INVALID_CHAR
}
val = newRawNode(self.s[start:self.p], t)
val = newRawNode(self.s[start:self.p], t, false)
}else{
/* decode the value */
if val, err = self.Parse(); err != 0 {
@@ -157,7 +166,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
}
/* add the value to result */
ret.Add(val)
ret.Push(val)
self.p = self.lspace(self.p)
/* check for EOF */
@@ -210,7 +219,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote(key); err != 0 {
if key, err = unquote.String(key); err != 0 {
return Node{}, err
}
}
@@ -234,7 +243,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
if t == _V_NONE {
return Node{}, types.ERR_INVALID_CHAR
}
val = newRawNode(self.s[start:self.p], t)
val = newRawNode(self.s[start:self.p], t, false)
} else {
/* decode the value */
if val, err = self.Parse(); err != 0 {
@@ -244,7 +253,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
/* add the value to result */
// FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
ret.Add(Pair{Key: key, Value: val})
ret.Push(NewPair(key, val))
self.p = self.lspace(self.p)
/* check for EOF */
@@ -275,7 +284,7 @@ func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
}
/* unquote the string */
out, err := unquote(s)
out, err := unquote.String(s)
/* check for errors */
if err != 0 {
@@ -291,6 +300,10 @@ func (self *Parser) Pos() int {
return self.p
}
// Parse returns an ast.Node representing the parser's JSON.
// NOTICE: how lazily the value is parsed depends on the parser's options.
// By default it only parses the first layer, and the first child for Object or Array.
func (self *Parser) Parse() (Node, types.ParsingError) {
switch val := self.decodeValue(); val.Vt {
case types.V_EOF : return Node{}, types.ERR_EOF
@@ -299,22 +312,48 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
case types.V_FALSE : return falseNode, 0
case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
case types.V_ARRAY:
s := self.p - 1;
if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
self.p = p + 1
return Node{t: types.V_ARRAY}, 0
}
if self.noLazy {
if self.loadOnce {
self.noLazy = false
}
return self.decodeArray(new(linkedNodes))
}
// NOTICE: loadOnce always keeps the raw JSON for object or array
if self.loadOnce {
self.p = s
s, e := self.skipFast()
if e != 0 {
return Node{}, e
}
return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0
}
return newLazyArray(self), 0
case types.V_OBJECT:
s := self.p - 1;
if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
self.p = p + 1
return Node{t: types.V_OBJECT}, 0
}
// NOTICE: loadOnce always keeps the raw JSON for object or array
if self.noLazy {
if self.loadOnce {
self.noLazy = false
}
return self.decodeObject(new(linkedPairs))
}
if self.loadOnce {
self.p = s
s, e := self.skipFast()
if e != 0 {
return Node{}, e
}
return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0
}
return newLazyObject(self), 0
case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
@@ -355,7 +394,7 @@ func (self *Parser) searchKey(match string) types.ParsingError {
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote(key); err != 0 {
if key, err = unquote.String(key); err != 0 {
return err
}
}
@@ -471,11 +510,11 @@ func (self *Node) skipNextNode() *Node {
if t == _V_NONE {
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
}
val = newRawNode(parser.s[start:parser.p], t)
val = newRawNode(parser.s[start:parser.p], t, false)
}
/* add the value to result */
ret.Add(val)
ret.Push(val)
self.l++
parser.p = parser.lspace(parser.p)
@@ -510,7 +549,7 @@ func (self *Node) skipNextPair() (*Pair) {
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
return newErrorPair(parser.syntaxError(types.ERR_EOF))
}
/* check for empty object */
@@ -527,7 +566,7 @@ func (self *Node) skipNextPair() (*Pair) {
/* decode the key */
if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
}
/* extract the key */
@@ -536,35 +575,35 @@ func (self *Node) skipNextPair() (*Pair) {
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = unquote(key); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
if key, err = unquote.String(key); err != 0 {
return newErrorPair(parser.syntaxError(err))
}
}
/* expect a ':' delimiter */
if err = parser.delim(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
return newErrorPair(parser.syntaxError(err))
}
/* skip the value */
if start, err := parser.skipFast(); err != 0 {
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
return newErrorPair(parser.syntaxError(err))
} else {
t := switchRawType(parser.s[start])
if t == _V_NONE {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
}
val = newRawNode(parser.s[start:parser.p], t)
val = newRawNode(parser.s[start:parser.p], t, false)
}
/* add the value to result */
ret.Add(Pair{Key: key, Value: val})
ret.Push(NewPair(key, val))
self.l++
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
return newErrorPair(parser.syntaxError(types.ERR_EOF))
}
/* check for the next character */
@@ -577,7 +616,7 @@ func (self *Node) skipNextPair() (*Pair) {
self.setObject(ret)
return ret.At(ret.Len()-1)
default:
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR))
}
}
@@ -601,7 +640,7 @@ func Loads(src string) (int, interface{}, error) {
}
}
// LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
// LoadsUseNumber parse all json into interface{}, with numeric nodes cast to json.Number
func LoadsUseNumber(src string) (int, interface{}, error) {
ps := &Parser{s: src}
np, err := ps.Parse()
@@ -653,3 +692,77 @@ func (self *Parser) ExportError(err types.ParsingError) error {
Code: err,
}.Description())
}
func backward(src string, i int) int {
for ; i>=0 && utils.IsSpace(src[i]); i-- {}
return i
}
func newRawNode(str string, typ types.ValueType, lock bool) Node {
ret := Node{
t: typ | _V_RAW,
p: rt.StrPtr(str),
l: uint(len(str)),
}
if lock {
ret.m = new(sync.RWMutex)
}
return ret
}
var typeJumpTable = [256]types.ValueType{
'"' : types.V_STRING,
'-' : _V_NUMBER,
'0' : _V_NUMBER,
'1' : _V_NUMBER,
'2' : _V_NUMBER,
'3' : _V_NUMBER,
'4' : _V_NUMBER,
'5' : _V_NUMBER,
'6' : _V_NUMBER,
'7' : _V_NUMBER,
'8' : _V_NUMBER,
'9' : _V_NUMBER,
'[' : types.V_ARRAY,
'f' : types.V_FALSE,
'n' : types.V_NULL,
't' : types.V_TRUE,
'{' : types.V_OBJECT,
}
func switchRawType(c byte) types.ValueType {
return typeJumpTable[c]
}
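typeJumpTable dispatches on the first byte of a raw JSON value; any byte not listed maps to the zero value, which switchRawType's callers treat as _V_NONE, i.e. an invalid character. A tiny sketch of the same table-driven dispatch; the string kind names are illustrative, not the library's constants:

// --- illustrative sketch, not part of this commit ---
package main

import "fmt"

var kindByFirstByte = [256]string{
	'"': "string", '{': "object", '[': "array",
	't': "true", 'f': "false", 'n': "null",
	'-': "number", '0': "number", '1': "number", '2': "number", '3': "number",
	'4': "number", '5': "number", '6': "number", '7': "number", '8': "number", '9': "number",
}

func main() {
	for _, s := range []string{`"hi"`, `{"a":1}`, `[1]`, `true`, `null`, `-3.2`, `?oops`} {
		kind := kindByFirstByte[s[0]]
		if kind == "" {
			kind = "invalid" // the zero entry plays the role of _V_NONE
		}
		fmt.Printf("%-8s -> %s\n", s, kind)
	}
}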
func (self *Node) loadt() types.ValueType {
return (types.ValueType)(atomic.LoadInt64(&self.t))
}
func (self *Node) lock() bool {
if m := self.m; m != nil {
m.Lock()
return true
}
return false
}
func (self *Node) unlock() {
if m := self.m; m != nil {
m.Unlock()
}
}
func (self *Node) rlock() bool {
if m := self.m; m != nil {
m.RLock()
return true
}
return false
}
func (self *Node) runlock() {
if m := self.m; m != nil {
m.RUnlock()
}
}

View File

@@ -16,8 +16,28 @@
package ast
import (
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/native/types`
)
// SearchOptions controls Searcher's behavior
type SearchOptions struct {
// ValidateJSON indicates whether the searcher validates the entire JSON
ValidateJSON bool
// CopyReturn indicates whether the searcher copies the result JSON instead of referencing the input.
// This can help reduce memory usage if you cache the results
CopyReturn bool
// ConcurrentRead indicates whether the searcher returns a concurrently-READ-safe node,
// including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON
ConcurrentRead bool
}
type Searcher struct {
parser Parser
SearchOptions
}
func NewSearcher(str string) *Searcher {
@@ -26,5 +46,112 @@ func NewSearcher(str string) *Searcher {
s: str,
noLazy: false,
},
SearchOptions: SearchOptions{
ValidateJSON: true,
},
}
}
// GetByPathCopy searches in depth from the top-level JSON and returns a **Copied** JSON node at the path location
func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) {
self.CopyReturn = true
return self.getByPath(path...)
}
// GetByPath searches in depth from the top-level JSON and returns a **Referenced** JSON node at the path location
//
// WARN: this search directly references partial JSON from the top-level JSON, which is faster
// but may consume more memory.
func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
return self.getByPath(path...)
}
func (self *Searcher) getByPath(path ...interface{}) (Node, error) {
var err types.ParsingError
var start int
self.parser.p = 0
start, err = self.parser.getByPath(self.ValidateJSON, path...)
if err != 0 {
// for compatibility with old version
if err == types.ERR_NOT_FOUND {
return Node{}, ErrNotExist
}
if err == types.ERR_UNSUPPORT_TYPE {
panic("path must be either int(>=0) or string")
}
return Node{}, self.parser.syntaxError(err)
}
t := switchRawType(self.parser.s[start])
if t == _V_NONE {
return Node{}, self.parser.ExportError(err)
}
// copy the string to reduce memory usage
var raw string
if self.CopyReturn {
raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p]))
} else {
raw = self.parser.s[start:self.parser.p]
}
return newRawNode(raw, t, self.ConcurrentRead), nil
}
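Putting the Searcher together: GetByPath returns a node that references the located slice of the input, GetByPathCopy detaches it (useful when caching results), and ConcurrentRead guards the returned node with an RWMutex so the read-only accessors listed above can be called from multiple goroutines. A hedged usage sketch, assuming the public sonic/ast API:

// --- illustrative usage sketch, assumes the public sonic/ast API ---
package main

import (
	"fmt"

	"github.com/bytedance/sonic/ast"
)

func main() {
	src := `{"user":{"id":42,"name":"ada"}}`

	// path segments are string keys and non-negative int indexes
	node, err := ast.NewSearcher(src).GetByPath("user", "id")
	if err != nil {
		panic(err)
	}
	id, _ := node.Int64()
	fmt.Println(id) // 42

	// GetByPathCopy returns a node detached from src, e.g. for long-lived caches
	name, err := ast.NewSearcher(src).GetByPathCopy("user", "name")
	if err != nil {
		panic(err)
	}
	s, _ := name.String()
	fmt.Println(s) // ada
}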
// _GetByPath searches a path and returns the location and type of the target
func _GetByPath(src string, path ...interface{}) (start int, end int, typ int, err error) {
p := NewParserObj(src)
s, e := p.getByPath(false, path...)
if e != 0 {
// for compatibility with old version
if e == types.ERR_NOT_FOUND {
return -1, -1, 0, ErrNotExist
}
if e == types.ERR_UNSUPPORT_TYPE {
panic("path must be either int(>=0) or string")
}
return -1, -1, 0, p.syntaxError(e)
}
t := switchRawType(p.s[s])
if t == _V_NONE {
return -1, -1, 0, ErrNotExist
}
if t == _V_NUMBER {
p.p = 1 + backward(p.s, p.p-1)
}
return s, p.p, int(t), nil
}
// _ValidSyntax checks whether the input has valid JSON syntax,
// without validating the UTF-8 charset
func _ValidSyntax(json string) bool {
p := NewParserObj(json)
_, e := p.skip()
if e != 0 {
return false
}
if skipBlank(p.s, p.p) != -int(types.ERR_EOF) {
return false
}
return true
}
// _SkipFast skips a JSON value using the fast-skip algorithm,
// without strictly validating JSON syntax or the UTF-8 charset.
func _SkipFast(src string, i int) (int, int, error) {
p := NewParserObj(src)
p.p = i
s, e := p.skipFast()
if e != 0 {
return -1, -1, p.ExportError(e)
}
t := switchRawType(p.s[s])
if t == _V_NONE {
return -1, -1, ErrNotExist
}
if t == _V_NUMBER {
p.p = 1 + backward(p.s, p.p-1)
}
return s, p.p, nil
}

View File

@@ -1,55 +0,0 @@
// +build !go1.20
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`unsafe`
`unicode/utf8`
`github.com/bytedance/sonic/internal/rt`
)
//go:noescape
//go:linkname memmove runtime.memmove
//goland:noinspection GoUnusedParameter
func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr)
//go:linkname unsafe_NewArray reflect.unsafe_NewArray
//goland:noinspection GoUnusedParameter
func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer
//go:linkname growslice runtime.growslice
//goland:noinspection GoUnusedParameter
func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice
//go:nosplit
func mem2ptr(s []byte) unsafe.Pointer {
return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr
}
var (
//go:linkname safeSet encoding/json.safeSet
safeSet [utf8.RuneSelf]bool
//go:linkname hex encoding/json.hex
hex string
)
//go:linkname unquoteBytes encoding/json.unquoteBytes
func unquoteBytes(s []byte) (t []byte, ok bool)

View File

@@ -1,55 +0,0 @@
// +build go1.20
/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`unsafe`
`unicode/utf8`
`github.com/bytedance/sonic/internal/rt`
)
//go:noescape
//go:linkname memmove runtime.memmove
//goland:noinspection GoUnusedParameter
func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr)
//go:linkname unsafe_NewArray reflect.unsafe_NewArray
//goland:noinspection GoUnusedParameter
func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer
//go:linkname growslice reflect.growslice
//goland:noinspection GoUnusedParameter
func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice
//go:nosplit
func mem2ptr(s []byte) unsafe.Pointer {
return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr
}
var (
//go:linkname safeSet encoding/json.safeSet
safeSet [utf8.RuneSelf]bool
//go:linkname hex encoding/json.hex
hex string
)
//go:linkname unquoteBytes encoding/json.unquoteBytes
func unquoteBytes(s []byte) (t []byte, ok bool)