处理AI胡乱生成的烂摊子

This commit is contained in:
2025-09-07 20:36:02 +08:00
parent ba513e0827
commit c4522b974b
403 changed files with 22915 additions and 44424 deletions

View File

@@ -17,36 +17,35 @@
package ast
import (
`encoding/base64`
`runtime`
`strconv`
`unsafe`
"encoding/base64"
"runtime"
"strconv"
"unsafe"
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
"github.com/bytedance/sonic/internal/native/types"
"github.com/bytedance/sonic/internal/rt"
"github.com/bytedance/sonic/internal/utils"
"github.com/bytedance/sonic/unquote"
)
// _blankCharsMask is a bitmask with one bit set per blank character, where the
// bit position is the character's byte value: space, tab, carriage return and
// line feed.
const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
// bytesNull holds the text "null" as a byte slice.
// NOTE(review): bytesNull is also declared as a string constant in the const
// group directly below; the two declarations cannot coexist in one package
// scope. This looks like the old and new sides of a diff shown together —
// confirm which one is current.
var bytesNull = []byte("null")
// Fixed literal texts (presumably the JSON spellings of null, true, false and
// of an empty object / empty array — confirm against their users).
const (
bytesNull = "null"
strNull = "null"
bytesTrue = "true"
bytesFalse = "false"
bytesObject = "{}"
bytesArray = "[]"
)
// isSpace reports whether c is one of the blank characters encoded in
// _blankCharsMask (space, tab, carriage return, line feed).
func isSpace(c byte) bool {
	// Turn the byte value into a single-bit mask, then test it against the set.
	bit := int(1 << c)
	return bit&_blankCharsMask != 0
}
//go:nocheckptr
func skipBlank(src string, pos int) int {
se := uintptr(rt.IndexChar(src, len(src)))
sp := uintptr(rt.IndexChar(src, pos))
for sp < se {
if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
if !utils.IsSpace(*(*byte)(unsafe.Pointer(sp))) {
break
}
sp += 1
@@ -63,7 +62,7 @@ func decodeNull(src string, pos int) (ret int) {
if ret > len(src) {
return -int(types.ERR_EOF)
}
if src[pos:ret] == bytesNull {
if src[pos:ret] == strNull {
return ret
} else {
return -int(types.ERR_INVALID_CHAR)
@@ -103,13 +102,13 @@ func decodeString(src string, pos int) (ret int, v string) {
return ret, v
}
vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
if !ok {
result, err := unquote.String(src[pos:ret])
if err != 0 {
return -int(types.ERR_INVALID_CHAR), ""
}
runtime.KeepAlive(src)
return ret, rt.Mem2Str(vv)
return ret, result
}
func decodeBinary(src string, pos int) (ret int, v []byte) {
@@ -287,67 +286,7 @@ func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState)
// skipNumber scans a number literal in src starting at byte offset pos: an
// optional leading '-', digits, an optional '.' fraction and an optional
// 'e'/'E' exponent with an optional sign.
//
// It returns the byte offset of the first character after the number, or a
// negated error code: -ERR_EOF when the input ends where more characters are
// required, -ERR_INVALID_CHAR when a character appears where it is not allowed.
//
// NOTE(review): as listed, the function body contains two consecutive return
// statements at the end; the second one (utils.SkipNumber) can never execute.
// This looks like the removed implementation and its one-line replacement
// shown together in a diff view — confirm against the actual file.
//
//go:nocheckptr
func skipNumber(src string, pos int) (ret int) {
// sp is the current read address and se the end address, both obtained from
// rt.IndexChar for offsets pos and len(src).
sp := uintptr(rt.IndexChar(src, pos))
se := uintptr(rt.IndexChar(src, len(src)))
// Nothing to read at pos.
if uintptr(sp) >= se {
return -int(types.ERR_EOF)
}
// Consume an optional leading minus sign.
if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
sp += 1
}
// ss remembers where the digits are expected to start (after the sign).
ss := sp
// Scanner state:
//   pointer       - a '.' has already been seen
//   exponent      - an 'e'/'E' has already been seen
//   lastIsDigit   - the previous character was a digit
//   nextNeedDigit - the next character must be a digit
var pointer bool
var exponent bool
var lastIsDigit bool
var nextNeedDigit = true
for ; sp < se; sp += uintptr(1) {
c := *(*byte)(unsafe.Pointer(sp))
if isDigit(c) {
lastIsDigit = true
nextNeedDigit = false
continue
} else if nextNeedDigit {
// A digit was mandatory here (start of number, after '.', after a sign).
return -int(types.ERR_INVALID_CHAR)
} else if c == '.' {
// A fraction point must follow a digit and may appear only once, and
// never after the exponent.
if !lastIsDigit || pointer || exponent || sp == ss {
return -int(types.ERR_INVALID_CHAR)
}
pointer = true
lastIsDigit = false
nextNeedDigit = true
continue
} else if c == 'e' || c == 'E' {
// An exponent marker must follow a digit and may appear only once.
if !lastIsDigit || exponent {
return -int(types.ERR_INVALID_CHAR)
}
// The exponent marker is the last byte of the input.
if sp == se-1 {
return -int(types.ERR_EOF)
}
exponent = true
lastIsDigit = false
// Not forced to a digit here because a sign may come next.
// NOTE(review): this also means a non-digit, non-sign byte right after
// 'e' (e.g. "1ex") ends the loop without an error — confirm intended.
nextNeedDigit = false
continue
} else if c == '-' || c == '+' {
// A sign is only legal immediately after the exponent marker.
if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
return -int(types.ERR_INVALID_CHAR)
}
lastIsDigit = false
nextNeedDigit = true
continue
} else {
// Any other character terminates the number.
break
}
}
// Input ended (or the number stopped) while a digit was still required.
if nextNeedDigit {
return -int(types.ERR_EOF)
}
// Keep src reachable while raw addresses derived from it are in use.
runtime.KeepAlive(src)
// Convert the final address back to a byte offset from the string's data pointer.
return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
// NOTE(review): unreachable as listed (see the function comment).
return utils.SkipNumber(src, pos)
}
//go:nocheckptr
@@ -583,3 +522,36 @@ func skipArray(src string, pos int) (ret int, start int) {
pos++
}
}
// _DecodeString decodes the value found at offset pos in src, which must be a
// string, and returns it as a Go string.
//
//   - needEsc: when true, escape sequences in the string are unescaped via
//     unquote.String; when false, the raw text is returned unchanged.
//   - validStr: when true, the raw text is checked with validate_utf8 first.
//
// Returns the string v, the parser position ret after the value, and hasEsc,
// which tells whether the string contains escape sequences. On failure v is
// empty and ret is a negated error code.
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
	parser := NewParserObj(src)
	parser.p = pos

	// Only string values are supported.
	tok := parser.decodeValue()
	if tok.Vt != types.V_STRING {
		return "", -int(_ERR_UNSUPPORT_TYPE), false
	}

	// Raw text from the value's start index up to the byte before the final
	// parser position (presumably the closing quote).
	end := parser.p
	raw := parser.s[tok.Iv : end-1]
	if validStr && !validate_utf8(raw) {
		return "", -int(types.ERR_INVALID_UTF8), false
	}

	// Ep == -1 marks a string without escape sequences; in that case, or when
	// the caller did not ask for unescaping, the raw text is returned as is.
	escaped := tok.Ep != -1
	if !escaped || !needEsc {
		return raw, end, escaped
	}

	// Unescape and report any unquoting error as a negated code.
	unescaped, errno := unquote.String(raw)
	if errno != 0 {
		return "", -int(errno), true
	}
	return unescaped, end, true
}