71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
 * Copyright 2022 ByteDance Inc.
 | 
						|
 *
 | 
						|
 * Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
 * you may not use this file except in compliance with the License.
 | 
						|
 * You may obtain a copy of the License at
 | 
						|
 *
 | 
						|
 *     http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 *
 | 
						|
 * Unless required by applicable law or agreed to in writing, software
 | 
						|
 * distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
 * See the License for the specific language governing permissions and
 | 
						|
 * limitations under the License.
 | 
						|
 */
 | 
						|
 | 
						|
package utf8
 | 
						|
 | 
						|
import (
 | 
						|
    `github.com/bytedance/sonic/internal/rt`
 | 
						|
    `github.com/bytedance/sonic/internal/native/types`
 | 
						|
    `github.com/bytedance/sonic/internal/native`
 | 
						|
)
 | 
						|
 | 
						|
// CorrectWith corrects the invalid utf8 byte with repl string.
 | 
						|
func CorrectWith(dst []byte, src []byte, repl string) []byte {
 | 
						|
    sstr := rt.Mem2Str(src)
 | 
						|
    sidx := 0
 | 
						|
 | 
						|
    /* state machine records the invalid postions */
 | 
						|
    m := types.NewStateMachine()
 | 
						|
    m.Sp = 0 // invalid utf8 numbers
 | 
						|
 | 
						|
    for sidx < len(sstr) {
 | 
						|
        scur  := sidx
 | 
						|
        ecode := native.ValidateUTF8(&sstr, &sidx, m)
 | 
						|
 | 
						|
        if m.Sp != 0 {
 | 
						|
            if m.Sp > len(sstr) {
 | 
						|
                panic("numbers of invalid utf8 exceed the string len!")
 | 
						|
            }
 | 
						|
        }
 | 
						|
        
 | 
						|
        for i := 0; i < m.Sp; i++ {
 | 
						|
            ipos := m.Vt[i] // invalid utf8 position
 | 
						|
            dst  = append(dst, sstr[scur:ipos]...)
 | 
						|
            dst  = append(dst, repl...)
 | 
						|
            scur = m.Vt[i] + 1
 | 
						|
        }
 | 
						|
        /* append the remained valid utf8 bytes */
 | 
						|
        dst = append(dst, sstr[scur:sidx]...)
 | 
						|
 | 
						|
        /* not enough space, reset and continue */
 | 
						|
        if ecode != 0 {
 | 
						|
            m.Sp = 0
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    types.FreeStateMachine(m)
 | 
						|
    return dst
 | 
						|
}
 | 
						|
 | 
						|
// Validate is a simd-accelereated drop-in replacement for the standard library's utf8.Valid.
 | 
						|
func Validate(src []byte) bool {
 | 
						|
    return ValidateString(rt.Mem2Str(src))
 | 
						|
}
 | 
						|
 | 
						|
// ValidateString as Validate, but for string.
 | 
						|
func ValidateString(src string) bool {
 | 
						|
    return native.ValidateUTF8Fast(&src) == 0
 | 
						|
} |