73 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2015 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package precis
 | |
| 
 | |
| import (
 | |
| 	"unicode"
 | |
| 	"unicode/utf8"
 | |
| 
 | |
| 	"golang.org/x/text/transform"
 | |
| )
 | |
| 
 | |
// nickAdditionalMapping holds the streaming state used by Transform while it
// applies the RFC 8266 §2.1 additional mapping rule (space normalization).
//
// The two flags are carried across Transform calls until Reset is invoked:
//
//   - notStart is set once the first non-space rune has been written, so
//     spaces seen before it are dropped rather than emitted.
//   - prevSpace is set while the most recently consumed rune(s) were spaces
//     that have not been emitted yet; it is cleared when a non-space rune is
//     written.
type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…
	notStart, prevSpace bool
}
 | |
| 
 | |
| func (t *nickAdditionalMapping) Reset() {
 | |
| 	t.prevSpace = false
 | |
| 	t.notStart = false
 | |
| }
 | |
| 
 | |
| func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
 | |
| 	// RFC 8266 §2.1.  Rules
 | |
| 	//
 | |
| 	// 2.  Additional Mapping Rule: The additional mapping rule consists of
 | |
| 	//     the following sub-rules.
 | |
| 	//
 | |
| 	//     a.  Map any instances of non-ASCII space to SPACE (U+0020); a
 | |
| 	//         non-ASCII space is any Unicode code point having a general
 | |
| 	//         category of "Zs", naturally with the exception of SPACE
 | |
| 	//         (U+0020).  (The inclusion of only ASCII space prevents
 | |
| 	//         confusion with various non-ASCII space code points, many of
 | |
| 	//         which are difficult to reproduce across different input
 | |
| 	//         methods.)
 | |
| 	//
 | |
| 	//     b.  Remove any instances of the ASCII space character at the
 | |
| 	//         beginning or end of a nickname (e.g., "stpeter " is mapped to
 | |
| 	//         "stpeter").
 | |
| 	//
 | |
| 	//     c.  Map interior sequences of more than one ASCII space character
 | |
| 	//         to a single ASCII space character (e.g., "St  Peter" is
 | |
| 	//         mapped to "St Peter").
 | |
| 	for nSrc < len(src) {
 | |
| 		r, size := utf8.DecodeRune(src[nSrc:])
 | |
| 		if size == 0 { // Incomplete UTF-8 encoding
 | |
| 			if !atEOF {
 | |
| 				return nDst, nSrc, transform.ErrShortSrc
 | |
| 			}
 | |
| 			size = 1
 | |
| 		}
 | |
| 		if unicode.Is(unicode.Zs, r) {
 | |
| 			t.prevSpace = true
 | |
| 		} else {
 | |
| 			if t.prevSpace && t.notStart {
 | |
| 				dst[nDst] = ' '
 | |
| 				nDst += 1
 | |
| 			}
 | |
| 			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
 | |
| 				nDst += size
 | |
| 				return nDst, nSrc, transform.ErrShortDst
 | |
| 			}
 | |
| 			nDst += size
 | |
| 			t.prevSpace = false
 | |
| 			t.notStart = true
 | |
| 		}
 | |
| 		nSrc += size
 | |
| 	}
 | |
| 	return nDst, nSrc, nil
 | |
| }
 |