163 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			163 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2014 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| //go:generate go run gen.go gen_trieval.go
 | |
| 
 | |
| // Package cases provides general and language-specific case mappers.
 | |
| package cases // import "golang.org/x/text/cases"
 | |
| 
 | |
| import (
 | |
| 	"golang.org/x/text/language"
 | |
| 	"golang.org/x/text/transform"
 | |
| )
 | |
| 
 | |
// References:
// - The Unicode Standard, Sections 3.13, 4.2, and 5.18.
// - https://www.unicode.org/reports/tr29/
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings
 | |
| 
 | |
| // TODO:
 | |
| // - Case folding
 | |
| // - Wide and Narrow?
 | |
| // - Segmenter option for title casing.
 | |
| // - ASCII fast paths
 | |
| // - Encode Soft-Dotted property within trie somehow.
 | |
| 
 | |
| // A Caser transforms given input to a certain case. It implements
 | |
| // transform.Transformer.
 | |
| //
 | |
| // A Caser may be stateful and should therefore not be shared between
 | |
| // goroutines.
 | |
| type Caser struct {
 | |
| 	t transform.SpanningTransformer
 | |
| }
 | |
| 
 | |
| // Bytes returns a new byte slice with the result of converting b to the case
 | |
| // form implemented by c.
 | |
| func (c Caser) Bytes(b []byte) []byte {
 | |
| 	b, _, _ = transform.Bytes(c.t, b)
 | |
| 	return b
 | |
| }
 | |
| 
 | |
| // String returns a string with the result of transforming s to the case form
 | |
| // implemented by c.
 | |
| func (c Caser) String(s string) string {
 | |
| 	s, _, _ = transform.String(c.t, s)
 | |
| 	return s
 | |
| }
 | |
| 
 | |
| // Reset resets the Caser to be reused for new input after a previous call to
 | |
| // Transform.
 | |
| func (c Caser) Reset() { c.t.Reset() }
 | |
| 
 | |
| // Transform implements the transform.Transformer interface and transforms the
 | |
| // given input to the case form implemented by c.
 | |
| func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
 | |
| 	return c.t.Transform(dst, src, atEOF)
 | |
| }
 | |
| 
 | |
| // Span implements the transform.SpanningTransformer interface.
 | |
| func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
 | |
| 	return c.t.Span(src, atEOF)
 | |
| }
 | |
| 
 | |
| // Upper returns a Caser for language-specific uppercasing.
 | |
| func Upper(t language.Tag, opts ...Option) Caser {
 | |
| 	return Caser{makeUpper(t, getOpts(opts...))}
 | |
| }
 | |
| 
 | |
| // Lower returns a Caser for language-specific lowercasing.
 | |
| func Lower(t language.Tag, opts ...Option) Caser {
 | |
| 	return Caser{makeLower(t, getOpts(opts...))}
 | |
| }
 | |
| 
 | |
| // Title returns a Caser for language-specific title casing. It uses an
 | |
| // approximation of the default Unicode Word Break algorithm.
 | |
| func Title(t language.Tag, opts ...Option) Caser {
 | |
| 	return Caser{makeTitle(t, getOpts(opts...))}
 | |
| }
 | |
| 
 | |
| // Fold returns a Caser that implements Unicode case folding. The returned Caser
 | |
| // is stateless and safe to use concurrently by multiple goroutines.
 | |
| //
 | |
| // Case folding does not normalize the input and may not preserve a normal form.
 | |
| // Use the collate or search package for more convenient and linguistically
 | |
| // sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
 | |
| // where security aspects are a concern.
 | |
| func Fold(opts ...Option) Caser {
 | |
| 	return Caser{makeFold(getOpts(opts...))}
 | |
| }
 | |
| 
 | |
| // An Option is used to modify the behavior of a Caser.
 | |
| type Option func(o options) options
 | |
| 
 | |
// TODO: consider having these options take a boolean as well, like
// HandleFinalSigma. The advantage of that approach is that other providers of
// a lower-case algorithm could set different defaults by prefixing a
// user-provided slice of options with their own. This is handy, for instance,
// for the precis package, which would override the default to not handle the
// Greek final sigma.
 | |
| 
 | |
| var (
 | |
| 	// NoLower disables the lowercasing of non-leading letters for a title
 | |
| 	// caser.
 | |
| 	NoLower Option = noLower
 | |
| 
 | |
| 	// Compact omits mappings in case folding for characters that would grow the
 | |
| 	// input. (Unimplemented.)
 | |
| 	Compact Option = compact
 | |
| )
 | |
| 
 | |
| // TODO: option to preserve a normal form, if applicable?
 | |
| 
 | |
// options collects the settings that Option functions can adjust. The zero
// value is the default configuration produced by getOpts when no options are
// supplied.
type options struct {
	// noLower is set by the NoLower option.
	noLower bool
	// simple is set by the Compact option.
	simple bool

	// TODO: segmenter, max ignorable, alternative versions, etc.

	// ignoreFinalSigma is set by HandleFinalSigma(false) and cleared by
	// HandleFinalSigma(true).
	ignoreFinalSigma bool
}
 | |
| 
 | |
| func getOpts(o ...Option) (res options) {
 | |
| 	for _, f := range o {
 | |
| 		res = f(res)
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func noLower(o options) options {
 | |
| 	o.noLower = true
 | |
| 	return o
 | |
| }
 | |
| 
 | |
| func compact(o options) options {
 | |
| 	o.simple = true
 | |
| 	return o
 | |
| }
 | |
| 
 | |
| // HandleFinalSigma specifies whether the special handling of Greek final sigma
 | |
| // should be enabled. Unicode prescribes handling the Greek final sigma for all
 | |
| // locales, but standards like IDNA and PRECIS override this default.
 | |
| func HandleFinalSigma(enable bool) Option {
 | |
| 	if enable {
 | |
| 		return handleFinalSigma
 | |
| 	}
 | |
| 	return ignoreFinalSigma
 | |
| }
 | |
| 
 | |
| func ignoreFinalSigma(o options) options {
 | |
| 	o.ignoreFinalSigma = true
 | |
| 	return o
 | |
| }
 | |
| 
 | |
| func handleFinalSigma(o options) options {
 | |
| 	o.ignoreFinalSigma = false
 | |
| 	return o
 | |
| }
 |