(feat) Add auto-discovery in k8s | Adarsh
This commit is contained in:
		
							
								
								
									
										198
									
								
								vendor/golang.org/x/text/unicode/bidi/bidi.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								vendor/golang.org/x/text/unicode/bidi/bidi.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,198 @@ | ||||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| //go:generate go run gen.go gen_trieval.go gen_ranges.go | ||||
|  | ||||
| // Package bidi contains functionality for bidirectional text support. | ||||
| // | ||||
| // See https://www.unicode.org/reports/tr9. | ||||
| // | ||||
| // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways | ||||
| // and without notice. | ||||
| package bidi // import "golang.org/x/text/unicode/bidi" | ||||
|  | ||||
| // TODO: | ||||
| // The following functionality would not be hard to implement, but hinges on | ||||
| // the definition of a Segmenter interface. For now this is up to the user. | ||||
| // - Iterate over paragraphs | ||||
| // - Segmenter to iterate over runs directly from a given text. | ||||
| // Also: | ||||
| // - Transformer for reordering? | ||||
| // - Transformer (validator, really) for Bidi Rule. | ||||
|  | ||||
| // This API tries to avoid dealing with embedding levels for now. Under the hood | ||||
| // these will be computed, but the question is to which extent the user should | ||||
| // know they exist. We should at some point allow the user to specify an | ||||
| // embedding hierarchy, though. | ||||
|  | ||||
| // A Direction indicates the overall flow of text. | ||||
| // | ||||
| // The zero value of Direction is LeftToRight. | ||||
| type Direction int | ||||
|  | ||||
| const ( | ||||
| 	// LeftToRight indicates the text contains no right-to-left characters and | ||||
| 	// that either there are some left-to-right characters or the option | ||||
| 	// DefaultDirection(LeftToRight) was passed. | ||||
| 	LeftToRight Direction = iota | ||||
|  | ||||
| 	// RightToLeft indicates the text contains no left-to-right characters and | ||||
| 	// that either there are some right-to-left characters or the option | ||||
| 	// DefaultDirection(RightToLeft) was passed. | ||||
| 	RightToLeft | ||||
|  | ||||
| 	// Mixed indicates text contains both left-to-right and right-to-left | ||||
| 	// characters. | ||||
| 	Mixed | ||||
|  | ||||
| 	// Neutral means that text contains neither left-to-right nor right-to-left | ||||
| 	// characters and that no default direction has been set. | ||||
| 	Neutral | ||||
| ) | ||||
|  | ||||
| // options is the value an Option receives a pointer to. It currently declares | ||||
| // no fields. | ||||
| type options struct{} | ||||
|  | ||||
| // An Option is an option for Bidi processing. | ||||
| type Option func(*options) | ||||
|  | ||||
| // ICU allows the user to define embedding levels. This may be used, for example, | ||||
| // to use hierarchical structure of markup languages to define embeddings. | ||||
| // The following option may be a way to expose this functionality in this API. | ||||
| // // LevelFunc sets a function that associates nesting levels with the given text. | ||||
| // // The levels function will be called with monotonically increasing values for p. | ||||
| // func LevelFunc(levels func(p int) int) Option { | ||||
| // 	panic("unimplemented") | ||||
| // } | ||||
|  | ||||
| // DefaultDirection sets the default direction for a Paragraph. The direction is | ||||
| // overridden if the text contains directional characters. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func DefaultDirection(d Direction) Option { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // A Paragraph holds a single Paragraph for Bidi processing. | ||||
| // | ||||
| // The struct currently declares no fields. | ||||
| type Paragraph struct { | ||||
| 	// buffers | ||||
| } | ||||
|  | ||||
| // SetBytes configures p for the given paragraph text. It replaces text | ||||
| // previously set by SetBytes or SetString. If b contains a paragraph separator | ||||
| // it will only process the first paragraph and report the number of bytes | ||||
| // consumed from b including this separator. Error may be non-nil if options are | ||||
| // given. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // SetString configures p for the given paragraph text. It replaces text | ||||
| // previously set by SetBytes or SetString. If s contains a paragraph separator | ||||
| // it will only process the first paragraph and report the number of bytes | ||||
| // consumed from s including this separator. Error may be non-nil if options are | ||||
| // given. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // IsLeftToRight reports whether the principal direction of rendering for this | ||||
| // paragraph is left-to-right. If this returns false, the principal direction | ||||
| // of rendering is right-to-left. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) IsLeftToRight() bool { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Direction returns the direction of the text of this paragraph. | ||||
| // | ||||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) Direction() Direction { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // RunAt reports the Run at the given position of the input text. | ||||
| // | ||||
| // This method can be used for computing line breaks on paragraphs. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) RunAt(pos int) Run { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Order computes the visual ordering of all the runs in a Paragraph. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) Order() (Ordering, error) { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Line computes the visual ordering of runs for a single line starting and | ||||
| // ending at the given positions in the original text. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (p *Paragraph) Line(start, end int) (Ordering, error) { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // An Ordering holds the computed visual order of runs of a Paragraph. Calling | ||||
| // SetBytes or SetString on the originating Paragraph invalidates an Ordering. | ||||
| // The methods of an Ordering should only be called by one goroutine at a time. | ||||
| // | ||||
| // The struct currently declares no fields. | ||||
| type Ordering struct{} | ||||
|  | ||||
| // Direction reports the directionality of the runs. | ||||
| // | ||||
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (o *Ordering) Direction() Direction { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // NumRuns returns the number of runs. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (o *Ordering) NumRuns() int { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Run returns the ith run within the ordering. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (o *Ordering) Run(i int) Run { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // TODO: perhaps with options. | ||||
| // // Reorder creates a reader that reads the runes in visual order per character. | ||||
| // // Modifiers remain after the runes they modify. | ||||
| // func (l *Runs) Reorder() io.Reader { | ||||
| // 	panic("unimplemented") | ||||
| // } | ||||
|  | ||||
| // A Run is a continuous sequence of characters of a single direction. | ||||
| // | ||||
| // The struct currently declares no fields. | ||||
| type Run struct { | ||||
| } | ||||
|  | ||||
| // String returns the text of the run in its original order. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (r *Run) String() string { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Bytes returns the text of the run in its original order. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (r *Run) Bytes() []byte { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // TODO: methods for | ||||
| // - Display order | ||||
| // - headers and footers | ||||
| // - bracket replacement. | ||||
|  | ||||
| // Direction reports the direction of the run. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (r *Run) Direction() Direction { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // Pos returns the position of the Run within the text passed to SetBytes or | ||||
| // SetString of the originating Paragraph value. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func (r *Run) Pos() (start, end int) { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // AppendReverse reverses the order of characters of in, appends them to out, | ||||
| // and returns the result. Modifiers will still follow the runes they modify. | ||||
| // Brackets are replaced with their counterparts. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func AppendReverse(out, in []byte) []byte { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
|  | ||||
| // ReverseString reverses the order of characters in s and returns a new string. | ||||
| // Modifiers will still follow the runes they modify. Brackets are replaced with | ||||
| // their counterparts. | ||||
| // | ||||
| // NOTE: not implemented in this version; calling it panics with "unimplemented". | ||||
| func ReverseString(s string) string { | ||||
| 	panic("unimplemented") | ||||
| } | ||||
							
								
								
									
										335
									
								
								vendor/golang.org/x/text/unicode/bidi/bracket.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										335
									
								
								vendor/golang.org/x/text/unicode/bidi/bracket.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,335 @@ | ||||
| // Copyright 2015 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| import ( | ||||
| 	"container/list" | ||||
| 	"fmt" | ||||
| 	"sort" | ||||
| ) | ||||
|  | ||||
| // This file contains a port of the reference implementation of the | ||||
| // Bidi Parentheses Algorithm: | ||||
| // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java | ||||
| // | ||||
| // The implementation in this file covers definitions BD14-BD16 and rule N0 | ||||
| // of UAX#9. | ||||
| // | ||||
| // Some preprocessing is done for each rune before data is passed to this | ||||
| // algorithm: | ||||
| //  - opening and closing brackets are identified | ||||
| //  - a bracket pair type, like '(' and ')' is assigned a unique identifier that | ||||
| //    is identical for the opening and closing bracket. It is left to do these | ||||
| //    mappings. | ||||
| //  - The BPA algorithm requires that bracket characters that are canonical | ||||
| //    equivalents of each other be able to be substituted for each other. | ||||
| //    It is the responsibility of the caller to do this canonicalization. | ||||
| // | ||||
| // In implementing BD16, this implementation departs slightly from the "logical" | ||||
| // algorithm defined in UAX#9. In particular, the stack referenced there | ||||
| // supports operations that go beyond a "basic" stack. An equivalent | ||||
| // implementation based on a linked list is used here. | ||||
|  | ||||
| // bracketType is the Bidi_Paired_Bracket_Type property value of a rune. | ||||
| // | ||||
| // BD14. An opening paired bracket is a character whose | ||||
| // Bidi_Paired_Bracket_Type property value is Open. | ||||
| // | ||||
| // BD15. A closing paired bracket is a character whose | ||||
| // Bidi_Paired_Bracket_Type property value is Close. | ||||
| type bracketType byte | ||||
|  | ||||
| const ( | ||||
| 	// bpNone is the zero value; locateBrackets skips runes with this type. | ||||
| 	bpNone bracketType = iota | ||||
| 	// bpOpen marks an opening paired bracket (BD14). | ||||
| 	bpOpen | ||||
| 	// bpClose marks a closing paired bracket (BD15). | ||||
| 	bpClose | ||||
| ) | ||||
|  | ||||
| // bracketPair holds a pair of index values for opening and closing bracket | ||||
| // location of a bracket pair. | ||||
| // | ||||
| // Both values are positions within the isolating run sequence (indexes into | ||||
| // bracketPairer.indexes and bracketPairer.codesIsolatedRun), not offsets into | ||||
| // the original text. | ||||
| type bracketPair struct { | ||||
| 	// opener is the run position of the opening bracket. | ||||
| 	opener int | ||||
| 	// closer is the run position of the closing bracket. | ||||
| 	closer int | ||||
| } | ||||
|  | ||||
| // String formats the pair as "(opener, closer)". | ||||
| func (b *bracketPair) String() string { | ||||
| 	from, to := b.opener, b.closer | ||||
| 	return fmt.Sprintf("(%v, %v)", from, to) | ||||
| } | ||||
|  | ||||
| // bracketPairs is a slice of bracketPair values that implements | ||||
| // sort.Interface, ordering pairs by ascending opener position. | ||||
| type bracketPairs []bracketPair | ||||
|  | ||||
| // Len returns the number of pairs. | ||||
| func (b bracketPairs) Len() int { | ||||
| 	return len(b) | ||||
| } | ||||
|  | ||||
| // Swap exchanges the pairs at positions i and j. | ||||
| func (b bracketPairs) Swap(i, j int) { | ||||
| 	tmp := b[i] | ||||
| 	b[i] = b[j] | ||||
| 	b[j] = tmp | ||||
| } | ||||
|  | ||||
| // Less reports whether pair i opens before pair j. | ||||
| func (b bracketPairs) Less(i, j int) bool { | ||||
| 	return b[j].opener > b[i].opener | ||||
| } | ||||
|  | ||||
| // resolvePairedBrackets runs the paired bracket part of the UBA algorithm on | ||||
| // the isolating run sequence s. | ||||
| // | ||||
| // It locates the bracket pairs of s (BD16) using the paragraph's pairTypes and | ||||
| // pairValues, then applies rule N0 to them, writing the resolved classes into | ||||
| // s.types. The embedding direction is R when s.level is odd and L otherwise. | ||||
| // | ||||
| // The identifiers for bracket types are the rune of the canonicalized opening | ||||
| // bracket for brackets (open or close) or 0 for runes that are not brackets. | ||||
| func resolvePairedBrackets(s *isolatingRunSequence) { | ||||
| 	// Derive the embedding direction from the parity of the level. | ||||
| 	dirEmbed := R | ||||
| 	if s.level&1 == 0 { | ||||
| 		dirEmbed = L | ||||
| 	} | ||||
|  | ||||
| 	pairer := bracketPairer{ | ||||
| 		sos:              s.sos, | ||||
| 		openers:          list.New(), | ||||
| 		codesIsolatedRun: s.types, | ||||
| 		indexes:          s.indexes, | ||||
| 	} | ||||
| 	pairer.locateBrackets(s.p.pairTypes, s.p.pairValues) | ||||
| 	pairer.resolveBrackets(dirEmbed, s.p.initialTypes) | ||||
| } | ||||
|  | ||||
| // bracketPairer holds the working state used to locate the bracket pairs of | ||||
| // one isolating run sequence (BD16) and to resolve their types (rule N0). | ||||
| type bracketPairer struct { | ||||
| 	sos Class // direction corresponding to start of sequence | ||||
|  | ||||
| 	// The following is a restatement of BD 16 using non-algorithmic language. | ||||
| 	// | ||||
| 	// A bracket pair is a pair of characters consisting of an opening | ||||
| 	// paired bracket and a closing paired bracket such that the | ||||
| 	// Bidi_Paired_Bracket property value of the former equals the latter, | ||||
| 	// subject to the following constraints. | ||||
| 	// - both characters of a pair occur in the same isolating run sequence | ||||
| 	// - the closing character of a pair follows the opening character | ||||
| 	// - any bracket character can belong at most to one pair, the earliest possible one | ||||
| 	// - any bracket character not part of a pair is treated like an ordinary character | ||||
| 	// - pairs may nest properly, but their spans may not overlap otherwise | ||||
|  | ||||
| 	// Bracket characters with canonical decompositions are supposed to be | ||||
| 	// treated as if they had been normalized, to allow normalized and non- | ||||
| 	// normalized text to give the same result. In this implementation that step | ||||
| 	// is pushed out to the caller. The caller has to ensure that the pairValue | ||||
| 	// slices contain the rune of the opening bracket after normalization for | ||||
| 	// any opening or closing bracket. | ||||
|  | ||||
| 	// openers holds run positions (int values) of openers that have not been | ||||
| 	// matched yet, most recent first. | ||||
| 	openers *list.List // list of positions for opening brackets | ||||
|  | ||||
| 	// bracket pair positions sorted by location of opening bracket | ||||
| 	pairPositions bracketPairs | ||||
|  | ||||
| 	// codesIsolatedRun is indexed by run position; indexes maps a run position | ||||
| 	// to the corresponding position in the original text. | ||||
| 	codesIsolatedRun []Class // directional bidi codes for an isolated run | ||||
| 	indexes          []int   // array of index values into the original string | ||||
|  | ||||
| } | ||||
|  | ||||
| // matchOpener reports whether the brackets at run positions opener and closer | ||||
| // carry the same pair value, i.e. whether they form a matching bracket pair. | ||||
| // pairValues is indexed by position in the original text. | ||||
| func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { | ||||
| 	openValue := pairValues[p.indexes[opener]] | ||||
| 	closeValue := pairValues[p.indexes[closer]] | ||||
| 	return openValue == closeValue | ||||
| } | ||||
|  | ||||
| // maxPairingDepth is the number of pending (unmatched) opening brackets at | ||||
| // which locateBrackets gives up looking for further pairs in a run. | ||||
| const maxPairingDepth = 63 | ||||
|  | ||||
| // locateBrackets locates matching bracket pairs according to BD16 and records | ||||
| // them in p.pairPositions, sorted by the run position of the opening bracket. | ||||
| // | ||||
| // pairTypes and pairValues are indexed by position in the original text; the | ||||
| // positions stored in the resulting pairs are run positions (indexes into | ||||
| // p.indexes). | ||||
| // | ||||
| // This implementation uses a linked list instead of a stack, because, while | ||||
| // elements are added at the front (like a push) they are not generally removed | ||||
| // in atomic 'pop' operations, reducing the benefit of the stack archetype. | ||||
| func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { | ||||
| 	// Pairs are appended in the order their closing brackets are seen, but | ||||
| 	// they must end up ordered by opener. Sort once on every exit path | ||||
| 	// (including the early return below) instead of re-sorting the whole | ||||
| 	// list after each closing bracket. | ||||
| 	defer func() { sort.Sort(p.pairPositions) }() | ||||
|  | ||||
| 	// traverse the run; i is the run position, index the text position | ||||
| 	for i, index := range p.indexes { | ||||
|  | ||||
| 		// only brackets whose current class is still ON take part in pairing | ||||
| 		if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON { | ||||
| 			// continue scanning | ||||
| 			continue | ||||
| 		} | ||||
| 		switch pairTypes[index] { | ||||
| 		case bpOpen: | ||||
| 			// check if maximum pairing depth reached: drop the pending | ||||
| 			// openers and stop looking for pairs in this run | ||||
| 			if p.openers.Len() == maxPairingDepth { | ||||
| 				p.openers.Init() | ||||
| 				return | ||||
| 			} | ||||
| 			// remember opener location, most recent first | ||||
| 			p.openers.PushFront(i) | ||||
|  | ||||
| 		case bpClose: | ||||
| 			// see if there is a match, starting with the most recent opener | ||||
| 			count := 0 | ||||
| 			for elem := p.openers.Front(); elem != nil; elem = elem.Next() { | ||||
| 				count++ | ||||
| 				opener := elem.Value.(int) | ||||
| 				if p.matchOpener(pairValues, opener, i) { | ||||
| 					// if the opener matches, add nested pair to the list | ||||
| 					p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) | ||||
| 					// remove up to and including matched opener | ||||
| 					for ; count > 0; count-- { | ||||
| 						p.openers.Remove(p.openers.Front()) | ||||
| 					} | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 			// if no opener matched, the closing bracket is ignored | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Bracket pairs within an isolating run sequence are processed as units so | ||||
| // that both the opening and the closing paired bracket in a pair resolve to | ||||
| // the same direction. | ||||
| // | ||||
| // N0. Process bracket pairs in an isolating run sequence sequentially in | ||||
| // the logical order of the text positions of the opening paired brackets | ||||
| // using the logic given below. Within this scope, bidirectional types EN | ||||
| // and AN are treated as R. | ||||
| // | ||||
| // Identify the bracket pairs in the current isolating run sequence | ||||
| // according to BD16. For each bracket-pair element in the list of pairs of | ||||
| // text positions: | ||||
| // | ||||
| // a Inspect the bidirectional types of the characters enclosed within the | ||||
| // bracket pair. | ||||
| // | ||||
| // b If any strong type (either L or R) matching the embedding direction is | ||||
| // found, set the type for both brackets in the pair to match the embedding | ||||
| // direction. | ||||
| // | ||||
| // o [ e ] o -> o e e e o | ||||
| // | ||||
| // o [ o e ] -> o e o e e | ||||
| // | ||||
| // o [ NI e ] -> o e NI e e | ||||
| // | ||||
| // c Otherwise, if a strong type (opposite the embedding direction) is | ||||
| // found, test for adjacent strong types as follows: 1 First, check | ||||
| // backwards before the opening paired bracket until the first strong type | ||||
| // (L, R, or sos) is found. If that first preceding strong type is opposite | ||||
| // the embedding direction, then set the type for both brackets in the pair | ||||
| // to that type. 2 Otherwise, set the type for both brackets in the pair to | ||||
| // the embedding direction. | ||||
| // | ||||
| // o [ o ] e -> o o o o e | ||||
| // | ||||
| // o [ o NI ] o -> o o o NI o o | ||||
| // | ||||
| // e [ o ] o -> e e o e o | ||||
| // | ||||
| // e [ o ] e -> e e o e e | ||||
| // | ||||
| // e ( o [ o ] NI ) e -> e e o o o o NI e e | ||||
| // | ||||
| // d Otherwise, do not set the type for the current bracket pair. Note that | ||||
| // if the enclosed text contains no strong types the paired brackets will | ||||
| // both resolve to the same level when resolved individually using rules N1 | ||||
| // and N2. | ||||
| // | ||||
| // e ( NI ) o -> e ( NI ) o | ||||
|  | ||||
| // getStrongTypeN0 maps the directional code at run position index to the | ||||
| // strong type used by rule N0: L stays L; R, AL and the number types EN and AN | ||||
| // are reported as R; every other code is reported as ON. | ||||
| // | ||||
| // TODO: have separate type for "strong" directionality. | ||||
| func (p *bracketPairer) getStrongTypeN0(index int) Class { | ||||
| 	code := p.codesIsolatedRun[index] | ||||
| 	if code == L { | ||||
| 		return L | ||||
| 	} | ||||
| 	// in the scope of N0, number types are treated as R | ||||
| 	if code == EN || code == AN || code == AL || code == R { | ||||
| 		return R | ||||
| 	} | ||||
| 	return ON | ||||
| } | ||||
|  | ||||
| // classifyPairContent reports the strong types contained inside a Bracket Pair, | ||||
| // assuming the given embedding direction. | ||||
| // | ||||
| // It returns dirEmbed as soon as a strong type matching the embedding | ||||
| // direction is found between the brackets. Otherwise it returns the last | ||||
| // strong type seen, or ON if the pair encloses no strong type. | ||||
| // | ||||
| // TODO: use separate type for "strong" directionality. | ||||
| func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class { | ||||
| 	found := ON | ||||
| 	for pos := loc.opener + 1; pos < loc.closer; pos++ { | ||||
| 		switch strong := p.getStrongTypeN0(pos); strong { | ||||
| 		case ON: | ||||
| 			// not a strong type, keep looking | ||||
| 		case dirEmbed: | ||||
| 			// type matching embedding direction found | ||||
| 			return strong | ||||
| 		default: | ||||
| 			found = strong | ||||
| 		} | ||||
| 	} | ||||
| 	return found | ||||
| } | ||||
|  | ||||
| // classBeforePair scans backwards from the opening bracket of loc and returns | ||||
| // the first strong type (as defined by getStrongTypeN0) it finds. If there is | ||||
| // none before the pair, it returns p.sos. | ||||
| func (p *bracketPairer) classBeforePair(loc bracketPair) Class { | ||||
| 	pos := loc.opener | ||||
| 	for pos > 0 { | ||||
| 		pos-- | ||||
| 		if strong := p.getStrongTypeN0(pos); strong != ON { | ||||
| 			return strong | ||||
| 		} | ||||
| 	} | ||||
| 	// no strong types found, return sos | ||||
| 	return p.sos | ||||
| } | ||||
|  | ||||
| // assignBracketType implements rule N0 for a single bracket pair. | ||||
| // | ||||
| // The cases are tested in a different order than in the statement of the rule | ||||
| // but yield the same results. | ||||
| func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) { | ||||
| 	// rule "N0, a": inspect contents of pair; the result is L, R or ON | ||||
| 	content := p.classifyPairContent(loc, dirEmbed) | ||||
| 	if content == ON { | ||||
| 		// case "d": no strong type inside the pair, nothing to do | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	// case "b" (content matches embedding) and case "c.2" both resolve to | ||||
| 	// the embedding direction | ||||
| 	resolved := dirEmbed | ||||
| 	if content != dirEmbed { | ||||
| 		// case "c.1": opposite strong type inside; use the type found before | ||||
| 		// the pair only when that is a strong type opposite the embedding | ||||
| 		if before := p.classBeforePair(loc); before != dirEmbed && before != ON { | ||||
| 			resolved = before | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// set the bracket types to the type found | ||||
| 	p.setBracketsToType(loc, resolved, initialTypes) | ||||
| } | ||||
|  | ||||
| // setBracketsToType assigns dirPair to both brackets of loc. It also assigns | ||||
| // dirPair to each run of characters directly following the opener (up to the | ||||
| // closer) and directly following the closer (up to the end of the run) whose | ||||
| // initial type is NSM. initialTypes is indexed by position in the original | ||||
| // text. | ||||
| func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) { | ||||
| 	p.codesIsolatedRun[loc.opener] = dirPair | ||||
| 	p.codesIsolatedRun[loc.closer] = dirPair | ||||
|  | ||||
| 	// spread assigns dirPair to consecutive NSM characters in [from, limit). | ||||
| 	spread := func(from, limit int) { | ||||
| 		for i := from; i < limit && initialTypes[p.indexes[i]] == NSM; i++ { | ||||
| 			p.codesIsolatedRun[i] = dirPair | ||||
| 		} | ||||
| 	} | ||||
| 	spread(loc.opener+1, loc.closer) | ||||
| 	spread(loc.closer+1, len(p.indexes)) | ||||
| } | ||||
|  | ||||
| // resolveBrackets implements rule N0 for a list of pairs: it applies | ||||
| // assignBracketType to every entry of p.pairPositions, in slice order. | ||||
| func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) { | ||||
| 	for i := range p.pairPositions { | ||||
| 		p.assignBracketType(p.pairPositions[i], dirEmbed, initialTypes) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										1058
									
								
								vendor/golang.org/x/text/unicode/bidi/core.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1058
									
								
								vendor/golang.org/x/text/unicode/bidi/core.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										206
									
								
								vendor/golang.org/x/text/unicode/bidi/prop.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										206
									
								
								vendor/golang.org/x/text/unicode/bidi/prop.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,206 @@ | ||||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
| // Properties provides access to BiDi properties of runes. | ||||
| type Properties struct { | ||||
| 	// entry is the value looked up for the rune. Class reads its low four | ||||
| 	// bits; IsBracket tests its high four bits. | ||||
| 	entry uint8 | ||||
| 	// last is the final byte of the rune's UTF-8 encoding. Lookup and | ||||
| 	// LookupString only set it for three-byte encodings; Class uses it to | ||||
| 	// resolve the Control class via controlByteToClass. | ||||
| 	last  uint8 | ||||
| } | ||||
|  | ||||
| // trie is the lookup structure Lookup and LookupString use (via lookupValue) | ||||
| // to obtain the entry for multi-byte encodings. | ||||
| var trie = newBidiTrie(0) | ||||
|  | ||||
| // TODO: using this for bidirule reduces the running time by about 5%. Consider | ||||
| // if this is worth exposing or if we can find a way to speed up the Class | ||||
| // method. | ||||
| // | ||||
| // // CompactClass is like Class, but maps all of the BiDi control classes | ||||
| // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control. | ||||
| // func (p Properties) CompactClass() Class { | ||||
| // 	return Class(p.entry & 0x0F) | ||||
| // } | ||||
|  | ||||
| // Class returns the Bidi class for p. | ||||
| // | ||||
| // The class is stored in the low four bits of the entry. When that value is | ||||
| // Control, the specific class is looked up in controlByteToClass using the | ||||
| // low four bits of the last byte of the rune's encoding. | ||||
| func (p Properties) Class() Class { | ||||
| 	if c := Class(p.entry & 0x0F); c != Control { | ||||
| 		return c | ||||
| 	} | ||||
| 	return controlByteToClass[p.last&0xF] | ||||
| } | ||||
|  | ||||
| // IsBracket reports whether the rune is a bracket, i.e. whether any of the | ||||
| // high four bits of its entry is set. | ||||
| func (p Properties) IsBracket() bool { | ||||
| 	bracketBits := p.entry & 0xF0 | ||||
| 	return bracketBits != 0 | ||||
| } | ||||
|  | ||||
| // IsOpeningBracket reports whether the rune is an opening bracket. | ||||
| // IsBracket must return true. | ||||
| func (p Properties) IsOpeningBracket() bool { | ||||
| 	if p.entry&openMask == 0 { | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // reverseBracket returns r XORed with the mask selected from xorMasks by the | ||||
| // bits of p's entry above xorMaskShift. | ||||
| // | ||||
| // TODO: find a better API and expose. | ||||
| func (p Properties) reverseBracket(r rune) rune { | ||||
| 	mask := xorMasks[p.entry>>xorMaskShift] | ||||
| 	return mask ^ r | ||||
| } | ||||
|  | ||||
| // controlByteToClass maps the low four bits of the last UTF-8 byte of a | ||||
| // directional control character to its Class. It is consulted by | ||||
| // Properties.Class when the stored class is Control. Indexes not listed here | ||||
| // hold the zero Class. | ||||
| var controlByteToClass = [16]Class{ | ||||
| 	0xD: LRO, // U+202D LeftToRightOverride, | ||||
| 	0xE: RLO, // U+202E RightToLeftOverride, | ||||
| 	0xA: LRE, // U+202A LeftToRightEmbedding, | ||||
| 	0xB: RLE, // U+202B RightToLeftEmbedding, | ||||
| 	0xC: PDF, // U+202C PopDirectionalFormat, | ||||
| 	0x6: LRI, // U+2066 LeftToRightIsolate, | ||||
| 	0x7: RLI, // U+2067 RightToLeftIsolate, | ||||
| 	0x8: FSI, // U+2068 FirstStrongIsolate, | ||||
| 	0x9: PDI, // U+2069 PopDirectionalIsolate, | ||||
| } | ||||
|  | ||||
| // LookupRune returns properties for r, together with the size reported by | ||||
| // Lookup for the UTF-8 encoding of r. | ||||
| func LookupRune(r rune) (p Properties, size int) { | ||||
| 	var encoded [4]byte | ||||
| 	width := utf8.EncodeRune(encoded[:], r) | ||||
| 	p, size = Lookup(encoded[:width]) | ||||
| 	return p, size | ||||
| } | ||||
|  | ||||
| // TODO: these lookup methods are based on the generated trie code. The returned | ||||
| // sizes have slightly different semantics from the generated code, in that it | ||||
| // always returns size==1 for an illegal UTF-8 byte (instead of the length | ||||
| // of the maximum invalid subsequence). Most Transformers, like unicode/norm, | ||||
| // leave invalid UTF-8 untouched, in which case it has performance benefits to | ||||
| // do so (without changing the semantics). Bidi requires the semantics used here | ||||
| // for the bidirule implementation to be compatible with the Go semantics. | ||||
| //  They ultimately should perhaps be adopted by all trie implementations, for | ||||
| // convenience sake. | ||||
| // This unrolled code also boosts performance of the secure/bidirule package by | ||||
| // about 30%. | ||||
| // So, to remove this code: | ||||
| //   - add option to trie generator to define return type. | ||||
| //   - always return 1 byte size for ill-formed UTF-8 runes. | ||||
|  | ||||
| // Lookup returns properties for the first rune in s and the width in bytes of | ||||
| // its encoding. The size will be 0 if s is empty or does not hold enough bytes | ||||
| // to complete the encoding. For an ill-formed sequence it returns the zero | ||||
| // Properties and a size of 1. | ||||
| func Lookup(s []byte) (p Properties, sz int) { | ||||
| 	if len(s) == 0 { | ||||
| 		// Nothing to decode: report "not enough bytes" as documented | ||||
| 		// instead of panicking on s[0]. | ||||
| 		return Properties{}, 0 | ||||
| 	} | ||||
| 	c0 := s[0] | ||||
| 	switch { | ||||
| 	case c0 < 0x80: // is ASCII | ||||
| 		return Properties{entry: bidiValues[c0]}, 1 | ||||
| 	case c0 < 0xC2: | ||||
| 		// not a valid leading byte | ||||
| 		return Properties{}, 1 | ||||
| 	case c0 < 0xE0: // 2-byte UTF-8 | ||||
| 		if len(s) < 2 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 | ||||
| 	case c0 < 0xF0: // 3-byte UTF-8 | ||||
| 		if len(s) < 3 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		// Class needs the last byte to tell the control classes apart. | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 | ||||
| 	case c0 < 0xF8: // 4-byte UTF-8 | ||||
| 		if len(s) < 4 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o = uint32(i)<<6 + uint32(c2) | ||||
| 		i = bidiIndex[o] | ||||
| 		c3 := s[3] | ||||
| 		if c3 < 0x80 || 0xC0 <= c3 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 | ||||
| 	} | ||||
| 	// Illegal rune | ||||
| 	return Properties{}, 1 | ||||
| } | ||||
|  | ||||
| // LookupString returns properties for the first rune in s and the width in | ||||
| // bytes of its encoding. The size will be 0 if s is empty or does not hold | ||||
| // enough bytes to complete the encoding. For an ill-formed sequence it returns | ||||
| // the zero Properties and a size of 1. | ||||
| func LookupString(s string) (p Properties, sz int) { | ||||
| 	if len(s) == 0 { | ||||
| 		// Nothing to decode: report "not enough bytes" as documented | ||||
| 		// instead of panicking on s[0]. | ||||
| 		return Properties{}, 0 | ||||
| 	} | ||||
| 	c0 := s[0] | ||||
| 	switch { | ||||
| 	case c0 < 0x80: // is ASCII | ||||
| 		return Properties{entry: bidiValues[c0]}, 1 | ||||
| 	case c0 < 0xC2: | ||||
| 		// not a valid leading byte | ||||
| 		return Properties{}, 1 | ||||
| 	case c0 < 0xE0: // 2-byte UTF-8 | ||||
| 		if len(s) < 2 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2 | ||||
| 	case c0 < 0xF0: // 3-byte UTF-8 | ||||
| 		if len(s) < 3 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		// Class needs the last byte to tell the control classes apart. | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3 | ||||
| 	case c0 < 0xF8: // 4-byte UTF-8 | ||||
| 		if len(s) < 4 { | ||||
| 			return Properties{}, 0 | ||||
| 		} | ||||
| 		i := bidiIndex[c0] | ||||
| 		c1 := s[1] | ||||
| 		if c1 < 0x80 || 0xC0 <= c1 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o := uint32(i)<<6 + uint32(c1) | ||||
| 		i = bidiIndex[o] | ||||
| 		c2 := s[2] | ||||
| 		if c2 < 0x80 || 0xC0 <= c2 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		o = uint32(i)<<6 + uint32(c2) | ||||
| 		i = bidiIndex[o] | ||||
| 		c3 := s[3] | ||||
| 		if c3 < 0x80 || 0xC0 <= c3 { | ||||
| 			return Properties{}, 1 | ||||
| 		} | ||||
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4 | ||||
| 	} | ||||
| 	// Illegal rune | ||||
| 	return Properties{}, 1 | ||||
| } | ||||
							
								
								
									
										1815
									
								
								vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1815
									
								
								vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										1887
									
								
								vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1887
									
								
								vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										1781
									
								
								vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1781
									
								
								vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										60
									
								
								vendor/golang.org/x/text/unicode/bidi/trieval.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								vendor/golang.org/x/text/unicode/bidi/trieval.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,60 @@ | ||||
| // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. | ||||
|  | ||||
| package bidi | ||||
|  | ||||
| // Class is the Unicode BiDi class. Each rune has a single class. | ||||
| type Class uint | ||||
|  | ||||
| // The BiDi classes. The constants up to and including Control are numbered | ||||
| // consecutively from zero. | ||||
| const ( | ||||
| 	L       Class = iota // LeftToRight | ||||
| 	R                    // RightToLeft | ||||
| 	EN                   // EuropeanNumber | ||||
| 	ES                   // EuropeanSeparator | ||||
| 	ET                   // EuropeanTerminator | ||||
| 	AN                   // ArabicNumber | ||||
| 	CS                   // CommonSeparator | ||||
| 	B                    // ParagraphSeparator | ||||
| 	S                    // SegmentSeparator | ||||
| 	WS                   // WhiteSpace | ||||
| 	ON                   // OtherNeutral | ||||
| 	BN                   // BoundaryNeutral | ||||
| 	NSM                  // NonspacingMark | ||||
| 	AL                   // ArabicLetter | ||||
| 	Control              // Control LRO - PDI | ||||
|  | ||||
| 	// numClass equals the number of classes declared above it. | ||||
| 	numClass | ||||
|  | ||||
| 	// The specific control classes covered by Control; these are the values | ||||
| 	// stored in controlToClass. | ||||
| 	LRO // LeftToRightOverride | ||||
| 	RLO // RightToLeftOverride | ||||
| 	LRE // LeftToRightEmbedding | ||||
| 	RLE // RightToLeftEmbedding | ||||
| 	PDF // PopDirectionalFormat | ||||
| 	LRI // LeftToRightIsolate | ||||
| 	RLI // RightToLeftIsolate | ||||
| 	FSI // FirstStrongIsolate | ||||
| 	PDI // PopDirectionalIsolate | ||||
|  | ||||
| 	// unknownClass has every bit set, so it differs from all classes above. | ||||
| 	unknownClass = ^Class(0) | ||||
| ) | ||||
|  | ||||
| // controlToClass maps the code point of each explicit directional formatting | ||||
| // character (overrides, embeddings, isolates and their terminators) to its | ||||
| // specific control Class. | ||||
| var controlToClass = map[rune]Class{ | ||||
| 	0x202D: LRO, // LeftToRightOverride, | ||||
| 	0x202E: RLO, // RightToLeftOverride, | ||||
| 	0x202A: LRE, // LeftToRightEmbedding, | ||||
| 	0x202B: RLE, // RightToLeftEmbedding, | ||||
| 	0x202C: PDF, // PopDirectionalFormat, | ||||
| 	0x2066: LRI, // LeftToRightIsolate, | ||||
| 	0x2067: RLI, // RightToLeftIsolate, | ||||
| 	0x2068: FSI, // FirstStrongIsolate, | ||||
| 	0x2069: PDI, // PopDirectionalIsolate, | ||||
| } | ||||
|  | ||||
| // A trie entry has the following bits: | ||||
| // 7..5  XOR mask for brackets | ||||
| // 4     1: Bracket open, 0: Bracket close | ||||
| // 3..0  Class type | ||||
|  | ||||
| // Helpers for picking the bracket fields out of a trie entry. | ||||
| const ( | ||||
| 	// openMask selects bit 4, which is set for an opening bracket. | ||||
| 	openMask     = 0x10 | ||||
| 	// xorMaskShift is the position of the lowest bit of the XOR mask (bits 7..5). | ||||
| 	xorMaskShift = 5 | ||||
| ) | ||||
							
								
								
									
										512
									
								
								vendor/golang.org/x/text/unicode/norm/composition.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										512
									
								
								vendor/golang.org/x/text/unicode/norm/composition.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,512 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
| // Buffer size limits used during normalization. | ||||
| const ( | ||||
| 	maxNonStarters = 30 | ||||
| 	// The maximum number of characters needed for a buffer is | ||||
| 	// maxNonStarters + 1 for the starter + 1 for the CGJ | ||||
| 	// (Combining Grapheme Joiner). | ||||
| 	maxBufferSize    = maxNonStarters + 2 | ||||
| 	maxNFCExpansion  = 3  // NFC(0x1D160) | ||||
| 	maxNFKCExpansion = 18 // NFKC(0xFDFA) | ||||
|  | ||||
| 	// Every buffered rune is given a fixed slot of utf8.UTFMax bytes. | ||||
| 	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128 | ||||
| ) | ||||
|  | ||||
| // ssState is used for reporting the segment state after inserting a rune. | ||||
| // It is returned by streamSafe.next and streamSafe.backwards. | ||||
| type ssState int | ||||
|  | ||||
| // The possible values of ssState. | ||||
| const ( | ||||
| 	// Indicates a rune was successfully added to the segment. | ||||
| 	ssSuccess ssState = iota | ||||
| 	// Indicates a rune starts a new segment and should not be added. | ||||
| 	ssStarter | ||||
| 	// Indicates a rune caused a segment overflow and a CGJ should be inserted. | ||||
| 	ssOverflow | ||||
| ) | ||||
|  | ||||
| // streamSafe implements the policy of when a CGJ should be inserted. | ||||
| // Its value is the running count of non-starters in the current segment; | ||||
| // next and backwards panic if they find it above maxNonStarters on entry. | ||||
| type streamSafe uint8 | ||||
|  | ||||
| // first records the first rune of a segment by setting the count to the | ||||
| // number of trailing non-starters of p. It is a faster version of next for | ||||
| // callers that already know p is the first rune in a segment. | ||||
| func (ss *streamSafe) first(p Properties) { | ||||
| 	trailing := p.nTrailingNonStarters() | ||||
| 	*ss = streamSafe(trailing) | ||||
| } | ||||
|  | ||||
| // next returns a ssState value to indicate whether a rune represented by p | ||||
| // can be inserted. On overflow the count is reset to zero. It panics if the | ||||
| // count already exceeds maxNonStarters on entry. | ||||
| func (ss *streamSafe) next(p Properties) ssState { | ||||
| 	if *ss > maxNonStarters { | ||||
| 		panic("streamSafe was not reset") | ||||
| 	} | ||||
| 	lead := p.nLeadingNonStarters() | ||||
| 	*ss += streamSafe(lead) | ||||
| 	if *ss > maxNonStarters { | ||||
| 		*ss = 0 | ||||
| 		return ssOverflow | ||||
| 	} | ||||
| 	// The Stream-Safe Text Processing prescribes that the counting can stop | ||||
| 	// as soon as a starter is encountered. However, there are some starters, | ||||
| 	// like Jamo V and T, that can combine with other runes, leaving their | ||||
| 	// successive non-starters appended to the previous, possibly causing an | ||||
| 	// overflow. We will therefore consider any rune with a non-zero nLead to | ||||
| 	// be a non-starter. Note that it always holds that if nLead > 0 then | ||||
| 	// nLead == nTrail. | ||||
| 	if lead != 0 { | ||||
| 		return ssSuccess | ||||
| 	} | ||||
| 	// A rune without leading non-starters begins a new segment: restart the | ||||
| 	// count from its trailing non-starters. | ||||
| 	*ss = streamSafe(p.nTrailingNonStarters()) | ||||
| 	return ssStarter | ||||
| } | ||||
|  | ||||
| // backwards checks for overflow and segment starts while a string is | ||||
| // traversed from back to front. Callers do not need to call first for the | ||||
| // first rune. The streamSafe keeps the count of non-starters seen so far; | ||||
| // on overflow the count is left unchanged. | ||||
| func (ss *streamSafe) backwards(p Properties) ssState { | ||||
| 	if *ss > maxNonStarters { | ||||
| 		panic("streamSafe was not reset") | ||||
| 	} | ||||
| 	total := *ss + streamSafe(p.nTrailingNonStarters()) | ||||
| 	if total > maxNonStarters { | ||||
| 		return ssOverflow | ||||
| 	} | ||||
| 	*ss = total | ||||
| 	if p.nLeadingNonStarters() != 0 { | ||||
| 		return ssSuccess | ||||
| 	} | ||||
| 	return ssStarter | ||||
| } | ||||
|  | ||||
| // isMax reports whether the non-starter count equals maxNonStarters. | ||||
| func (ss streamSafe) isMax() bool { | ||||
| 	return ss == maxNonStarters | ||||
| } | ||||
|  | ||||
| // GraphemeJoiner is inserted after maxNonStarters non-starter runes. | ||||
| // It is U+034F, the Combining Grapheme Joiner (CGJ). | ||||
| const GraphemeJoiner = "\u034F" | ||||
|  | ||||
| // reorderBuffer is used to normalize a single segment.  Characters inserted with | ||||
| // insert are decomposed and reordered based on CCC. The compose method can | ||||
| // be used to recombine characters.  Note that the byte buffer does not hold | ||||
| // the UTF-8 characters in order.  Only the rune array is maintained in sorted | ||||
| // order. flush writes the resulting segment to a byte array. | ||||
| type reorderBuffer struct { | ||||
| 	rune  [maxBufferSize]Properties // Per character info. | ||||
| 	byte  [maxByteBufferSize]byte   // UTF-8 buffer. Referenced by Properties.pos. | ||||
| 	nbyte uint8                     // Number of bytes in use; grows by utf8.UTFMax per rune. | ||||
| 	ss    streamSafe                // For limiting length of non-starter sequence. | ||||
| 	nrune int                       // Number of entries in use in rune. | ||||
| 	// f is a copy of the formTable entry for the form passed to init/initString. | ||||
| 	f     formInfo | ||||
|  | ||||
| 	// src and nsrc hold the source text and its length in bytes, as set by | ||||
| 	// init or initString. tmpBytes is scratch space used by insertDecomposed | ||||
| 	// to wrap a decomposition. | ||||
| 	src      input | ||||
| 	nsrc     int | ||||
| 	tmpBytes input | ||||
|  | ||||
| 	// out and flushF are installed by setFlusher. doFlush calls flushF to | ||||
| 	// emit the buffered segment; appendFlush appends to out. | ||||
| 	out    []byte | ||||
| 	flushF func(*reorderBuffer) bool | ||||
| } | ||||
|  | ||||
| // init prepares rb for normalizing the byte slice src in form f: it copies | ||||
| // the formTable entry for f, records the source and its length and clears the | ||||
| // stream-safe count. Runes already in the buffer are not discarded; use reset | ||||
| // for that. | ||||
| func (rb *reorderBuffer) init(f Form, src []byte) { | ||||
| 	rb.f = *formTable[f] | ||||
| 	rb.src.setBytes(src) | ||||
| 	rb.nsrc = len(src) | ||||
| 	rb.ss = 0 | ||||
| } | ||||
|  | ||||
| // initString is the string counterpart of init: it prepares rb for | ||||
| // normalizing the string src in form f by copying the formTable entry for f, | ||||
| // recording the source and its length and clearing the stream-safe count. | ||||
| // Runes already in the buffer are not discarded; use reset for that. | ||||
| func (rb *reorderBuffer) initString(f Form, src string) { | ||||
| 	rb.f = *formTable[f] | ||||
| 	rb.src.setString(src) | ||||
| 	rb.nsrc = len(src) | ||||
| 	rb.ss = 0 | ||||
| } | ||||
|  | ||||
| // setFlusher installs the output slice and the callback that doFlush invokes | ||||
| // to emit a completed segment. | ||||
| func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) { | ||||
| 	rb.out = out | ||||
| 	rb.flushF = f | ||||
| } | ||||
|  | ||||
| // reset discards all characters from the buffer. Only the rune and byte | ||||
| // counts are cleared; the form, source and stream-safe count are untouched. | ||||
| func (rb *reorderBuffer) reset() { | ||||
| 	rb.nrune = 0 | ||||
| 	rb.nbyte = 0 | ||||
| } | ||||
|  | ||||
| // doFlush recomposes the buffered segment when the active form is a | ||||
| // composing one, hands the segment to the flush callback and then empties | ||||
| // the buffer. It returns the callback's result. | ||||
| func (rb *reorderBuffer) doFlush() bool { | ||||
| 	if rb.f.composing { | ||||
| 		rb.compose() | ||||
| 	} | ||||
| 	ok := rb.flushF(rb) | ||||
| 	rb.reset() | ||||
| 	return ok | ||||
| } | ||||
|  | ||||
| // appendFlush appends the normalized segment to rb.out, rune by rune in | ||||
| // buffer order. It does not reset rb and always returns true. | ||||
| func appendFlush(rb *reorderBuffer) bool { | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		info := &rb.rune[i] | ||||
| 		rb.out = append(rb.out, rb.byte[info.pos:info.pos+info.size]...) | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| // flush appends the normalized segment to out, resets rb and returns the | ||||
| // extended slice. | ||||
| func (rb *reorderBuffer) flush(out []byte) []byte { | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		info := &rb.rune[i] | ||||
| 		lo := info.pos | ||||
| 		out = append(out, rb.byte[lo:lo+info.size]...) | ||||
| 	} | ||||
| 	rb.reset() | ||||
| 	return out | ||||
| } | ||||
|  | ||||
| // flushCopy copies the normalized segment to buf and resets rb. | ||||
| // It returns the number of bytes written to buf, which is limited by the | ||||
| // room available in buf. | ||||
| func (rb *reorderBuffer) flushCopy(buf []byte) int { | ||||
| 	written := 0 | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		info := rb.rune[i] | ||||
| 		written += copy(buf[written:], rb.byte[info.pos:info.pos+info.size]) | ||||
| 	} | ||||
| 	rb.reset() | ||||
| 	return written | ||||
| } | ||||
|  | ||||
| // insertOrdered inserts a rune in the buffer, ordered by Canonical Combining | ||||
| // Class. A rune with ccc == 0 is placed at the end; a rune with ccc > 0 is | ||||
| // placed directly after the last buffered rune whose ccc is not greater. | ||||
| // The function has no result and performs no capacity check: the caller must | ||||
| // ensure the buffer has room for one more rune. The bytes of the rune must | ||||
| // already have been written at rb.byte[rb.nbyte:]; info.pos is set to that | ||||
| // offset and rb.nbyte advances by a fixed utf8.UTFMax slot. | ||||
| // It is called by insertDecomposed and insertSingle. | ||||
| func (rb *reorderBuffer) insertOrdered(info Properties) { | ||||
| 	n := rb.nrune | ||||
| 	b := rb.rune[:] | ||||
| 	cc := info.ccc | ||||
| 	if cc > 0 { | ||||
| 		// Find insertion position + move elements to make room. | ||||
| 		for ; n > 0; n-- { | ||||
| 			if b[n-1].ccc <= cc { | ||||
| 				break | ||||
| 			} | ||||
| 			b[n] = b[n-1] | ||||
| 		} | ||||
| 	} | ||||
| 	rb.nrune++ | ||||
| 	info.pos = rb.nbyte | ||||
| 	rb.nbyte += utf8.UTFMax | ||||
| 	b[n] = info | ||||
| } | ||||
|  | ||||
| // insertErr is an error code returned by insert. Using this type instead | ||||
| // of error improves performance up to 20% for many of the benchmarks. | ||||
| type insertErr int | ||||
|  | ||||
| // The insertErr codes. Because of -iota, iSuccess is zero and the error | ||||
| // codes are negative. | ||||
| const ( | ||||
| 	iSuccess insertErr = -iota | ||||
| 	// iShortDst is returned by insertDecomposed when a flush fails. | ||||
| 	iShortDst | ||||
| 	// NOTE(review): presumably signals insufficient source input; it is not | ||||
| 	// used in this file, so confirm against its users. | ||||
| 	iShortSrc | ||||
| ) | ||||
|  | ||||
| // insertFlush inserts the given rune in the buffer ordered by CCC. | ||||
| // If a decomposition with multiple segments is encountered, the leading | ||||
| // ones are flushed. A Hangul syllable is decomposed algorithmically. | ||||
| // It returns a non-zero error code if the rune was not inserted. | ||||
| func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr { | ||||
| 	switch r := src.hangul(i); { | ||||
| 	case r != 0: | ||||
| 		rb.decomposeHangul(r) | ||||
| 	case info.hasDecomposition(): | ||||
| 		return rb.insertDecomposed(info.Decomposition()) | ||||
| 	default: | ||||
| 		rb.insertSingle(src, i, info) | ||||
| 	} | ||||
| 	return iSuccess | ||||
| } | ||||
|  | ||||
| // insertUnsafe inserts the given rune in the buffer ordered by CCC. | ||||
| // It is assumed there is sufficient space to hold the runes. It is the | ||||
| // responsibility of the caller to ensure this. This can be done by checking | ||||
| // the state returned by the streamSafe type. | ||||
| // The error code of insertDecomposed is discarded. | ||||
| func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) { | ||||
| 	if rune := src.hangul(i); rune != 0 { | ||||
| 		rb.decomposeHangul(rune) | ||||
| 		// NOTE(review): unlike insertFlush there is no return here, so control | ||||
| 		// falls through and the rune is also inserted by one of the branches | ||||
| 		// below. Confirm against the callers whether this is intended. | ||||
| 	} | ||||
| 	if info.hasDecomposition() { | ||||
| 		// TODO: inline. | ||||
| 		rb.insertDecomposed(info.Decomposition()) | ||||
| 	} else { | ||||
| 		rb.insertSingle(src, i, info) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // insertDecomposed inserts an entry in to the reorderBuffer for each rune | ||||
| // in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes. | ||||
| // It flushes the buffer on each new segment start. | ||||
| // It returns iShortDst if such a flush fails and iSuccess otherwise. | ||||
| func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr { | ||||
| 	rb.tmpBytes.setBytes(dcomp) | ||||
| 	// As the streamSafe accounting already handles the counting for modifiers, | ||||
| 	// we don't have to call next. However, we do need to keep the accounting | ||||
| 	// intact when flushing the buffer. | ||||
| 	for i := 0; i < len(dcomp); { | ||||
| 		info := rb.f.info(rb.tmpBytes, i) | ||||
| 		// A rune that starts a new segment forces out what is buffered so | ||||
| 		// far; nothing is flushed when the buffer is still empty. | ||||
| 		if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() { | ||||
| 			return iShortDst | ||||
| 		} | ||||
| 		// Copy the bytes of the rune into the next free slot and advance i | ||||
| 		// by the number of bytes copied; insertOrdered then claims the slot. | ||||
| 		i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)]) | ||||
| 		rb.insertOrdered(info) | ||||
| 	} | ||||
| 	return iSuccess | ||||
| } | ||||
|  | ||||
| // insertSingle inserts an entry in the reorderBuffer for the rune at | ||||
| // position i. info is the Properties for the rune at position i. | ||||
| // The info.size bytes starting at i in src are copied into the next free | ||||
| // slot of the byte buffer before the entry is inserted in CCC order. | ||||
| func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) { | ||||
| 	src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size)) | ||||
| 	rb.insertOrdered(info) | ||||
| } | ||||
|  | ||||
| // insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb. The entry | ||||
| // carries only the encoded size; every other property is left at zero. | ||||
| func (rb *reorderBuffer) insertCGJ() { | ||||
| 	cgj := input{str: GraphemeJoiner} | ||||
| 	props := Properties{size: uint8(len(GraphemeJoiner))} | ||||
| 	rb.insertSingle(cgj, 0, props) | ||||
| } | ||||
|  | ||||
| // appendRune inserts a rune at the end of the buffer. It is used for Hangul. | ||||
| // The rune is encoded into the next free slot of the byte buffer and gets an | ||||
| // entry holding only its position and encoded size. | ||||
| func (rb *reorderBuffer) appendRune(r rune) { | ||||
| 	start := rb.nbyte | ||||
| 	n := utf8.EncodeRune(rb.byte[start:], r) | ||||
| 	rb.nbyte += utf8.UTFMax | ||||
| 	rb.rune[rb.nrune] = Properties{pos: start, size: uint8(n)} | ||||
| 	rb.nrune++ | ||||
| } | ||||
|  | ||||
| // assignRune sets a rune at position pos. It is used for Hangul and recomposition. | ||||
| // The rune is encoded over the bytes of the existing entry, which is replaced | ||||
| // by one holding only the same position and the new encoded size. | ||||
| func (rb *reorderBuffer) assignRune(pos int, r rune) { | ||||
| 	start := rb.rune[pos].pos | ||||
| 	n := utf8.EncodeRune(rb.byte[start:], r) | ||||
| 	rb.rune[pos] = Properties{pos: start, size: uint8(n)} | ||||
| } | ||||
|  | ||||
| // runeAt returns the rune at position n, decoded from the entry's bytes in | ||||
| // the byte buffer. It is used for Hangul and recomposition. | ||||
| func (rb *reorderBuffer) runeAt(n int) rune { | ||||
| 	info := rb.rune[n] | ||||
| 	lo := info.pos | ||||
| 	hi := lo + info.size | ||||
| 	decoded, _ := utf8.DecodeRune(rb.byte[lo:hi]) | ||||
| 	return decoded | ||||
| } | ||||
|  | ||||
| // bytesAt returns the UTF-8 encoding of the rune at position n as a slice | ||||
| // into the byte buffer. It is used for Hangul and recomposition. | ||||
| func (rb *reorderBuffer) bytesAt(n int) []byte { | ||||
| 	info := rb.rune[n] | ||||
| 	hi := int(info.pos) + int(info.size) | ||||
| 	return rb.byte[info.pos:hi] | ||||
| } | ||||
|  | ||||
| // For Hangul we combine algorithmically, instead of using tables. | ||||
| // The *End constants are exclusive upper bounds. | ||||
| const ( | ||||
| 	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80 | ||||
| 	hangulBase0 = 0xEA | ||||
| 	hangulBase1 = 0xB0 | ||||
| 	hangulBase2 = 0x80 | ||||
|  | ||||
| 	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4 | ||||
| 	hangulEnd0 = 0xED | ||||
| 	hangulEnd1 = 0x9E | ||||
| 	hangulEnd2 = 0xA4 | ||||
|  | ||||
| 	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 80 | ||||
| 	jamoLBase0 = 0xE1 | ||||
| 	jamoLBase1 = 0x84 | ||||
| 	jamoLEnd   = 0x1113 // jamoLBase + 19 | ||||
| 	jamoVBase  = 0x1161 | ||||
| 	jamoVEnd   = 0x1176 // jamoVBase + jamoVCount | ||||
| 	// jamoTBase lies one below the first trailing jamo: an offset of 0 from | ||||
| 	// it means "no trailing jamo" in decomposeHangul and combineHangul. | ||||
| 	jamoTBase  = 0x11A7 | ||||
| 	jamoTEnd   = 0x11C3 // jamoTBase + jamoTCount | ||||
|  | ||||
| 	jamoTCount   = 28 | ||||
| 	jamoVCount   = 21 | ||||
| 	jamoVTCount  = 21 * 28 | ||||
| 	jamoLVTCount = 19 * 21 * 28 | ||||
| ) | ||||
|  | ||||
| // hangulUTF8Size is the number of bytes in the UTF-8 encoding of a Hangul | ||||
| // syllable. | ||||
| const hangulUTF8Size = 3 | ||||
|  | ||||
| // isHangul reports whether b starts with three bytes that lie, compared | ||||
| // bytewise, in the range from the encoding of hangulBase (inclusive) to the | ||||
| // encoding of hangulEnd (exclusive). The third byte is only inspected at the | ||||
| // upper boundary. It returns false if b is shorter than hangulUTF8Size. | ||||
| func isHangul(b []byte) bool { | ||||
| 	if len(b) < hangulUTF8Size { | ||||
| 		return false | ||||
| 	} | ||||
| 	lead, second := b[0], b[1] | ||||
| 	switch { | ||||
| 	case lead < hangulBase0 || lead > hangulEnd0: | ||||
| 		return false | ||||
| 	case lead == hangulBase0: | ||||
| 		return second >= hangulBase1 | ||||
| 	case lead < hangulEnd0: | ||||
| 		return true | ||||
| 	} | ||||
| 	// lead == hangulEnd0: the second and third byte decide. | ||||
| 	if second != hangulEnd1 { | ||||
| 		return second < hangulEnd1 | ||||
| 	} | ||||
| 	return b[2] < hangulEnd2 | ||||
| } | ||||
|  | ||||
| // isHangulString is the string counterpart of isHangul: it reports whether | ||||
| // b starts with three bytes that lie, compared bytewise, in the range from | ||||
| // the encoding of hangulBase (inclusive) to the encoding of hangulEnd | ||||
| // (exclusive). It returns false if b is shorter than hangulUTF8Size. | ||||
| func isHangulString(b string) bool { | ||||
| 	if len(b) < hangulUTF8Size { | ||||
| 		return false | ||||
| 	} | ||||
| 	lead, second := b[0], b[1] | ||||
| 	switch { | ||||
| 	case lead < hangulBase0 || lead > hangulEnd0: | ||||
| 		return false | ||||
| 	case lead == hangulBase0: | ||||
| 		return second >= hangulBase1 | ||||
| 	case lead < hangulEnd0: | ||||
| 		return true | ||||
| 	} | ||||
| 	// lead == hangulEnd0: the second and third byte decide. | ||||
| 	if second != hangulEnd1 { | ||||
| 		return second < hangulEnd1 | ||||
| 	} | ||||
| 	return b[2] < hangulEnd2 | ||||
| } | ||||
|  | ||||
| // isJamoVT reports whether the first two bytes of b match the encoding of a | ||||
| // rune r with (r & 0xff00) == jamoLBase, that is U+1100..U+11FF. | ||||
| // Caller must ensure len(b) >= 2. | ||||
| func isJamoVT(b []byte) bool { | ||||
| 	if b[0] != jamoLBase0 { | ||||
| 		return false | ||||
| 	} | ||||
| 	return b[1]&0xFC == jamoLBase1 | ||||
| } | ||||
|  | ||||
| // isHangulWithoutJamoT reports whether the first rune of b is a Hangul | ||||
| // syllable whose offset from hangulBase is a multiple of jamoTCount, i.e. one | ||||
| // without a trailing jamo. There is no lower-bound check, so the caller is | ||||
| // expected to pass a rune that is not below hangulBase. | ||||
| func isHangulWithoutJamoT(b []byte) bool { | ||||
| 	r, _ := utf8.DecodeRune(b) | ||||
| 	offset := r - hangulBase | ||||
| 	if offset >= jamoLVTCount { | ||||
| 		return false | ||||
| 	} | ||||
| 	return offset%jamoTCount == 0 | ||||
| } | ||||
|  | ||||
| // decomposeHangul writes the decomposed Hangul to buf and returns the number | ||||
| // of bytes written: 6 for a syllable without a trailing jamo and 9 for one | ||||
| // with a trailing jamo.  len(buf) should be at least 9. | ||||
| func decomposeHangul(buf []byte, r rune) int { | ||||
| 	const jamoLen = 3 // bytes per encoded jamo | ||||
| 	offset := r - hangulBase | ||||
| 	lv, t := offset/jamoTCount, offset%jamoTCount | ||||
| 	utf8.EncodeRune(buf, jamoLBase+lv/jamoVCount) | ||||
| 	utf8.EncodeRune(buf[jamoLen:], jamoVBase+lv%jamoVCount) | ||||
| 	if t == 0 { | ||||
| 		return 2 * jamoLen | ||||
| 	} | ||||
| 	utf8.EncodeRune(buf[2*jamoLen:], jamoTBase+t) | ||||
| 	return 3 * jamoLen | ||||
| } | ||||
|  | ||||
| // decomposeHangul algorithmically decomposes a Hangul rune into | ||||
| // its Jamo components and appends them to the buffer: a leading and a vowel | ||||
| // jamo, followed by a trailing jamo if the syllable has one. | ||||
| // See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul. | ||||
| func (rb *reorderBuffer) decomposeHangul(r rune) { | ||||
| 	offset := r - hangulBase | ||||
| 	lv, t := offset/jamoTCount, offset%jamoTCount | ||||
| 	rb.appendRune(jamoLBase + lv/jamoVCount) | ||||
| 	rb.appendRune(jamoVBase + lv%jamoVCount) | ||||
| 	if t != 0 { | ||||
| 		rb.appendRune(jamoTBase + t) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // combineHangul algorithmically combines Jamo character components into Hangul. | ||||
| // See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul. | ||||
| // It continues the in-place pass started by compose: s is the index of the | ||||
| // current starter, i the index of the next rune to examine and k the index | ||||
| // at which the next kept rune is stored. On return rb.nrune is the number of | ||||
| // runes kept. | ||||
| func (rb *reorderBuffer) combineHangul(s, i, k int) { | ||||
| 	b := rb.rune[:] | ||||
| 	bn := rb.nrune | ||||
| 	for ; i < bn; i++ { | ||||
| 		cccB := b[k-1].ccc | ||||
| 		cccC := b[i].ccc | ||||
| 		if cccB == 0 { | ||||
| 			// The last kept rune is a starter and becomes the combining target. | ||||
| 			s = k - 1 | ||||
| 		} | ||||
| 		if s != k-1 && cccB >= cccC { | ||||
| 			// b[i] is blocked by greater-equal cccX below it | ||||
| 			b[k] = b[i] | ||||
| 			k++ | ||||
| 		} else { | ||||
| 			l := rb.runeAt(s) // also used to compare to hangulBase | ||||
| 			v := rb.runeAt(i) // also used to compare to jamoT | ||||
| 			switch { | ||||
| 			case jamoLBase <= l && l < jamoLEnd && | ||||
| 				jamoVBase <= v && v < jamoVEnd: | ||||
| 				// 11xx plus 116x to LV | ||||
| 				rb.assignRune(s, hangulBase+ | ||||
| 					(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount) | ||||
| 			case hangulBase <= l && l < hangulEnd && | ||||
| 				jamoTBase < v && v < jamoTEnd && | ||||
| 				((l-hangulBase)%jamoTCount) == 0: | ||||
| 				// ACxx plus 11Ax to LVT | ||||
| 				rb.assignRune(s, l+v-jamoTBase) | ||||
| 			default: | ||||
| 				// Nothing to combine: keep b[i] as is. | ||||
| 				b[k] = b[i] | ||||
| 				k++ | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	rb.nrune = k | ||||
| } | ||||
|  | ||||
| // compose recombines the runes in the buffer. | ||||
| // It should only be used to recompose a single segment, as it will not | ||||
| // handle alternations between Hangul and non-Hangul characters correctly. | ||||
| // The buffer is compacted in place and rb.nrune is updated to the number of | ||||
| // runes that remain. | ||||
| func (rb *reorderBuffer) compose() { | ||||
| 	// Lazily load the map used by the combine func below, but do | ||||
| 	// it outside of the loop. | ||||
| 	recompMapOnce.Do(buildRecompMap) | ||||
|  | ||||
| 	// UAX #15, section X5 , including Corrigendum #5 | ||||
| 	// "In any character sequence beginning with starter S, a character C is | ||||
| 	//  blocked from S if and only if there is some character B between S | ||||
| 	//  and C, and either B is a starter or it has the same or higher | ||||
| 	//  combining class as C." | ||||
| 	bn := rb.nrune | ||||
| 	if bn == 0 { | ||||
| 		return | ||||
| 	} | ||||
| 	// k is the index at which the next kept rune is stored, s the index of | ||||
| 	// the current starter and i the index of the rune being examined. | ||||
| 	k := 1 | ||||
| 	b := rb.rune[:] | ||||
| 	for s, i := 0, 1; i < bn; i++ { | ||||
| 		if isJamoVT(rb.bytesAt(i)) { | ||||
| 			// Redo from start in Hangul mode. Necessary to support | ||||
| 			// U+320E..U+321E in NFKC mode. | ||||
| 			rb.combineHangul(s, i, k) | ||||
| 			return | ||||
| 		} | ||||
| 		ii := b[i] | ||||
| 		// We can only use combineForward as a filter if we later | ||||
| 		// get the info for the combined character. This is more | ||||
| 		// expensive than using the filter. Using combinesBackward() | ||||
| 		// is safe. | ||||
| 		if ii.combinesBackward() { | ||||
| 			cccB := b[k-1].ccc | ||||
| 			cccC := ii.ccc | ||||
| 			blocked := false // b[i] blocked by starter or greater or equal CCC? | ||||
| 			if cccB == 0 { | ||||
| 				s = k - 1 | ||||
| 			} else { | ||||
| 				blocked = s != k-1 && cccB >= cccC | ||||
| 			} | ||||
| 			if !blocked { | ||||
| 				combined := combine(rb.runeAt(s), rb.runeAt(i)) | ||||
| 				if combined != 0 { | ||||
| 					// b[i] is absorbed into the starter and not kept. | ||||
| 					rb.assignRune(s, combined) | ||||
| 					continue | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		b[k] = b[i] | ||||
| 		k++ | ||||
| 	} | ||||
| 	rb.nrune = k | ||||
| } | ||||
							
								
								
									
										278
									
								
								vendor/golang.org/x/text/unicode/norm/forminfo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										278
									
								
								vendor/golang.org/x/text/unicode/norm/forminfo.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,278 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "encoding/binary" | ||||
|  | ||||
| // This file contains Form-specific logic and wrappers for data in tables.go. | ||||
|  | ||||
| // Rune info is stored in a separate trie per composing form. A composing form | ||||
| // and its corresponding decomposing form share the same trie.  Each trie maps | ||||
| // a rune to a uint16. The values take two forms.  For v >= 0x8000: | ||||
| //   bits | ||||
| //   15:    1 (inverse of NFD_QC bit of qcInfo) | ||||
| //   13..7: qcInfo (see below). isYesD is always true (no decomposition). | ||||
| //    6..0: ccc (compressed CCC value). | ||||
| // For v < 0x8000, the respective rune has a decomposition and v is an index | ||||
| // into a byte array of UTF-8 decomposition sequences and additional info and | ||||
| // has the form: | ||||
| //    <header> <decomp_byte>* [<tccc> [<lccc>]] | ||||
| // The header contains the number of bytes in the decomposition (excluding this | ||||
| // length byte). The two most significant bits of this length byte correspond | ||||
| // to bit 5 and 4 of qcInfo (see below).  The byte sequence itself starts at v+1. | ||||
| // The byte sequence is followed by a trailing and leading CCC if the values | ||||
| // for these are not zero.  The value of v determines which ccc are appended | ||||
| // to the sequences.  For v < firstCCC, there are none, for v >= firstCCC, | ||||
| // the sequence is followed by a trailing ccc, and for v >= firstLeadingCC | ||||
| // there is an additional leading ccc. The value of tccc itself is the | ||||
| // trailing CCC shifted left 2 bits. The two least-significant bits of tccc | ||||
| // are the number of trailing non-starters. | ||||
|  | ||||
| // Bit masks for qcInfo values and for the header byte of a decomposition. | ||||
| const ( | ||||
| 	qcInfoMask      = 0x3F // to clear all but the relevant bits in a qcInfo | ||||
| 	headerLenMask   = 0x3F // extract the length value from the header byte | ||||
| 	headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte | ||||
| ) | ||||
|  | ||||
| // Properties provides access to normalization properties of a rune. | ||||
| type Properties struct { | ||||
| 	pos   uint8  // start position in reorderBuffer; used in composition.go | ||||
| 	size  uint8  // length of UTF-8 encoding of this rune | ||||
| 	ccc   uint8  // leading canonical combining class (ccc if not decomposition) | ||||
| 	tccc  uint8  // trailing canonical combining class (ccc if not decomposition) | ||||
| 	nLead uint8  // number of leading non-starters. | ||||
| 	flags qcInfo // quick check flags | ||||
| 	// index is the offset in decomps of the header byte of the decomposition; | ||||
| 	// Decomposition treats 0 as "no decomposition". | ||||
| 	index uint16 | ||||
| } | ||||
|  | ||||
| // functions dispatchable per form | ||||
|  | ||||
| // lookupFunc returns the Properties for the rune at position i of b. | ||||
| type lookupFunc func(b input, i int) Properties | ||||
|  | ||||
| // formInfo holds Form-specific functions and tables. | ||||
| type formInfo struct { | ||||
| 	form                     Form | ||||
| 	composing, compatibility bool // form type | ||||
| 	// info looks up the Properties of a rune for this form. | ||||
| 	info                     lookupFunc | ||||
| 	// NOTE(review): presumably the main iteration step for this form; | ||||
| 	// iterFunc is declared elsewhere, confirm there. | ||||
| 	nextMain                 iterFunc | ||||
| } | ||||
|  | ||||
| // formTable holds the formInfo for each form and is indexed by Form. | ||||
| // The entries are listed in the order NFC, NFD, NFKC, NFKD, which is | ||||
| // presumably the order of the Form constants (confirm at their declaration). | ||||
| // A composing form and its decomposing counterpart share the same lookup | ||||
| // function and differ in the composing flag and nextMain. | ||||
| var formTable = []*formInfo{{ | ||||
| 	form:          NFC, | ||||
| 	composing:     true, | ||||
| 	compatibility: false, | ||||
| 	info:          lookupInfoNFC, | ||||
| 	nextMain:      nextComposed, | ||||
| }, { | ||||
| 	form:          NFD, | ||||
| 	composing:     false, | ||||
| 	compatibility: false, | ||||
| 	info:          lookupInfoNFC, | ||||
| 	nextMain:      nextDecomposed, | ||||
| }, { | ||||
| 	form:          NFKC, | ||||
| 	composing:     true, | ||||
| 	compatibility: true, | ||||
| 	info:          lookupInfoNFKC, | ||||
| 	nextMain:      nextComposed, | ||||
| }, { | ||||
| 	form:          NFKD, | ||||
| 	composing:     false, | ||||
| 	compatibility: true, | ||||
| 	info:          lookupInfoNFKC, | ||||
| 	nextMain:      nextDecomposed, | ||||
| }} | ||||
|  | ||||
| // We do not distinguish between boundaries for NFC, NFD, etc. to avoid | ||||
| // unexpected behavior for the user.  For example, in NFD, there is a boundary | ||||
| // after 'a'.  However, 'a' might combine with modifiers, so from the application's | ||||
| // perspective it is not a good boundary. We will therefore always use the | ||||
| // boundaries for the combining variants. | ||||
|  | ||||
| // BoundaryBefore returns true if this rune starts a new segment and | ||||
| // cannot combine with any rune on the left. That is the case when its | ||||
| // leading CCC is zero and it does not combine backward. | ||||
| func (p Properties) BoundaryBefore() bool { | ||||
| 	// We assume that the CCC of the first character in a decomposition | ||||
| 	// is always non-zero if different from info.ccc, so that no further | ||||
| 	// check is needed here. This is verified by maketables. | ||||
| 	return p.ccc == 0 && !p.combinesBackward() | ||||
| } | ||||
|  | ||||
| // BoundaryAfter returns true if runes cannot combine with or otherwise | ||||
| // interact with this or previous runes. | ||||
| // Currently this is the case only for inert runes (see isInert). | ||||
| func (p Properties) BoundaryAfter() bool { | ||||
| 	// TODO: loosen these conditions. | ||||
| 	return p.isInert() | ||||
| } | ||||
|  | ||||
| // We pack quick check data in 6 bits: | ||||
| //   5:    Combines forward  (0 == false, 1 == true) | ||||
| //   4..3: NFC_QC Yes(00), No (10), or Maybe (11) | ||||
| //   2:    NFD_QC Yes (0) or No (1). No also means there is a decomposition. | ||||
| //   1..0: Number of trailing non-starters. | ||||
| // | ||||
| // When all 6 bits are zero and the CCC is zero as well, the character is | ||||
| // inert, meaning it is never influenced by normalization. | ||||
| type qcInfo uint8 | ||||
|  | ||||
// isYesC reports whether NFC_QC is Yes, i.e. bit 4 of the flags is clear
// (both No and Maybe have that bit set).
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }

// isYesD reports whether NFD_QC is Yes, i.e. bit 2 of the flags is clear.
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
|  | ||||
// combinesForward reports whether bit 5 of the flags ("combines forward")
// is set.
func (p Properties) combinesForward() bool  { return p.flags&0x20 != 0 }

// combinesBackward reports whether bit 3 of the flags is set, which in the
// NFC_QC encoding distinguishes Maybe (11) from No (10).
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe

// hasDecomposition reports whether bit 2 of the flags (NFD_QC No) is set.
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
|  | ||||
// isInert reports whether the rune is never influenced by normalization:
// all quick-check bits selected by qcInfoMask are zero and the canonical
// combining class is zero.
func (p Properties) isInert() bool {
	return p.flags&qcInfoMask == 0 && p.ccc == 0
}
|  | ||||
// multiSegment reports whether the decomposition index falls in the
// half-open range [firstMulti, endMulti), which marks decompositions that
// span more than one segment.
func (p Properties) multiSegment() bool {
	return p.index >= firstMulti && p.index < endMulti
}
|  | ||||
// nLeadingNonStarters returns the stored count of leading non-starters
// (the nLead field, filled in by compInfo).
func (p Properties) nLeadingNonStarters() uint8 {
	return p.nLead
}
|  | ||||
// nTrailingNonStarters returns the number of trailing non-starters, which
// is kept in the two low bits of the flags.
func (p Properties) nTrailingNonStarters() uint8 {
	return uint8(p.flags & 0x03)
}
|  | ||||
| // Decomposition returns the decomposition for the underlying rune | ||||
| // or nil if there is none. | ||||
| func (p Properties) Decomposition() []byte { | ||||
| 	// TODO: create the decomposition for Hangul? | ||||
| 	if p.index == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	i := p.index | ||||
| 	n := decomps[i] & headerLenMask | ||||
| 	i++ | ||||
| 	return decomps[i : i+uint16(n)] | ||||
| } | ||||
|  | ||||
// Size returns the length of UTF-8 encoding of the rune.
// The value is the size recorded when the Properties were built (see
// compInfo), widened to int.
func (p Properties) Size() int {
	return int(p.size)
}
|  | ||||
| // CCC returns the canonical combining class of the underlying rune. | ||||
| func (p Properties) CCC() uint8 { | ||||
| 	if p.index >= firstCCCZeroExcept { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return ccc[p.ccc] | ||||
| } | ||||
|  | ||||
// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
// The stored ccc field is an index into the ccc table.
func (p Properties) LeadCCC() uint8 {
	return ccc[p.ccc]
}
|  | ||||
// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
// The stored tccc field is an index into the ccc table.
func (p Properties) TrailCCC() uint8 {
	return ccc[p.tccc]
}
|  | ||||
| func buildRecompMap() { | ||||
| 	recompMap = make(map[uint32]rune, len(recompMapPacked)/8) | ||||
| 	var buf [8]byte | ||||
| 	for i := 0; i < len(recompMapPacked); i += 8 { | ||||
| 		copy(buf[:], recompMapPacked[i:i+8]) | ||||
| 		key := binary.BigEndian.Uint32(buf[:4]) | ||||
| 		val := binary.BigEndian.Uint32(buf[4:]) | ||||
| 		recompMap[key] = rune(val) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Recomposition | ||||
| // We use 32-bit keys instead of 64-bit for the two codepoint keys. | ||||
| // This clips off the bits of three entries, but we know this will not | ||||
| // result in a collision. In the unlikely event that changes to | ||||
| // UnicodeData.txt introduce collisions, the compiler will catch it. | ||||
| // Note that the recomposition map for NFC and NFKC are identical. | ||||
|  | ||||
| // combine returns the combined rune or 0 if it doesn't exist. | ||||
| // | ||||
| // The caller is responsible for calling | ||||
| // recompMapOnce.Do(buildRecompMap) sometime before this is called. | ||||
| func combine(a, b rune) rune { | ||||
| 	key := uint32(uint16(a))<<16 + uint32(uint16(b)) | ||||
| 	if recompMap == nil { | ||||
| 		panic("caller error") // see func comment | ||||
| 	} | ||||
| 	return recompMap[key] | ||||
| } | ||||
|  | ||||
| func lookupInfoNFC(b input, i int) Properties { | ||||
| 	v, sz := b.charinfoNFC(i) | ||||
| 	return compInfo(v, sz) | ||||
| } | ||||
|  | ||||
| func lookupInfoNFKC(b input, i int) Properties { | ||||
| 	v, sz := b.charinfoNFKC(i) | ||||
| 	return compInfo(v, sz) | ||||
| } | ||||
|  | ||||
| // Properties returns properties for the first rune in s. | ||||
| func (f Form) Properties(s []byte) Properties { | ||||
| 	if f == NFC || f == NFD { | ||||
| 		return compInfo(nfcData.lookup(s)) | ||||
| 	} | ||||
| 	return compInfo(nfkcData.lookup(s)) | ||||
| } | ||||
|  | ||||
| // PropertiesString returns properties for the first rune in s. | ||||
| func (f Form) PropertiesString(s string) Properties { | ||||
| 	if f == NFC || f == NFD { | ||||
| 		return compInfo(nfcData.lookupString(s)) | ||||
| 	} | ||||
| 	return compInfo(nfkcData.lookupString(s)) | ||||
| } | ||||
|  | ||||
// compInfo converts the information contained in v and sz
// to a Properties.  See the comment at the top of the file
// for more information on the format.
//
// Three encodings of v are distinguished:
//   - v == 0: only the size is recorded; every other field stays zero.
//   - v >= 0x8000: there is no decomposition; the low byte of v is used for
//     both ccc and tccc and the high byte is stored as the flags.
//   - otherwise: v is an index into decomps that points at the header byte
//     of the rune's decomposition.
func compInfo(v uint16, sz int) Properties {
	if v == 0 {
		return Properties{size: uint8(sz)}
	} else if v >= 0x8000 {
		p := Properties{
			size:  uint8(sz),
			ccc:   uint8(v),
			tccc:  uint8(v),
			flags: qcInfo(v >> 8),
		}
		// Without a decomposition, the low two flag bits double as the
		// leading non-starter count for non-starters and for runes that
		// combine backward.
		if p.ccc > 0 || p.combinesBackward() {
			p.nLead = uint8(p.flags & 0x3)
		}
		return p
	}
	// has decomposition
	h := decomps[v]
	// The header's flag bits are shifted into qcInfo position; 0x4 marks
	// the presence of a decomposition (NFD_QC No).
	f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
	p := Properties{size: uint8(sz), flags: f, index: v}
	if v >= firstCCC {
		// Step over the header and the decomposition bytes to the byte
		// that follows: its upper six bits hold tccc and its low two bits
		// the trailing non-starter count.
		v += uint16(h&headerLenMask) + 1
		c := decomps[v]
		p.tccc = c >> 2
		p.flags |= qcInfo(c & 0x3)
		if v >= firstLeadingCCC {
			p.nLead = c & 0x3
			if v >= firstStarterWithNLead {
				// We were tricked. Remove the decomposition.
				p.flags &= 0x03
				p.index = 0
				return p
			}
			// One more byte follows, carrying the leading ccc.
			p.ccc = decomps[v+1]
		}
	}
	return p
}
							
								
								
									
										109
									
								
								vendor/golang.org/x/text/unicode/norm/input.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								vendor/golang.org/x/text/unicode/norm/input.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,109 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "unicode/utf8" | ||||
|  | ||||
// input is a read-only view of the text being processed, backed by either
// a string or a byte slice. Every accessor selects the backing store by
// testing whether bytes is nil: a nil bytes field means str is used.
type input struct {
	str   string
	bytes []byte
}

// inputBytes returns an input backed by the byte slice str.
func inputBytes(str []byte) input {
	return input{bytes: str}
}

// inputString returns an input backed by the string str.
func inputString(str string) input {
	return input{str: str}
}

// setBytes makes in refer to the byte slice str and drops any string.
func (in *input) setBytes(str []byte) {
	in.str = ""
	in.bytes = str
}

// setString makes in refer to the string str and drops any byte slice.
func (in *input) setString(str string) {
	in.str = str
	in.bytes = nil
}

// _byte returns the byte at offset p of the underlying source.
func (in *input) _byte(p int) byte {
	if in.bytes == nil {
		return in.str[p]
	}
	return in.bytes[p]
}

// skipASCII returns the first offset in [p, max) whose byte is not ASCII
// (>= utf8.RuneSelf), or max if every byte in that range is ASCII.
func (in *input) skipASCII(p, max int) int {
	if in.bytes == nil {
		for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
		}
	} else {
		for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
		}
	}
	return p
}

// skipContinuationBytes returns the first offset at or after p whose byte
// can start a rune, or the length of the source if there is none.
func (in *input) skipContinuationBytes(p int) int {
	if in.bytes == nil {
		for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
		}
	} else {
		for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
		}
	}
	return p
}

// appendSlice appends bytes [b, e) of the source to buf and returns the
// extended slice. It requires b <= e for both kinds of source.
func (in *input) appendSlice(buf []byte, b, e int) []byte {
	if in.bytes != nil {
		return append(buf, in.bytes[b:e]...)
	}
	// append accepts a string operand directly when the destination is a
	// byte slice, so the substring is copied in one step instead of one
	// byte at a time.
	return append(buf, in.str[b:e]...)
}

// copySlice copies bytes [b, e) of the source into buf and returns the
// number of bytes copied, which is limited by len(buf).
func (in *input) copySlice(buf []byte, b, e int) int {
	if in.bytes == nil {
		return copy(buf, in.str[b:e])
	}
	return copy(buf, in.bytes[b:e])
}
|  | ||||
| func (in *input) charinfoNFC(p int) (uint16, int) { | ||||
| 	if in.bytes == nil { | ||||
| 		return nfcData.lookupString(in.str[p:]) | ||||
| 	} | ||||
| 	return nfcData.lookup(in.bytes[p:]) | ||||
| } | ||||
|  | ||||
| func (in *input) charinfoNFKC(p int) (uint16, int) { | ||||
| 	if in.bytes == nil { | ||||
| 		return nfkcData.lookupString(in.str[p:]) | ||||
| 	} | ||||
| 	return nfkcData.lookup(in.bytes[p:]) | ||||
| } | ||||
|  | ||||
| func (in *input) hangul(p int) (r rune) { | ||||
| 	var size int | ||||
| 	if in.bytes == nil { | ||||
| 		if !isHangulString(in.str[p:]) { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		r, size = utf8.DecodeRuneInString(in.str[p:]) | ||||
| 	} else { | ||||
| 		if !isHangul(in.bytes[p:]) { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		r, size = utf8.DecodeRune(in.bytes[p:]) | ||||
| 	} | ||||
| 	if size != hangulUTF8Size { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	return r | ||||
| } | ||||
							
								
								
									
										458
									
								
								vendor/golang.org/x/text/unicode/norm/iter.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										458
									
								
								vendor/golang.org/x/text/unicode/norm/iter.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,458 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
|  | ||||
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
// sequence of starter and non-starter runes for the purpose of normalization.
// It equals maxByteBufferSize, the size of the scratch buffer inside Iter.
const MaxSegmentSize = maxByteBufferSize
|  | ||||
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
	rb     reorderBuffer           // holds the form, the source and its length (nsrc)
	buf    [maxByteBufferSize]byte // scratch space for segments that cannot be returned as a slice of the source
	info   Properties              // first character saved from previous iteration
	next   iterFunc                // implementation of next depends on form
	asciiF iterFunc                // ASCII fast path: nextASCIIBytes or nextASCIIString, chosen by Init/InitString

	p        int    // current position in input source
	multiSeg []byte // remainder of multi-segment decomposition
}
|  | ||||
// iterFunc is the signature of the state functions behind Iter.Next: each
// one returns the next segment and may replace i.next with the function
// that should handle the following call.
type iterFunc func(*Iter) []byte
|  | ||||
| // Init initializes i to iterate over src after normalizing it to Form f. | ||||
| func (i *Iter) Init(f Form, src []byte) { | ||||
| 	i.p = 0 | ||||
| 	if len(src) == 0 { | ||||
| 		i.setDone() | ||||
| 		i.rb.nsrc = 0 | ||||
| 		return | ||||
| 	} | ||||
| 	i.multiSeg = nil | ||||
| 	i.rb.init(f, src) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.asciiF = nextASCIIBytes | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| } | ||||
|  | ||||
| // InitString initializes i to iterate over src after normalizing it to Form f. | ||||
| func (i *Iter) InitString(f Form, src string) { | ||||
| 	i.p = 0 | ||||
| 	if len(src) == 0 { | ||||
| 		i.setDone() | ||||
| 		i.rb.nsrc = 0 | ||||
| 		return | ||||
| 	} | ||||
| 	i.multiSeg = nil | ||||
| 	i.rb.initString(f, src) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.asciiF = nextASCIIString | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| } | ||||
|  | ||||
| // Seek sets the segment to be returned by the next call to Next to start | ||||
| // at position p.  It is the responsibility of the caller to set p to the | ||||
| // start of a segment. | ||||
| func (i *Iter) Seek(offset int64, whence int) (int64, error) { | ||||
| 	var abs int64 | ||||
| 	switch whence { | ||||
| 	case 0: | ||||
| 		abs = offset | ||||
| 	case 1: | ||||
| 		abs = int64(i.p) + offset | ||||
| 	case 2: | ||||
| 		abs = int64(i.rb.nsrc) + offset | ||||
| 	default: | ||||
| 		return 0, fmt.Errorf("norm: invalid whence") | ||||
| 	} | ||||
| 	if abs < 0 { | ||||
| 		return 0, fmt.Errorf("norm: negative position") | ||||
| 	} | ||||
| 	if int(abs) >= i.rb.nsrc { | ||||
| 		i.setDone() | ||||
| 		return int64(i.p), nil | ||||
| 	} | ||||
| 	i.p = int(abs) | ||||
| 	i.multiSeg = nil | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	return abs, nil | ||||
| } | ||||
|  | ||||
| // returnSlice returns a slice of the underlying input type as a byte slice. | ||||
| // If the underlying is of type []byte, it will simply return a slice. | ||||
| // If the underlying is of type string, it will copy the slice to the buffer | ||||
| // and return that. | ||||
| func (i *Iter) returnSlice(a, b int) []byte { | ||||
| 	if i.rb.src.bytes == nil { | ||||
| 		return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])] | ||||
| 	} | ||||
| 	return i.rb.src.bytes[a:b] | ||||
| } | ||||
|  | ||||
// Pos returns the byte position at which the next call to Next will commence processing.
// Once the iterator is done, this is the length of the input (see setDone).
func (i *Iter) Pos() int {
	return i.p
}
|  | ||||
// setDone puts the iterator in its terminal state: subsequent calls to Next
// go to nextDone and the position is moved to the end of the input.
func (i *Iter) setDone() {
	i.next = nextDone
	i.p = i.rb.nsrc
}
|  | ||||
// Done returns true if there is no more input to process, that is, when the
// current position has reached the length of the input.
func (i *Iter) Done() bool {
	return i.p >= i.rb.nsrc
}
|  | ||||
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
//
// Next dispatches to the current state function, i.next. Several state
// functions return a slice of i.buf, which later calls may overwrite.
func (i *Iter) Next() []byte {
	return i.next(i)
}
|  | ||||
| func nextASCIIBytes(i *Iter) []byte { | ||||
| 	p := i.p + 1 | ||||
| 	if p >= i.rb.nsrc { | ||||
| 		p0 := i.p | ||||
| 		i.setDone() | ||||
| 		return i.rb.src.bytes[p0:p] | ||||
| 	} | ||||
| 	if i.rb.src.bytes[p] < utf8.RuneSelf { | ||||
| 		p0 := i.p | ||||
| 		i.p = p | ||||
| 		return i.rb.src.bytes[p0:p] | ||||
| 	} | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
| func nextASCIIString(i *Iter) []byte { | ||||
| 	p := i.p + 1 | ||||
| 	if p >= i.rb.nsrc { | ||||
| 		i.buf[0] = i.rb.src.str[i.p] | ||||
| 		i.setDone() | ||||
| 		return i.buf[:1] | ||||
| 	} | ||||
| 	if i.rb.src.str[p] < utf8.RuneSelf { | ||||
| 		i.buf[0] = i.rb.src.str[i.p] | ||||
| 		i.p = p | ||||
| 		return i.buf[:1] | ||||
| 	} | ||||
| 	i.info = i.rb.f.info(i.rb.src, i.p) | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
// nextHangul returns the decomposition of the Hangul syllable at the
// current position. It stays the active state function as long as the
// following rune is Hangul as well.
func nextHangul(i *Iter) []byte {
	p := i.p
	next := p + hangulUTF8Size
	if next >= i.rb.nsrc {
		i.setDone()
	} else if i.rb.src.hangul(next) == 0 {
		// The rune after this syllable is not Hangul: let the main
		// function process the current position instead.
		i.rb.ss.next(i.info)
		i.info = i.rb.f.info(i.rb.src, i.p)
		i.next = i.rb.f.nextMain
		return i.next(i)
	}
	i.p = next
	// Decompose the syllable at the saved position p; i.p has already
	// moved on (or been set to the end by setDone).
	return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
}
|  | ||||
// nextDone is the state function installed by setDone; it always returns
// nil.
func nextDone(i *Iter) []byte {
	return nil
}
|  | ||||
| // nextMulti is used for iterating over multi-segment decompositions | ||||
| // for decomposing normal forms. | ||||
| func nextMulti(i *Iter) []byte { | ||||
| 	j := 0 | ||||
| 	d := i.multiSeg | ||||
| 	// skip first rune | ||||
| 	for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ { | ||||
| 	} | ||||
| 	for j < len(d) { | ||||
| 		info := i.rb.f.info(input{bytes: d}, j) | ||||
| 		if info.BoundaryBefore() { | ||||
| 			i.multiSeg = d[j:] | ||||
| 			return d[:j] | ||||
| 		} | ||||
| 		j += int(info.size) | ||||
| 	} | ||||
| 	// treat last segment as normal decomposition | ||||
| 	i.next = i.rb.f.nextMain | ||||
| 	return i.next(i) | ||||
| } | ||||
|  | ||||
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
//
// Runes from i.multiSeg are inserted into the reorder buffer until one is
// found that starts a new segment. The buffered runes are then composed
// and returned, and the boundary rune becomes the first rune of the next
// segment.
func nextMultiNorm(i *Iter) []byte {
	j := 0
	d := i.multiSeg
	for j < len(d) {
		info := i.rb.f.info(input{bytes: d}, j)
		if info.BoundaryBefore() {
			// Flush what has been collected, then seed the reorder buffer
			// with the rune that starts the next segment.
			i.rb.compose()
			seg := i.buf[:i.rb.flushCopy(i.buf[:])]
			i.rb.insertUnsafe(input{bytes: d}, j, info)
			i.multiSeg = d[j+int(info.size):]
			return seg
		}
		i.rb.insertUnsafe(input{bytes: d}, j, info)
		j += int(info.size)
	}
	// The decomposition is exhausted: continue with the regular source.
	i.multiSeg = nil
	i.next = nextComposed
	return doNormComposed(i)
}
|  | ||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
//
// It collects one segment, either by referring to a range of the source
// (inCopyStart up to i.p) or by assembling decomposed bytes in i.buf (outp
// bytes so far). As soon as a rune is seen whose ccc is lower than the
// trailing ccc of its predecessor, the runes are out of order and the
// segment is finished through the reorder buffer (doNorm).
func nextDecomposed(i *Iter) (next []byte) {
	outp := 0
	inCopyStart, outCopyStart := i.p, 0
	for {
		if sz := int(i.info.size); sz <= 1 {
			i.rb.ss = 0
			p := i.p
			i.p++ // ASCII or illegal byte.  Either way, advance by 1.
			if i.p >= i.rb.nsrc {
				i.setDone()
				return i.returnSlice(p, i.p)
			} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
				// Another ASCII byte follows: switch to the ASCII fast path.
				i.next = i.asciiF
				return i.returnSlice(p, i.p)
			}
			outp++
		} else if d := i.info.Decomposition(); d != nil {
			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
			// Case 1: there is a leftover to copy.  In this case the decomposition
			// must begin with a modifier and should always be appended.
			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
			p := outp + len(d)
			if outp > 0 {
				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
				// TODO: this condition should not be possible, but we leave it
				// in for defensive purposes.
				if p > len(i.buf) {
					return i.buf[:outp]
				}
			} else if i.info.multiSegment() {
				// outp must be 0 as multi-segment decompositions always
				// start a new segment.
				if i.multiSeg == nil {
					i.multiSeg = d
					i.next = nextMulti
					return nextMulti(i)
				}
				// We are in the last segment.  Treat as normal decomposition.
				d = i.multiSeg
				i.multiSeg = nil
				p = len(d)
			}
			prevCC := i.info.tccc
			if i.p += sz; i.p >= i.rb.nsrc {
				i.setDone()
				i.info = Properties{} // Force BoundaryBefore to succeed.
			} else {
				i.info = i.rb.f.info(i.rb.src, i.p)
			}
			switch i.rb.ss.next(i.info) {
			case ssOverflow:
				i.next = nextCGJDecompose
				fallthrough
			case ssStarter:
				// The segment ends here. Return d directly when nothing
				// precedes it in the buffer.
				if outp > 0 {
					copy(i.buf[outp:], d)
					return i.buf[:p]
				}
				return d
			}
			copy(i.buf[outp:], d)
			outp = p
			inCopyStart, outCopyStart = i.p, outp
			if i.info.ccc < prevCC {
				goto doNorm
			}
			continue
		} else if r := i.rb.src.hangul(i.p); r != 0 {
			outp = decomposeHangul(i.buf[:], r)
			i.p += hangulUTF8Size
			inCopyStart, outCopyStart = i.p, outp
			if i.p >= i.rb.nsrc {
				i.setDone()
				break
			} else if i.rb.src.hangul(i.p) != 0 {
				// Another Hangul syllable follows: use the Hangul fast path.
				i.next = nextHangul
				return i.buf[:outp]
			}
		} else {
			// No decomposition: the rune is taken over from the source as is.
			p := outp + sz
			if p > len(i.buf) {
				break
			}
			outp = p
			i.p += sz
		}
		if i.p >= i.rb.nsrc {
			i.setDone()
			break
		}
		prevCC := i.info.tccc
		i.info = i.rb.f.info(i.rb.src, i.p)
		if v := i.rb.ss.next(i.info); v == ssStarter {
			break
		} else if v == ssOverflow {
			i.next = nextCGJDecompose
			break
		}
		if i.info.ccc < prevCC {
			goto doNorm
		}
	}
	if outCopyStart == 0 {
		// Nothing was written to the buffer: the segment is a plain range
		// of the source.
		return i.returnSlice(inCopyStart, i.p)
	} else if inCopyStart < i.p {
		i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
	}
	return i.buf[:outp]
doNorm:
	// Insert what we have decomposed so far in the reorderBuffer.
	// As we will only reorder, there will always be enough room.
	i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
	i.rb.insertDecomposed(i.buf[0:outp])
	return doNormDecomposed(i)
}
|  | ||||
// doNormDecomposed finishes a decomposed segment through the reorder
// buffer. Starting with the rune described by i.info, it inserts runes
// until the input ends, a rune with ccc == 0 is reached, or the stream-safe
// state reports an overflow. It then flushes the buffer into i.buf and
// returns the result.
func doNormDecomposed(i *Iter) []byte {
	for {
		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
			i.setDone()
			break
		}
		i.info = i.rb.f.info(i.rb.src, i.p)
		if i.info.ccc == 0 {
			break
		}
		if s := i.rb.ss.next(i.info); s == ssOverflow {
			i.next = nextCGJDecompose
			break
		}
	}
	// new segment or too many combining characters: exit normalization
	return i.buf[:i.rb.flushCopy(i.buf[:])]
}
|  | ||||
| func nextCGJDecompose(i *Iter) []byte { | ||||
| 	i.rb.ss = 0 | ||||
| 	i.rb.insertCGJ() | ||||
| 	i.next = nextDecomposed | ||||
| 	i.rb.ss.first(i.info) | ||||
| 	buf := doNormDecomposed(i) | ||||
| 	return buf | ||||
| } | ||||
|  | ||||
// nextComposed is the implementation of Next for forms NFC and NFKC.
//
// As long as every rune is NFC_QC Yes and the combining classes do not
// decrease, the segment is returned as a range of the source. Otherwise
// the position is reset to the start of the segment and the segment is
// rebuilt through the reorder buffer (doNorm).
func nextComposed(i *Iter) []byte {
	outp, startp := 0, i.p
	var prevCC uint8
	for {
		if !i.info.isYesC() {
			goto doNorm
		}
		prevCC = i.info.tccc
		sz := int(i.info.size)
		if sz == 0 {
			sz = 1 // illegal rune: copy byte-by-byte
		}
		p := outp + sz
		if p > len(i.buf) {
			break
		}
		outp = p
		i.p += sz
		if i.p >= i.rb.nsrc {
			i.setDone()
			break
		} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
			// An ASCII byte follows: end the segment and switch to the
			// ASCII fast path.
			i.rb.ss = 0
			i.next = i.asciiF
			break
		}
		i.info = i.rb.f.info(i.rb.src, i.p)
		if v := i.rb.ss.next(i.info); v == ssStarter {
			break
		} else if v == ssOverflow {
			i.next = nextCGJCompose
			break
		}
		if i.info.ccc < prevCC {
			goto doNorm
		}
	}
	return i.returnSlice(startp, i.p)
doNorm:
	// reset to start position
	i.p = startp
	i.info = i.rb.f.info(i.rb.src, i.p)
	i.rb.ss.first(i.info)
	if i.info.multiSegment() {
		// Feed the first rune of the decomposition to the reorder buffer
		// and let nextMultiNorm walk the remainder.
		d := i.info.Decomposition()
		info := i.rb.f.info(input{bytes: d}, 0)
		i.rb.insertUnsafe(input{bytes: d}, 0, info)
		i.multiSeg = d[int(info.size):]
		i.next = nextMultiNorm
		return nextMultiNorm(i)
	}
	i.rb.ss.first(i.info)
	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	return doNormComposed(i)
}
|  | ||||
// doNormComposed finishes a composed segment through the reorder buffer.
// The first rune must already be in the buffer. Following runes are
// inserted until the input ends, a starter is reached, or the stream-safe
// state reports an overflow. The buffer is then composed and flushed into
// i.buf, and the result is returned.
func doNormComposed(i *Iter) []byte {
	// First rune should already be inserted.
	for {
		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
			i.setDone()
			break
		}
		i.info = i.rb.f.info(i.rb.src, i.p)
		if s := i.rb.ss.next(i.info); s == ssStarter {
			break
		} else if s == ssOverflow {
			i.next = nextCGJCompose
			break
		}
		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	}
	i.rb.compose()
	seg := i.buf[:i.rb.flushCopy(i.buf[:])]
	return seg
}
|  | ||||
// nextCGJCompose is installed after a stream-safe overflow while composing.
// It resets the stream-safe state, inserts a CGJ into the reorder buffer,
// restores nextComposed as the state function and then continues with the
// pending rune.
func nextCGJCompose(i *Iter) []byte {
	i.rb.ss = 0 // instead of first
	i.rb.insertCGJ()
	i.next = nextComposed
	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
	// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
	// If we ever change that, insert a check here.
	i.rb.ss.first(i.info)
	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	return doNormComposed(i)
}
							
								
								
									
										609
									
								
								vendor/golang.org/x/text/unicode/norm/normalize.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										609
									
								
								vendor/golang.org/x/text/unicode/norm/normalize.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,609 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| // Note: the file data_test.go that is generated should not be checked in. | ||||
| //go:generate go run maketables.go triegen.go | ||||
| //go:generate go test -tags test | ||||
|  | ||||
| // Package norm contains types and functions for normalizing Unicode strings. | ||||
| package norm // import "golang.org/x/text/unicode/norm" | ||||
|  | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//   NFC   Unicode Normalization Form C
//   NFD   Unicode Normalization Form D
//   NFKC  Unicode Normalization Form KC
//   NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//   f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int

// The values double as indexes into formTable, so their order matters.
const (
	NFC Form = iota // Unicode Normalization Form C
	NFD             // Unicode Normalization Form D
	NFKC            // Unicode Normalization Form KC
	NFKD            // Unicode Normalization Form KD
)
|  | ||||
| // Bytes returns f(b). May return b if f(b) = b. | ||||
| func (f Form) Bytes(b []byte) []byte { | ||||
| 	src := inputBytes(b) | ||||
| 	ft := formTable[f] | ||||
| 	n, ok := ft.quickSpan(src, 0, len(b), true) | ||||
| 	if ok { | ||||
| 		return b | ||||
| 	} | ||||
| 	out := make([]byte, n, len(b)) | ||||
| 	copy(out, b[0:n]) | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush} | ||||
| 	return doAppendInner(&rb, n) | ||||
| } | ||||
|  | ||||
| // String returns f(s). | ||||
| func (f Form) String(s string) string { | ||||
| 	src := inputString(s) | ||||
| 	ft := formTable[f] | ||||
| 	n, ok := ft.quickSpan(src, 0, len(s), true) | ||||
| 	if ok { | ||||
| 		return s | ||||
| 	} | ||||
| 	out := make([]byte, n, len(s)) | ||||
| 	copy(out, s[0:n]) | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush} | ||||
| 	return string(doAppendInner(&rb, n)) | ||||
| } | ||||
|  | ||||
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
	src := inputBytes(b)
	ft := formTable[f]
	bp, ok := ft.quickSpan(src, 0, len(b), true)
	if ok {
		return true
	}
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
	// Instead of producing output, the flusher compares every flushed
	// segment with the input that starts at rb.out.
	rb.setFlusher(nil, cmpNormalBytes)
	for bp < len(b) {
		rb.out = b[bp:]
		// A negative result from decomposeSegment means the input is not
		// normalized.
		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
			return false
		}
		// Skip ahead over the part that quickSpan accepts.
		bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
	}
	return true
}
|  | ||||
| func cmpNormalBytes(rb *reorderBuffer) bool { | ||||
| 	b := rb.out | ||||
| 	for i := 0; i < rb.nrune; i++ { | ||||
| 		info := rb.rune[i] | ||||
| 		if int(info.size) > len(b) { | ||||
| 			return false | ||||
| 		} | ||||
| 		p := info.pos | ||||
| 		pe := p + info.size | ||||
| 		for ; p < pe; p++ { | ||||
| 			if b[0] != rb.byte[p] { | ||||
| 				return false | ||||
| 			} | ||||
| 			b = b[1:] | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
|  | ||||
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
	src := inputString(s)
	ft := formTable[f]
	bp, ok := ft.quickSpan(src, 0, len(s), true)
	if ok {
		return true
	}
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
	// Instead of producing output, the flusher compares every flushed rune
	// with s starting at bp. The closure captures bp and advances it for
	// every byte it matches.
	rb.setFlusher(nil, func(rb *reorderBuffer) bool {
		for i := 0; i < rb.nrune; i++ {
			info := rb.rune[i]
			if bp+int(info.size) > len(s) {
				return false
			}
			p := info.pos
			pe := p + info.size
			for ; p < pe; p++ {
				if s[bp] != rb.byte[p] {
					return false
				}
				bp++
			}
		}
		return true
	})
	for bp < len(s) {
		// A negative result from decomposeSegment means the input is not
		// normalized.
		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
			return false
		}
		// Skip ahead over the part that quickSpan accepts.
		bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
	}
	return true
}
|  | ||||
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It patches rb.out in
// place and reports whether the decomposition is still in progress: false
// when stray bytes followed the last rune and the buffer has been flushed,
// true otherwise.
func patchTail(rb *reorderBuffer) bool {
	info, p := lastRuneStart(&rb.f, rb.out)
	if p == -1 || info.size == 0 {
		return true
	}
	end := p + int(info.size)
	// extra is the number of bytes that follow the last complete rune.
	extra := len(rb.out) - end
	if extra > 0 {
		// Potentially allocating memory. However, this only
		// happens with ill-formed UTF-8.
		x := make([]byte, 0)
		x = append(x, rb.out[len(rb.out)-extra:]...)
		// Cut the stray bytes off, normalize the well-formed part and put
		// the stray bytes back behind it.
		rb.out = rb.out[:end]
		decomposeToLastBoundary(rb)
		rb.doFlush()
		rb.out = append(rb.out, x...)
		return false
	}
	// The output ends in a complete rune: move it from the output into the
	// reorder buffer so that it can interact with what follows.
	buf := rb.out[p:]
	rb.out = rb.out[:p]
	decomposeToLastBoundary(rb)
	if s := rb.ss.next(info); s == ssStarter {
		rb.doFlush()
		rb.ss.first(info)
	} else if s == ssOverflow {
		rb.doFlush()
		rb.insertCGJ()
		rb.ss = 0
	}
	rb.insertUnsafe(inputBytes(buf), 0, info)
	return true
}
|  | ||||
| func appendQuick(rb *reorderBuffer, i int) int { | ||||
| 	if rb.nsrc == i { | ||||
| 		return i | ||||
| 	} | ||||
| 	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true) | ||||
| 	rb.out = rb.src.appendSlice(rb.out, i, end) | ||||
| 	return end | ||||
| } | ||||
|  | ||||
| // Append returns f(append(out, b...)). | ||||
| // The buffer out must be nil, empty, or equal to f(out). | ||||
| func (f Form) Append(out []byte, src ...byte) []byte { | ||||
| 	return f.doAppend(out, inputBytes(src), len(src)) | ||||
| } | ||||
|  | ||||
| func (f Form) doAppend(out []byte, src input, n int) []byte { | ||||
| 	if n == 0 { | ||||
| 		return out | ||||
| 	} | ||||
| 	ft := formTable[f] | ||||
| 	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer. | ||||
| 	if len(out) == 0 { | ||||
| 		p, _ := ft.quickSpan(src, 0, n, true) | ||||
| 		out = src.appendSlice(out, 0, p) | ||||
| 		if p == n { | ||||
| 			return out | ||||
| 		} | ||||
| 		rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush} | ||||
| 		return doAppendInner(&rb, p) | ||||
| 	} | ||||
| 	rb := reorderBuffer{f: *ft, src: src, nsrc: n} | ||||
| 	return doAppend(&rb, out, 0) | ||||
| } | ||||
|  | ||||
| func doAppend(rb *reorderBuffer, out []byte, p int) []byte { | ||||
| 	rb.setFlusher(out, appendFlush) | ||||
| 	src, n := rb.src, rb.nsrc | ||||
| 	doMerge := len(out) > 0 | ||||
| 	if q := src.skipContinuationBytes(p); q > p { | ||||
| 		// Move leading non-starters to destination. | ||||
| 		rb.out = src.appendSlice(rb.out, p, q) | ||||
| 		p = q | ||||
| 		doMerge = patchTail(rb) | ||||
| 	} | ||||
| 	fd := &rb.f | ||||
| 	if doMerge { | ||||
| 		var info Properties | ||||
| 		if p < n { | ||||
| 			info = fd.info(src, p) | ||||
| 			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 { | ||||
| 				if p == 0 { | ||||
| 					decomposeToLastBoundary(rb) | ||||
| 				} | ||||
| 				p = decomposeSegment(rb, p, true) | ||||
| 			} | ||||
| 		} | ||||
| 		if info.size == 0 { | ||||
| 			rb.doFlush() | ||||
| 			// Append incomplete UTF-8 encoding. | ||||
| 			return src.appendSlice(rb.out, p, n) | ||||
| 		} | ||||
| 		if rb.nrune > 0 { | ||||
| 			return doAppendInner(rb, p) | ||||
| 		} | ||||
| 	} | ||||
| 	p = appendQuick(rb, p) | ||||
| 	return doAppendInner(rb, p) | ||||
| } | ||||
|  | ||||
| func doAppendInner(rb *reorderBuffer, p int) []byte { | ||||
| 	for n := rb.nsrc; p < n; { | ||||
| 		p = decomposeSegment(rb, p, true) | ||||
| 		p = appendQuick(rb, p) | ||||
| 	} | ||||
| 	return rb.out | ||||
| } | ||||
|  | ||||
| // AppendString returns f(append(out, []byte(s))). | ||||
| // The buffer out must be nil, empty, or equal to f(out). | ||||
| func (f Form) AppendString(out []byte, src string) []byte { | ||||
| 	return f.doAppend(out, inputString(src), len(src)) | ||||
| } | ||||
|  | ||||
| // QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) QuickSpan(b []byte) int { | ||||
| 	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true) | ||||
| 	return n | ||||
| } | ||||
|  | ||||
| // Span implements transform.SpanningTransformer. It returns a boundary n such | ||||
| // that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n. | ||||
| func (f Form) Span(b []byte, atEOF bool) (n int, err error) { | ||||
| 	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF) | ||||
| 	if n < len(b) { | ||||
| 		if !ok { | ||||
| 			err = transform.ErrEndOfSpan | ||||
| 		} else { | ||||
| 			err = transform.ErrShortSrc | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // SpanString returns a boundary n such that s[0:n] == f(s[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) SpanString(s string, atEOF bool) (n int, err error) { | ||||
| 	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF) | ||||
| 	if n < len(s) { | ||||
| 		if !ok { | ||||
| 			err = transform.ErrEndOfSpan | ||||
| 		} else { | ||||
| 			err = transform.ErrShortSrc | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and | ||||
| // whether any non-normalized parts were found. If atEOF is false, n will | ||||
| // not point past the last segment if this segment might be become | ||||
| // non-normalized by appending other runes. | ||||
| func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) { | ||||
| 	var lastCC uint8 | ||||
| 	ss := streamSafe(0) | ||||
| 	lastSegStart := i | ||||
| 	for n = end; i < n; { | ||||
| 		if j := src.skipASCII(i, n); i != j { | ||||
| 			i = j | ||||
| 			lastSegStart = i - 1 | ||||
| 			lastCC = 0 | ||||
| 			ss = 0 | ||||
| 			continue | ||||
| 		} | ||||
| 		info := f.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			if atEOF { | ||||
| 				// include incomplete runes | ||||
| 				return n, true | ||||
| 			} | ||||
| 			return lastSegStart, true | ||||
| 		} | ||||
| 		// This block needs to be before the next, because it is possible to | ||||
| 		// have an overflow for runes that are starters (e.g. with U+FF9E). | ||||
| 		switch ss.next(info) { | ||||
| 		case ssStarter: | ||||
| 			lastSegStart = i | ||||
| 		case ssOverflow: | ||||
| 			return lastSegStart, false | ||||
| 		case ssSuccess: | ||||
| 			if lastCC > info.ccc { | ||||
| 				return lastSegStart, false | ||||
| 			} | ||||
| 		} | ||||
| 		if f.composing { | ||||
| 			if !info.isYesC() { | ||||
| 				break | ||||
| 			} | ||||
| 		} else { | ||||
| 			if !info.isYesD() { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 		lastCC = info.ccc | ||||
| 		i += int(info.size) | ||||
| 	} | ||||
| 	if i == n { | ||||
| 		if !atEOF { | ||||
| 			n = lastSegStart | ||||
| 		} | ||||
| 		return n, true | ||||
| 	} | ||||
| 	return lastSegStart, false | ||||
| } | ||||
|  | ||||
| // QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). | ||||
| // It is not guaranteed to return the largest such n. | ||||
| func (f Form) QuickSpanString(s string) int { | ||||
| 	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true) | ||||
| 	return n | ||||
| } | ||||
|  | ||||
| // FirstBoundary returns the position i of the first boundary in b | ||||
| // or -1 if b contains no boundary. | ||||
| func (f Form) FirstBoundary(b []byte) int { | ||||
| 	return f.firstBoundary(inputBytes(b), len(b)) | ||||
| } | ||||
|  | ||||
| func (f Form) firstBoundary(src input, nsrc int) int { | ||||
| 	i := src.skipContinuationBytes(0) | ||||
| 	if i >= nsrc { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	fd := formTable[f] | ||||
| 	ss := streamSafe(0) | ||||
| 	// We should call ss.first here, but we can't as the first rune is | ||||
| 	// skipped already. This means FirstBoundary can't really determine | ||||
| 	// CGJ insertion points correctly. Luckily it doesn't have to. | ||||
| 	for { | ||||
| 		info := fd.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			return -1 | ||||
| 		} | ||||
| 		if s := ss.next(info); s != ssSuccess { | ||||
| 			return i | ||||
| 		} | ||||
| 		i += int(info.size) | ||||
| 		if i >= nsrc { | ||||
| 			if !info.BoundaryAfter() && !ss.isMax() { | ||||
| 				return -1 | ||||
| 			} | ||||
| 			return nsrc | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // FirstBoundaryInString returns the position i of the first boundary in s | ||||
| // or -1 if s contains no boundary. | ||||
| func (f Form) FirstBoundaryInString(s string) int { | ||||
| 	return f.firstBoundary(inputString(s), len(s)) | ||||
| } | ||||
|  | ||||
| // NextBoundary reports the index of the boundary between the first and next | ||||
| // segment in b or -1 if atEOF is false and there are not enough bytes to | ||||
| // determine this boundary. | ||||
| func (f Form) NextBoundary(b []byte, atEOF bool) int { | ||||
| 	return f.nextBoundary(inputBytes(b), len(b), atEOF) | ||||
| } | ||||
|  | ||||
| // NextBoundaryInString reports the index of the boundary between the first and | ||||
| // next segment in b or -1 if atEOF is false and there are not enough bytes to | ||||
| // determine this boundary. | ||||
| func (f Form) NextBoundaryInString(s string, atEOF bool) int { | ||||
| 	return f.nextBoundary(inputString(s), len(s), atEOF) | ||||
| } | ||||
|  | ||||
| func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { | ||||
| 	if nsrc == 0 { | ||||
| 		if atEOF { | ||||
| 			return 0 | ||||
| 		} | ||||
| 		return -1 | ||||
| 	} | ||||
| 	fd := formTable[f] | ||||
| 	info := fd.info(src, 0) | ||||
| 	if info.size == 0 { | ||||
| 		if atEOF { | ||||
| 			return 1 | ||||
| 		} | ||||
| 		return -1 | ||||
| 	} | ||||
| 	ss := streamSafe(0) | ||||
| 	ss.first(info) | ||||
|  | ||||
| 	for i := int(info.size); i < nsrc; i += int(info.size) { | ||||
| 		info = fd.info(src, i) | ||||
| 		if info.size == 0 { | ||||
| 			if atEOF { | ||||
| 				return i | ||||
| 			} | ||||
| 			return -1 | ||||
| 		} | ||||
| 		// TODO: Using streamSafe to determine the boundary isn't the same as | ||||
| 		// using BoundaryBefore. Determine which should be used. | ||||
| 		if s := ss.next(info); s != ssSuccess { | ||||
| 			return i | ||||
| 		} | ||||
| 	} | ||||
| 	if !atEOF && !info.BoundaryAfter() && !ss.isMax() { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	return nsrc | ||||
| } | ||||
|  | ||||
| // LastBoundary returns the position i of the last boundary in b | ||||
| // or -1 if b contains no boundary. | ||||
| func (f Form) LastBoundary(b []byte) int { | ||||
| 	return lastBoundary(formTable[f], b) | ||||
| } | ||||
|  | ||||
| func lastBoundary(fd *formInfo, b []byte) int { | ||||
| 	i := len(b) | ||||
| 	info, p := lastRuneStart(fd, b) | ||||
| 	if p == -1 { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	if info.size == 0 { // ends with incomplete rune | ||||
| 		if p == 0 { // starts with incomplete rune | ||||
| 			return -1 | ||||
| 		} | ||||
| 		i = p | ||||
| 		info, p = lastRuneStart(fd, b[:i]) | ||||
| 		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter | ||||
| 			return i | ||||
| 		} | ||||
| 	} | ||||
| 	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8 | ||||
| 		return i | ||||
| 	} | ||||
| 	if info.BoundaryAfter() { | ||||
| 		return i | ||||
| 	} | ||||
| 	ss := streamSafe(0) | ||||
| 	v := ss.backwards(info) | ||||
| 	for i = p; i >= 0 && v != ssStarter; i = p { | ||||
| 		info, p = lastRuneStart(fd, b[:i]) | ||||
| 		if v = ss.backwards(info); v == ssOverflow { | ||||
| 			break | ||||
| 		} | ||||
| 		if p+int(info.size) != i { | ||||
| 			if p == -1 { // no boundary found | ||||
| 				return -1 | ||||
| 			} | ||||
| 			return i // boundary after an illegal UTF-8 encoding | ||||
| 		} | ||||
| 	} | ||||
| 	return i | ||||
| } | ||||
|  | ||||
| // decomposeSegment scans the first segment in src into rb. It inserts 0x034f | ||||
| // (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters | ||||
| // and returns the number of bytes consumed from src or iShortDst or iShortSrc. | ||||
| func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { | ||||
| 	// Force one character to be consumed. | ||||
| 	info := rb.f.info(rb.src, sp) | ||||
| 	if info.size == 0 { | ||||
| 		return 0 | ||||
| 	} | ||||
| 	if s := rb.ss.next(info); s == ssStarter { | ||||
| 		// TODO: this could be removed if we don't support merging. | ||||
| 		if rb.nrune > 0 { | ||||
| 			goto end | ||||
| 		} | ||||
| 	} else if s == ssOverflow { | ||||
| 		rb.insertCGJ() | ||||
| 		goto end | ||||
| 	} | ||||
| 	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { | ||||
| 		return int(err) | ||||
| 	} | ||||
| 	for { | ||||
| 		sp += int(info.size) | ||||
| 		if sp >= rb.nsrc { | ||||
| 			if !atEOF && !info.BoundaryAfter() { | ||||
| 				return int(iShortSrc) | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
| 		info = rb.f.info(rb.src, sp) | ||||
| 		if info.size == 0 { | ||||
| 			if !atEOF { | ||||
| 				return int(iShortSrc) | ||||
| 			} | ||||
| 			break | ||||
| 		} | ||||
| 		if s := rb.ss.next(info); s == ssStarter { | ||||
| 			break | ||||
| 		} else if s == ssOverflow { | ||||
| 			rb.insertCGJ() | ||||
| 			break | ||||
| 		} | ||||
| 		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { | ||||
| 			return int(err) | ||||
| 		} | ||||
| 	} | ||||
| end: | ||||
| 	if !rb.doFlush() { | ||||
| 		return int(iShortDst) | ||||
| 	} | ||||
| 	return sp | ||||
| } | ||||
|  | ||||
| // lastRuneStart returns the runeInfo and position of the last | ||||
| // rune in buf or the zero runeInfo and -1 if no rune was found. | ||||
| func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) { | ||||
| 	p := len(buf) - 1 | ||||
| 	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { | ||||
| 	} | ||||
| 	if p < 0 { | ||||
| 		return Properties{}, -1 | ||||
| 	} | ||||
| 	return fd.info(inputBytes(buf), p), p | ||||
| } | ||||
|  | ||||
| // decomposeToLastBoundary finds an open segment at the end of the buffer | ||||
| // and scans it into rb. Returns the buffer minus the last segment. | ||||
| func decomposeToLastBoundary(rb *reorderBuffer) { | ||||
| 	fd := &rb.f | ||||
| 	info, i := lastRuneStart(fd, rb.out) | ||||
| 	if int(info.size) != len(rb.out)-i { | ||||
| 		// illegal trailing continuation bytes | ||||
| 		return | ||||
| 	} | ||||
| 	if info.BoundaryAfter() { | ||||
| 		return | ||||
| 	} | ||||
| 	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order | ||||
| 	padd := 0 | ||||
| 	ss := streamSafe(0) | ||||
| 	p := len(rb.out) | ||||
| 	for { | ||||
| 		add[padd] = info | ||||
| 		v := ss.backwards(info) | ||||
| 		if v == ssOverflow { | ||||
| 			// Note that if we have an overflow, it the string we are appending to | ||||
| 			// is not correctly normalized. In this case the behavior is undefined. | ||||
| 			break | ||||
| 		} | ||||
| 		padd++ | ||||
| 		p -= int(info.size) | ||||
| 		if v == ssStarter || p < 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		info, i = lastRuneStart(fd, rb.out[:p]) | ||||
| 		if int(info.size) != p-i { | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	rb.ss = ss | ||||
| 	// Copy bytes for insertion as we may need to overwrite rb.out. | ||||
| 	var buf [maxBufferSize * utf8.UTFMax]byte | ||||
| 	cp := buf[:copy(buf[:], rb.out[p:])] | ||||
| 	rb.out = rb.out[:p] | ||||
| 	for padd--; padd >= 0; padd-- { | ||||
| 		info = add[padd] | ||||
| 		rb.insertUnsafe(inputBytes(cp), 0, info) | ||||
| 		cp = cp[info.size:] | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										125
									
								
								vendor/golang.org/x/text/unicode/norm/readwriter.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								vendor/golang.org/x/text/unicode/norm/readwriter.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import "io" | ||||
|  | ||||
| type normWriter struct { | ||||
| 	rb  reorderBuffer | ||||
| 	w   io.Writer | ||||
| 	buf []byte | ||||
| } | ||||
|  | ||||
| // Write implements the standard write interface.  If the last characters are | ||||
| // not at a normalization boundary, the bytes will be buffered for the next | ||||
| // write. The remaining bytes will be written on close. | ||||
| func (w *normWriter) Write(data []byte) (n int, err error) { | ||||
| 	// Process data in pieces to keep w.buf size bounded. | ||||
| 	const chunk = 4000 | ||||
|  | ||||
| 	for len(data) > 0 { | ||||
| 		// Normalize into w.buf. | ||||
| 		m := len(data) | ||||
| 		if m > chunk { | ||||
| 			m = chunk | ||||
| 		} | ||||
| 		w.rb.src = inputBytes(data[:m]) | ||||
| 		w.rb.nsrc = m | ||||
| 		w.buf = doAppend(&w.rb, w.buf, 0) | ||||
| 		data = data[m:] | ||||
| 		n += m | ||||
|  | ||||
| 		// Write out complete prefix, save remainder. | ||||
| 		// Note that lastBoundary looks back at most 31 runes. | ||||
| 		i := lastBoundary(&w.rb.f, w.buf) | ||||
| 		if i == -1 { | ||||
| 			i = 0 | ||||
| 		} | ||||
| 		if i > 0 { | ||||
| 			if _, err = w.w.Write(w.buf[:i]); err != nil { | ||||
| 				break | ||||
| 			} | ||||
| 			bn := copy(w.buf, w.buf[i:]) | ||||
| 			w.buf = w.buf[:bn] | ||||
| 		} | ||||
| 	} | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // Close forces data that remains in the buffer to be written. | ||||
| func (w *normWriter) Close() error { | ||||
| 	if len(w.buf) > 0 { | ||||
| 		_, err := w.w.Write(w.buf) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Writer returns a new writer that implements Write(b) | ||||
| // by writing f(b) to w. The returned writer may use an | ||||
| // internal buffer to maintain state across Write calls. | ||||
| // Calling its Close method writes any buffered data to w. | ||||
| func (f Form) Writer(w io.Writer) io.WriteCloser { | ||||
| 	wr := &normWriter{rb: reorderBuffer{}, w: w} | ||||
| 	wr.rb.init(f, nil) | ||||
| 	return wr | ||||
| } | ||||
|  | ||||
| type normReader struct { | ||||
| 	rb           reorderBuffer | ||||
| 	r            io.Reader | ||||
| 	inbuf        []byte | ||||
| 	outbuf       []byte | ||||
| 	bufStart     int | ||||
| 	lastBoundary int | ||||
| 	err          error | ||||
| } | ||||
|  | ||||
| // Read implements the standard read interface. | ||||
| func (r *normReader) Read(p []byte) (int, error) { | ||||
| 	for { | ||||
| 		if r.lastBoundary-r.bufStart > 0 { | ||||
| 			n := copy(p, r.outbuf[r.bufStart:r.lastBoundary]) | ||||
| 			r.bufStart += n | ||||
| 			if r.lastBoundary-r.bufStart > 0 { | ||||
| 				return n, nil | ||||
| 			} | ||||
| 			return n, r.err | ||||
| 		} | ||||
| 		if r.err != nil { | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 		outn := copy(r.outbuf, r.outbuf[r.lastBoundary:]) | ||||
| 		r.outbuf = r.outbuf[0:outn] | ||||
| 		r.bufStart = 0 | ||||
|  | ||||
| 		n, err := r.r.Read(r.inbuf) | ||||
| 		r.rb.src = inputBytes(r.inbuf[0:n]) | ||||
| 		r.rb.nsrc, r.err = n, err | ||||
| 		if n > 0 { | ||||
| 			r.outbuf = doAppend(&r.rb, r.outbuf, 0) | ||||
| 		} | ||||
| 		if err == io.EOF { | ||||
| 			r.lastBoundary = len(r.outbuf) | ||||
| 		} else { | ||||
| 			r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf) | ||||
| 			if r.lastBoundary == -1 { | ||||
| 				r.lastBoundary = 0 | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Reader returns a new reader that implements Read | ||||
| // by reading data from r and returning f(data). | ||||
| func (f Form) Reader(r io.Reader) io.Reader { | ||||
| 	const chunk = 4000 | ||||
| 	buf := make([]byte, chunk) | ||||
| 	rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf} | ||||
| 	rr.rb.init(f, buf) | ||||
| 	return rr | ||||
| } | ||||
							
								
								
									
										7657
									
								
								vendor/golang.org/x/text/unicode/norm/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7657
									
								
								vendor/golang.org/x/text/unicode/norm/tables10.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										7693
									
								
								vendor/golang.org/x/text/unicode/norm/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7693
									
								
								vendor/golang.org/x/text/unicode/norm/tables11.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										7637
									
								
								vendor/golang.org/x/text/unicode/norm/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7637
									
								
								vendor/golang.org/x/text/unicode/norm/tables9.0.0.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										88
									
								
								vendor/golang.org/x/text/unicode/norm/transform.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								vendor/golang.org/x/text/unicode/norm/transform.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,88 @@ | ||||
| // Copyright 2013 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
|  | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
| // Reset implements the Reset method of the transform.Transformer interface. | ||||
| func (Form) Reset() {} | ||||
|  | ||||
| // Transform implements the Transform method of the transform.Transformer | ||||
| // interface. It may need to write segments of up to MaxSegmentSize at once. | ||||
| // Users should either catch ErrShortDst and allow dst to grow or have dst be at | ||||
| // least of size MaxTransformChunkSize to be guaranteed of progress. | ||||
| func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// Cap the maximum number of src bytes to check. | ||||
| 	b := src | ||||
| 	eof := atEOF | ||||
| 	if ns := len(dst); ns < len(b) { | ||||
| 		err = transform.ErrShortDst | ||||
| 		eof = false | ||||
| 		b = b[:ns] | ||||
| 	} | ||||
| 	i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof) | ||||
| 	n := copy(dst, b[:i]) | ||||
| 	if !ok { | ||||
| 		nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF) | ||||
| 		return nDst + n, nSrc + n, err | ||||
| 	} | ||||
|  | ||||
| 	if err == nil && n < len(src) && !atEOF { | ||||
| 		err = transform.ErrShortSrc | ||||
| 	} | ||||
| 	return n, n, err | ||||
| } | ||||
|  | ||||
| func flushTransform(rb *reorderBuffer) bool { | ||||
| 	// Write out (must fully fit in dst, or else it is an ErrShortDst). | ||||
| 	if len(rb.out) < rb.nrune*utf8.UTFMax { | ||||
| 		return false | ||||
| 	} | ||||
| 	rb.out = rb.out[rb.flushCopy(rb.out):] | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc} | ||||
|  | ||||
| // transform implements the transform.Transformer interface. It is only called | ||||
| // when quickSpan does not pass for a given string. | ||||
| func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||||
| 	// TODO: get rid of reorderBuffer. See CL 23460044. | ||||
| 	rb := reorderBuffer{} | ||||
| 	rb.init(f, src) | ||||
| 	for { | ||||
| 		// Load segment into reorder buffer. | ||||
| 		rb.setFlusher(dst[nDst:], flushTransform) | ||||
| 		end := decomposeSegment(&rb, nSrc, atEOF) | ||||
| 		if end < 0 { | ||||
| 			return nDst, nSrc, errs[-end] | ||||
| 		} | ||||
| 		nDst = len(dst) - len(rb.out) | ||||
| 		nSrc = end | ||||
|  | ||||
| 		// Next quickSpan. | ||||
| 		end = rb.nsrc | ||||
| 		eof := atEOF | ||||
| 		if n := nSrc + len(dst) - nDst; n < end { | ||||
| 			err = transform.ErrShortDst | ||||
| 			end = n | ||||
| 			eof = false | ||||
| 		} | ||||
| 		end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof) | ||||
| 		n := copy(dst[nDst:], rb.src.bytes[nSrc:end]) | ||||
| 		nSrc += n | ||||
| 		nDst += n | ||||
| 		if ok { | ||||
| 			if err == nil && n < rb.nsrc && !atEOF { | ||||
| 				err = transform.ErrShortSrc | ||||
| 			} | ||||
| 			return nDst, nSrc, err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										54
									
								
								vendor/golang.org/x/text/unicode/norm/trie.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								vendor/golang.org/x/text/unicode/norm/trie.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,54 @@ | ||||
| // Copyright 2011 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package norm | ||||
|  | ||||
// valueRange is one entry of a sparse block. The first entry of a block is a
// header; the entries after it describe byte ranges (see sparseBlocks.lookup).
type valueRange struct {
	// value is the base value of a range; in a header it is the stride.
	value uint16
	// lo is the first byte of a range; in a header it is the number of
	// range entries that follow.
	lo byte
	// hi is the last byte (inclusive) of a range.
	hi byte
}
|  | ||||
| type sparseBlocks struct { | ||||
| 	values []valueRange | ||||
| 	offset []uint16 | ||||
| } | ||||
|  | ||||
| var nfcSparse = sparseBlocks{ | ||||
| 	values: nfcSparseValues[:], | ||||
| 	offset: nfcSparseOffset[:], | ||||
| } | ||||
|  | ||||
| var nfkcSparse = sparseBlocks{ | ||||
| 	values: nfkcSparseValues[:], | ||||
| 	offset: nfkcSparseOffset[:], | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	nfcData  = newNfcTrie(0) | ||||
| 	nfkcData = newNfkcTrie(0) | ||||
| ) | ||||
|  | ||||
| // lookupValue determines the type of block n and looks up the value for b. | ||||
| // For n < t.cutoff, the block is a simple lookup table. Otherwise, the block | ||||
| // is a list of ranges with an accompanying value. Given a matching range r, | ||||
| // the value for b is by r.value + (b - r.lo) * stride. | ||||
| func (t *sparseBlocks) lookup(n uint32, b byte) uint16 { | ||||
| 	offset := t.offset[n] | ||||
| 	header := t.values[offset] | ||||
| 	lo := offset + 1 | ||||
| 	hi := lo + uint16(header.lo) | ||||
| 	for lo < hi { | ||||
| 		m := lo + (hi-lo)/2 | ||||
| 		r := t.values[m] | ||||
| 		if r.lo <= b && b <= r.hi { | ||||
| 			return r.value + uint16(b-r.lo)*header.value | ||||
| 		} | ||||
| 		if b < r.lo { | ||||
| 			hi = m | ||||
| 		} else { | ||||
| 			lo = m + 1 | ||||
| 		} | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
		Reference in New Issue
	
	Block a user