version: 1.10
package utf8
import "unicode/utf8"
Overview
Package utf8 implements functions and constants to support text encoded in UTF-8. It includes functions to translate between runes and UTF-8 byte sequences.
Index
- Constants
- func DecodeLastRune(p []byte) (r rune, size int)
- func DecodeLastRuneInString(s string) (r rune, size int)
- func DecodeRune(p []byte) (r rune, size int)
- func DecodeRuneInString(s string) (r rune, size int)
- func EncodeRune(p []byte, r rune) int
- func FullRune(p []byte) bool
- func FullRuneInString(s string) bool
- func RuneCount(p []byte) int
- func RuneCountInString(s string) (n int)
- func RuneLen(r rune) int
- func RuneStart(b byte) bool
- func Valid(p []byte) bool
- func ValidRune(r rune) bool
- func ValidString(s string) bool
Examples
- DecodeLastRune
- DecodeLastRuneInString
- DecodeRune
- DecodeRuneInString
- EncodeRune
- FullRune
- FullRuneInString
- RuneCount
- RuneCountInString
- RuneLen
- RuneStart
- Valid
- ValidRune
- ValidString
Package files
Constants
const (
	RuneError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
	RuneSelf  = 0x80         // characters below RuneSelf are represented as themselves in a single byte.
	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
	UTFMax    = 4            // maximum number of bytes of a UTF-8 encoded Unicode character.
)
Numbers fundamental to the encoding.
func DecodeLastRune(p []byte) (r rune, size int)
DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8.
An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed.
b := []byte("Hello, 世界")
for len(b) > 0 {
	r, size := utf8.DecodeLastRune(b)
	fmt.Printf("%c %v\n", r, size)
	b = b[:len(b)-size]
}
// Output:
// 界 3
// 世 3
//   1
// , 1
// o 1
// l 1
// l 1
// e 1
// H 1
func DecodeLastRuneInString(s string) (r rune, size int)
DecodeLastRuneInString is like DecodeLastRune but its input is a string. If s is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8.
An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed.
str := "Hello, 世界"
for len(str) > 0 {
	r, size := utf8.DecodeLastRuneInString(str)
	fmt.Printf("%c %v\n", r, size)
	str = str[:len(str)-size]
}
// Output:
// 界 3
// 世 3
//   1
// , 1
// o 1
// l 1
// l 1
// e 1
// H 1
func DecodeRune(p []byte) (r rune, size int)
DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8.
An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed.
b := []byte("Hello, 世界")
for len(b) > 0 {
	r, size := utf8.DecodeRune(b)
	fmt.Printf("%c %v\n", r, size)
	b = b[size:]
}
// Output:
// H 1
// e 1
// l 1
// l 1
// o 1
// , 1
//   1
// 世 3
// 界 3
func DecodeRuneInString(s string) (r rune, size int)
DecodeRuneInString is like DecodeRune but its input is a string. If s is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8.
An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed.
str := "Hello, 世界"
for len(str) > 0 {
	r, size := utf8.DecodeRuneInString(str)
	fmt.Printf("%c %v\n", r, size)
	str = str[size:]
}
// Output:
// H 1
// e 1
// l 1
// l 1
// o 1
// , 1
//   1
// 世 3
// 界 3
func EncodeRune(p []byte, r rune) int
EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. It returns the number of bytes written.
r := '世'
buf := make([]byte, 3)
n := utf8.EncodeRune(buf, r)
fmt.Println(buf)
fmt.Println(n)
// Output:
// [228 184 150]
// 3
func FullRune(p []byte) bool
FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full Rune since it will convert as a width-1 error rune.
buf := []byte{228, 184, 150} // 世
fmt.Println(utf8.FullRune(buf))
fmt.Println(utf8.FullRune(buf[:2]))
// Output:
// true
// false
func FullRuneInString(s string) bool
FullRuneInString is like FullRune but its input is a string.
str := "世"
fmt.Println(utf8.FullRuneInString(str))
fmt.Println(utf8.FullRuneInString(str[:2]))
// Output:
// true
// false
func RuneCount(p []byte) int
RuneCount returns the number of runes in p. Erroneous and short encodings are treated as single runes of width 1 byte.
buf := []byte("Hello, 世界")
fmt.Println("bytes =", len(buf))
fmt.Println("runes =", utf8.RuneCount(buf))
// Output:
// bytes = 13
// runes = 9
func RuneCountInString(s string) (n int)
RuneCountInString is like RuneCount but its input is a string.
str := "Hello, 世界"
fmt.Println("bytes =", len(str))
fmt.Println("runes =", utf8.RuneCountInString(str))
// Output:
// bytes = 13
// runes = 9
func RuneLen(r rune) int
RuneLen returns the number of bytes required to encode the rune. It returns -1 if the rune is not a valid value to encode in UTF-8.
fmt.Println(utf8.RuneLen('a'))
fmt.Println(utf8.RuneLen('界'))
// Output:
// 1
// 3
func RuneStart(b byte) bool
RuneStart reports whether the byte could be the first byte of an encoded, possibly invalid rune. Second and subsequent bytes always have the top two bits set to 10.
buf := []byte("a界")
fmt.Println(utf8.RuneStart(buf[0]))
fmt.Println(utf8.RuneStart(buf[1]))
fmt.Println(utf8.RuneStart(buf[2]))
// Output:
// true
// true
// false
func Valid(p []byte) bool
Valid reports whether p consists entirely of valid UTF-8-encoded runes.
valid := []byte("Hello, 世界")
invalid := []byte{0xff, 0xfe, 0xfd}
fmt.Println(utf8.Valid(valid))
fmt.Println(utf8.Valid(invalid))
// Output:
// true
// false
func ValidRune(r rune) bool
ValidRune reports whether r can be legally encoded as UTF-8. Code points that are out of range or a surrogate half are illegal.
valid := 'a'
invalid := rune(0xfffffff)
fmt.Println(utf8.ValidRune(valid))
fmt.Println(utf8.ValidRune(invalid))
// Output:
// true
// false
func ValidString(s string) bool
ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
valid := "Hello, 世界"
invalid := string([]byte{0xff, 0xfe, 0xfd})
fmt.Println(utf8.ValidString(valid))
fmt.Println(utf8.ValidString(invalid))
// Output:
// true
// false
