iedc-go/vendor/github.com/Chain-Zhang/pinyin/pinyin.go
2023-03-03 03:12:15 +08:00

174 lines
3.2 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package pinyin
import (
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)
// Tone-mark tables. Every row of tones is parallel to neutrals: column j of
// any row is a tone-marked form of the letter neutrals[j].
var (
	// tones holds the tone-marked vowels, one row per tone. init assigns tone
	// number i+1 to every rune in row i. Each row must contain exactly
	// len(neutrals) single-code-point (precomposed) characters.
	tones = [][]rune{
		[]rune("āēīōūǖĀĒĪŌŪǕ"),
		[]rune("áéíóúǘÁÉÍÓÚǗ"),
		[]rune("ǎěǐǒǔǚǍĚǏǑǓǙ"),
		[]rune("àèìòùǜÀÈÌÒÙǛ"),
	}

	// neutrals holds the unmarked letter for each column of tones; 'v' and
	// 'V' are the unmarked stand-ins for the ü column.
	neutrals = []rune("aeiouvAEIOUV")
)
// Lookup tables. All of them are nil until init has run.
var (
	// tonesMap maps each tone-marked vowel listed in tones to the unmarked
	// letter in the same column of neutrals.
	tonesMap map[rune]rune

	// numericTonesMap maps each tone-marked vowel listed in tones to its
	// tone number (1 for the first row of tones through 4 for the last).
	numericTonesMap map[rune]int

	// pinyinMap maps a character's code point to its pinyin string (with
	// tone marks) as loaded from resource.
	pinyinMap map[rune]string

	// initialized is set to true as the last step of init.
	initialized bool
)
// Mode selects the output format produced for each converted character.
type Mode int

// Output modes. The zero value of Mode is deliberately not a named mode.
const (
	_ Mode = iota

	// WithoutTone is the default mode: plain letters without tone marks,
	// e.g. "guo".
	WithoutTone

	// Tone keeps the tone marks, e.g. "guó".
	Tone

	// InitialsInCapitals drops the tone marks and capitalizes the first
	// letter, e.g. "Guo".
	InitialsInCapitals
)
// pinyin carries one conversion request: the text to convert and the options
// that Convert applies to it.
type pinyin struct {
	// origin is the input text.
	origin string
	// split is the separator Convert inserts between output segments.
	split string
	// mode is the output format passed to getPinyin for each character.
	mode Mode
}
// init builds the package lookup tables:
//
//   - tonesMap and numericTonesMap are derived from tones and neutrals;
//   - pinyinMap is loaded from resource, whose keys are hexadecimal code
//     points.
//
// It sets initialized to true once all tables are filled.
func init() {
	tonesMap = map[rune]rune{}
	numericTonesMap = map[rune]int{}
	pinyinMap = map[rune]string{}

	for row, marked := range tones {
		// Rows are stored in tone order, so the tone number is the row
		// index plus one.
		number := row + 1
		for col, vowel := range marked {
			tonesMap[vowel] = neutrals[col]
			numericTonesMap[vowel] = number
		}
	}

	for hexCode, reading := range resource {
		code, err := strconv.ParseInt(hexCode, 16, 32)
		if err != nil {
			// Keys that are not valid 32-bit hexadecimal numbers are
			// skipped rather than treated as fatal.
			continue
		}
		pinyinMap[rune(code)] = reading
	}

	initialized = true
}
// New returns a converter for origin with the default options: a single
// space as the separator and WithoutTone as the mode. Use Split and Mode to
// change them before calling Convert.
func New(origin string) *pinyin {
	py := new(pinyin)
	py.origin = origin
	py.split = " "
	py.mode = WithoutTone
	return py
}
// Split sets the separator that Convert inserts between output segments and
// returns py so calls can be chained.
func (py *pinyin) Split(split string) *pinyin {
	py.split = split
	return py
}
// Mode sets the output format that Convert requests for each character and
// returns py so calls can be chained.
func (py *pinyin) Mode(mode Mode) *pinyin {
	py.mode = mode
	return py
}
// Convert renders py.origin as pinyin.
//
// The input is cut into segments: every character found in pinyinMap becomes
// its own segment, rendered by getPinyin in py.mode (an empty rendering is
// dropped), and every run of consecutive characters that are not in
// pinyinMap is kept verbatim as a single segment. The segments are joined
// with py.split, and doubled spaces in the joined text are then collapsed.
//
// It returns ErrInitialize if the lookup tables have not been built, and
// passes on any error reported by getPinyin. The returned string is empty
// whenever the error is non-nil.
func (py *pinyin) Convert() (string, error) {
	if !initialized {
		return "", ErrInitialize
	}

	var (
		words   []string
		pending strings.Builder // current run of characters without a pinyin entry
	)
	flush := func() {
		if pending.Len() > 0 {
			words = append(words, pending.String())
			pending.Reset()
		}
	}

	for _, r := range py.origin {
		if _, ok := pinyinMap[r]; !ok {
			pending.WriteRune(r)
			continue
		}
		word, err := getPinyin(r, py.mode)
		if err != nil {
			return "", err
		}
		flush()
		if word != "" {
			words = append(words, word)
		}
	}
	// A trailing run of unmapped characters is still pending here.
	flush()

	// Joining with a space separator next to text that already starts or
	// ends with a space produces doubled spaces. Two passes reduce runs of
	// up to four spaces to a single one. The pattern is spelled as a
	// concatenation so it survives tools that collapse whitespace.
	const doubleSpace = " " + " "
	result := strings.Join(words, py.split)
	result = strings.Replace(result, doubleSpace, " ", -1)
	result = strings.Replace(result, doubleSpace, " ", -1)
	return result, nil
}
// getPinyin returns the pinyin of hanzi rendered in the given mode. Tone and
// InitialsInCapitals select their own renderings; every other mode value,
// including WithoutTone, yields the plain form without tone marks.
//
// It returns ErrInitialize if the lookup tables have not been built.
func getPinyin(hanzi rune, mode Mode) (string, error) {
	if !initialized {
		return "", ErrInitialize
	}

	var text string
	switch mode {
	case Tone:
		text = getTone(hanzi)
	case InitialsInCapitals:
		text = getInitialsInCapitals(hanzi)
	default:
		text = getDefault(hanzi)
	}
	return text, nil
}
// getTone returns the pinyin stored in pinyinMap for hanzi, tone marks
// included, or the empty string when hanzi has no entry.
func getTone(hanzi rune) string {
	return pinyinMap[hanzi]
}
// getDefault returns the pinyin of hanzi without tone marks: every rune of
// the toned form that appears in tonesMap is replaced by its unmarked
// letter, and all other runes are copied unchanged. It returns the empty
// string when hanzi has no pinyin.
func getDefault(hanzi rune) string {
	toned := getTone(hanzi)
	if toned == "" {
		return toned
	}

	plain := make([]rune, 0, utf8.RuneCountInString(toned))
	for _, r := range toned {
		if base, marked := tonesMap[r]; marked {
			r = base
		}
		plain = append(plain, r)
	}
	return string(plain)
}
// getInitialsInCapitals returns the pinyin of hanzi without tone marks and
// with its first letter in upper case, e.g. "Guo". It returns the empty
// string when hanzi has no pinyin.
func getInitialsInCapitals(hanzi rune) string {
	def := getDefault(hanzi)
	if def == "" {
		return def
	}

	sr := []rune(def)
	// unicode.ToUpper changes only runes that have an upper-case form.
	// Subtracting 32 is correct solely for ASCII 'a'-'z' and corrupts any
	// other first rune (an existing capital, or a letter such as 'ń').
	sr[0] = unicode.ToUpper(sr[0])
	return string(sr)
}