forked from golang/hotime
174 lines
3.2 KiB
Go
174 lines
3.2 KiB
Go
|
package pinyin
|
|||
|
|
|||
|
import (
|
|||
|
"strconv"
|
|||
|
"strings"
|
|||
|
"unicode/utf8"
|
|||
|
)
|
|||
|
|
|||
|
var (
|
|||
|
tones = [][]rune{
|
|||
|
{'ā', 'ē', 'ī', 'ō', 'ū', 'ǖ', 'Ā', 'Ē', 'Ī', 'Ō', 'Ū', 'Ǖ'},
|
|||
|
{'á', 'é', 'í', 'ó', 'ú', 'ǘ', 'Á', 'É', 'Í', 'Ó', 'Ú', 'Ǘ'},
|
|||
|
{'ǎ', 'ě', 'ǐ', 'ǒ', 'ǔ', 'ǚ', 'Ǎ', 'Ě', 'Ǐ', 'Ǒ', 'Ǔ', 'Ǚ'},
|
|||
|
{'à', 'è', 'ì', 'ò', 'ù', 'ǜ', 'À', 'È', 'Ì', 'Ò', 'Ù', 'Ǜ'},
|
|||
|
}
|
|||
|
neutrals = []rune{'a', 'e', 'i', 'o', 'u', 'v', 'A', 'E', 'I', 'O', 'U', 'V'}
|
|||
|
)
|
|||
|
|
|||
|
var (
|
|||
|
// 从带声调的声母到对应的英文字符的映射
|
|||
|
tonesMap map[rune]rune
|
|||
|
|
|||
|
// 从汉字到声调的映射
|
|||
|
numericTonesMap map[rune]int
|
|||
|
|
|||
|
// 从汉字到拼音的映射(带声调)
|
|||
|
pinyinMap map[rune]string
|
|||
|
|
|||
|
initialized bool
|
|||
|
)
|
|||
|
|
|||
|
type Mode int
|
|||
|
|
|||
|
const (
|
|||
|
WithoutTone Mode = iota + 1 // 默认模式,例如:guo
|
|||
|
Tone // 带声调的拼音 例如:guó
|
|||
|
InitialsInCapitals // 首字母大写不带声调,例如:Guo
|
|||
|
)
|
|||
|
|
|||
|
type pinyin struct {
|
|||
|
origin string
|
|||
|
split string
|
|||
|
mode Mode
|
|||
|
}
|
|||
|
|
|||
|
func init() {
|
|||
|
tonesMap = make(map[rune]rune)
|
|||
|
numericTonesMap = make(map[rune]int)
|
|||
|
pinyinMap = make(map[rune]string)
|
|||
|
for i, runes := range tones {
|
|||
|
for j, tone := range runes {
|
|||
|
tonesMap[tone] = neutrals[j]
|
|||
|
numericTonesMap[tone] = i + 1
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
for k, v := range resource {
|
|||
|
i, err := strconv.ParseInt(k, 16, 32)
|
|||
|
if err != nil {
|
|||
|
continue
|
|||
|
}
|
|||
|
pinyinMap[rune(i)] = v
|
|||
|
}
|
|||
|
initialized = true
|
|||
|
}
|
|||
|
|
|||
|
func New(origin string) *pinyin {
|
|||
|
return &pinyin{
|
|||
|
origin: origin,
|
|||
|
split: " ",
|
|||
|
mode: WithoutTone,
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func (py *pinyin) Split(split string) *pinyin {
|
|||
|
py.split = split
|
|||
|
return py
|
|||
|
}
|
|||
|
|
|||
|
func (py *pinyin) Mode(mode Mode) *pinyin {
|
|||
|
py.mode = mode
|
|||
|
return py
|
|||
|
}
|
|||
|
|
|||
|
func (py *pinyin) Convert() (string, error) {
|
|||
|
if !initialized {
|
|||
|
return "", ErrInitialize
|
|||
|
}
|
|||
|
|
|||
|
sr := []rune(py.origin)
|
|||
|
words := make([]string, 0)
|
|||
|
var temp string
|
|||
|
for i, s := range sr {
|
|||
|
_, ok := pinyinMap[s]
|
|||
|
if !ok {
|
|||
|
// 非中文处理
|
|||
|
temp += string(s)
|
|||
|
if i == len(sr)-1 {
|
|||
|
words = append(words, temp)
|
|||
|
}
|
|||
|
continue
|
|||
|
}
|
|||
|
word, err := getPinyin(s, py.mode)
|
|||
|
if err != nil {
|
|||
|
return "", err
|
|||
|
}
|
|||
|
if len(temp) > 0 {
|
|||
|
|
|||
|
words = append(words, temp)
|
|||
|
temp = ""
|
|||
|
}
|
|||
|
if len(word) > 0 {
|
|||
|
words = append(words, word)
|
|||
|
}
|
|||
|
}
|
|||
|
result := strings.Join(words, py.split)
|
|||
|
result = strings.Replace(result, " ", " ", -1)
|
|||
|
result = strings.Replace(result, " ", " ", -1)
|
|||
|
return result, nil
|
|||
|
}
|
|||
|
|
|||
|
func getPinyin(hanzi rune, mode Mode) (string, error) {
|
|||
|
if !initialized {
|
|||
|
return "", ErrInitialize
|
|||
|
}
|
|||
|
|
|||
|
switch mode {
|
|||
|
case Tone:
|
|||
|
return getTone(hanzi), nil
|
|||
|
case InitialsInCapitals:
|
|||
|
return getInitialsInCapitals(hanzi), nil
|
|||
|
default:
|
|||
|
return getDefault(hanzi), nil
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
func getTone(hanzi rune) string {
|
|||
|
return pinyinMap[hanzi]
|
|||
|
}
|
|||
|
|
|||
|
func getDefault(hanzi rune) string {
|
|||
|
tone := getTone(hanzi)
|
|||
|
|
|||
|
if tone == "" {
|
|||
|
return tone
|
|||
|
}
|
|||
|
|
|||
|
output := make([]rune, utf8.RuneCountInString(tone))
|
|||
|
|
|||
|
count := 0
|
|||
|
for _, t := range tone {
|
|||
|
neutral, found := tonesMap[t]
|
|||
|
if found {
|
|||
|
output[count] = neutral
|
|||
|
} else {
|
|||
|
output[count] = t
|
|||
|
}
|
|||
|
count++
|
|||
|
}
|
|||
|
return string(output)
|
|||
|
}
|
|||
|
|
|||
|
func getInitialsInCapitals(hanzi rune) string {
|
|||
|
def := getDefault(hanzi)
|
|||
|
if def == "" {
|
|||
|
return def
|
|||
|
}
|
|||
|
sr := []rune(def)
|
|||
|
if sr[0] > 32 {
|
|||
|
sr[0] = sr[0] - 32
|
|||
|
}
|
|||
|
return string(sr)
|
|||
|
}
|