forked from golang/hotime
416 lines
14 KiB
Go
416 lines
14 KiB
Go
|
// Copyright 2017 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// Package catmsg contains support types for package x/text/message/catalog.
|
||
|
//
|
||
|
// This package contains the low-level implementations of Message used by the
|
||
|
// catalog package and provides primitives for other packages to implement their
|
||
|
// own. For instance, the plural package provides functionality for selecting
|
||
|
// translation strings based on the plural category of substitution arguments.
|
||
|
//
|
||
|
//
|
||
|
// Encoding and Decoding
|
||
|
//
|
||
|
// Catalogs store Messages encoded as a single string. Compiling a message into
|
||
|
// a string both results in compacter representation and speeds up evaluation.
|
||
|
//
|
||
|
// A Message must implement a Compile method to convert its arbitrary
|
||
|
// representation to a string. The Compile method takes an Encoder which
|
||
|
// facilitates serializing the message. Encoders also provide more context of
|
||
|
// the message's creation (such as for which language the message is intended),
|
||
|
// which may not be known at the time of the creation of the message.
|
||
|
//
|
||
|
// Each message type must also have an accompanying decoder registered to decode
|
||
|
// the message. This decoder takes a Decoder argument which provides the
|
||
|
// counterparts for the decoding.
|
||
|
//
|
||
|
//
|
||
|
// Renderers
|
||
|
//
|
||
|
// A Decoder must be initialized with a Renderer implementation. These
|
||
|
// implementations must be provided by packages that use Catalogs, typically
|
||
|
// formatting packages such as x/text/message. A typical user will not need to
|
||
|
// worry about this type; it is only relevant to packages that do string
|
||
|
// formatting and want to use the catalog package to handle localized strings.
|
||
|
//
|
||
|
// A package that uses catalogs for selecting strings receives selection results
|
||
|
// as a sequence of substrings passed to the Renderer. The following snippet shows
|
||
|
// how to express the above example using the message package.
|
||
|
//
|
||
|
// message.Set(language.English, "You are %d minute(s) late.",
|
||
|
// catalog.Var("minutes", plural.Select(1, "one", "minute")),
|
||
|
// catalog.String("You are %[1]d ${minutes} late."))
|
||
|
//
|
||
|
// p := message.NewPrinter(language.English)
|
||
|
// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
|
||
|
//
|
||
|
// To evaluate the Printf, package message wraps the arguments in a Renderer
|
||
|
// that is passed to the catalog for message decoding. The call sequence that
|
||
|
// results from evaluating the above message, assuming the person is rather
|
||
|
// tardy, is:
|
||
|
//
|
||
|
// Render("You are %[1]d ")
|
||
|
// Arg(1)
|
||
|
// Render("minutes")
|
||
|
// Render(" late.")
|
||
|
//
|
||
|
// The call to Arg is caused by the plural.Select execution, which evaluates
|
||
|
// the argument to determine whether the singular or plural message form should
|
||
|
// be selected. The calls to Render report the partial results to the message
|
||
|
// package for further evaluation.
|
||
|
package catmsg
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
"sync"
|
||
|
|
||
|
"golang.org/x/text/language"
|
||
|
)
|
||
|
|
||
|
// Handle identifies a registered message type. Handles are handed out by
// Register.
type Handle int
|
||
|
|
||
|
// A Handler decodes and evaluates data compiled by a Message and sends the
|
||
|
// result to the Decoder. The output may depend on the value of the substitution
|
||
|
// arguments, accessible by the Decoder's Arg method. The Handler returns false
|
||
|
// if there is no translation for the given substitution arguments.
|
||
|
type Handler func(d *Decoder) bool
|
||
|
|
||
|
// Register records the existence of a message type and returns a Handle that
|
||
|
// can be used in the Encoder's EncodeMessageType method to create such
|
||
|
// messages. The prefix of the name should be the package path followed by
|
||
|
// an optional disambiguating string.
|
||
|
// Register will panic if a handle for the same name was already registered.
|
||
|
func Register(name string, handler Handler) Handle {
|
||
|
mutex.Lock()
|
||
|
defer mutex.Unlock()
|
||
|
|
||
|
if _, ok := names[name]; ok {
|
||
|
panic(fmt.Errorf("catmsg: handler for %q already exists", name))
|
||
|
}
|
||
|
h := Handle(len(handlers))
|
||
|
names[name] = h
|
||
|
handlers = append(handlers, handler)
|
||
|
return h
|
||
|
}
|
||
|
|
||
|
// These handlers require fixed positions in the handlers slice.
|
||
|
const (
|
||
|
msgVars Handle = iota
|
||
|
msgFirst
|
||
|
msgRaw
|
||
|
msgString
|
||
|
msgAffix
|
||
|
// Leave some arbitrary room for future expansion: 20 should suffice.
|
||
|
numInternal = 20
|
||
|
)
|
||
|
|
||
|
// prefix is prepended to the short names of the built-in message types to
// form the names under which they are recorded.
const prefix = "golang.org/x/text/internal/catmsg."
|
||
|
|
||
|
var (
|
||
|
// TODO: find a more stable way to link handles to message types.
|
||
|
mutex sync.Mutex
|
||
|
names = map[string]Handle{
|
||
|
prefix + "Vars": msgVars,
|
||
|
prefix + "First": msgFirst,
|
||
|
prefix + "Raw": msgRaw,
|
||
|
prefix + "String": msgString,
|
||
|
prefix + "Affix": msgAffix,
|
||
|
}
|
||
|
handlers = make([]Handler, numInternal)
|
||
|
)
|
||
|
|
||
|
func init() {
|
||
|
// This handler is a message type wrapper that initializes a decoder
|
||
|
// with a variable block. This message type, if present, is always at the
|
||
|
// start of an encoded message.
|
||
|
handlers[msgVars] = func(d *Decoder) bool {
|
||
|
blockSize := int(d.DecodeUint())
|
||
|
d.vars = d.data[:blockSize]
|
||
|
d.data = d.data[blockSize:]
|
||
|
return d.executeMessage()
|
||
|
}
|
||
|
|
||
|
// First takes the first message in a sequence that results in a match for
|
||
|
// the given substitution arguments.
|
||
|
handlers[msgFirst] = func(d *Decoder) bool {
|
||
|
for !d.Done() {
|
||
|
if d.ExecuteMessage() {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
handlers[msgRaw] = func(d *Decoder) bool {
|
||
|
d.Render(d.data)
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// A String message alternates between a string constant and a variable
|
||
|
// substitution.
|
||
|
handlers[msgString] = func(d *Decoder) bool {
|
||
|
for !d.Done() {
|
||
|
if str := d.DecodeString(); str != "" {
|
||
|
d.Render(str)
|
||
|
}
|
||
|
if d.Done() {
|
||
|
break
|
||
|
}
|
||
|
d.ExecuteSubstitution()
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
handlers[msgAffix] = func(d *Decoder) bool {
|
||
|
// TODO: use an alternative method for common cases.
|
||
|
prefix := d.DecodeString()
|
||
|
suffix := d.DecodeString()
|
||
|
if prefix != "" {
|
||
|
d.Render(prefix)
|
||
|
}
|
||
|
ret := d.ExecuteMessage()
|
||
|
if suffix != "" {
|
||
|
d.Render(suffix)
|
||
|
}
|
||
|
return ret
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// ErrIncomplete indicates that a compiled message does not define
// translations for all possible argument values. If this error is returned,
// evaluating the message may result in the ErrNoMatch error.
var ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")

// ErrNoMatch indicates that no translation message matched the given input
// parameters when evaluating a message.
var ErrNoMatch = errors.New("catmsg: no translation for inputs")
|
||
|
|
||
|
// A Message holds a collection of translations for the same phrase that may
|
||
|
// vary based on the values of substitution arguments.
|
||
|
type Message interface {
|
||
|
// Compile encodes the format string(s) of the message as a string for later
|
||
|
// evaluation.
|
||
|
//
|
||
|
// The first call Compile makes on the encoder must be EncodeMessageType.
|
||
|
// The handle passed to this call may either be a handle returned by
|
||
|
// Register to encode a single custom message, or HandleFirst followed by
|
||
|
// a sequence of calls to EncodeMessage.
|
||
|
//
|
||
|
// Compile must return ErrIncomplete if it is possible for evaluation to
|
||
|
// not match any translation for a given set of formatting parameters.
|
||
|
// For example, selecting a translation based on plural form may not yield
|
||
|
// a match if the form "Other" is not one of the selectors.
|
||
|
//
|
||
|
// Compile may return any other application-specific error. For backwards
|
||
|
// compatibility with package like fmt, which often do not do sanity
|
||
|
// checking of format strings ahead of time, Compile should still make an
|
||
|
// effort to have some sensible fallback in case of an error.
|
||
|
Compile(e *Encoder) error
|
||
|
}
|
||
|
|
||
|
// Compile converts a Message to a data string that can be stored in a Catalog.
|
||
|
// The resulting string can subsequently be decoded by passing to the Execute
|
||
|
// method of a Decoder.
|
||
|
func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
|
||
|
// TODO: pass macros so they can be used for validation.
|
||
|
v := &Encoder{inBody: true} // encoder for variables
|
||
|
v.root = v
|
||
|
e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
|
||
|
err = m.Compile(e)
|
||
|
// This package serves te message package, which in turn is meant to be a
|
||
|
// drop-in replacement for fmt. With the fmt package, format strings are
|
||
|
// evaluated lazily and errors are handled by substituting strings in the
|
||
|
// result, rather then returning an error. Dealing with multiple languages
|
||
|
// makes it more important to check errors ahead of time. We chose to be
|
||
|
// consistent and compatible and allow graceful degradation in case of
|
||
|
// errors.
|
||
|
buf := e.buf[stripPrefix(e.buf):]
|
||
|
if len(v.buf) > 0 {
|
||
|
// Prepend variable block.
|
||
|
b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
|
||
|
b[0] = byte(msgVars)
|
||
|
b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
|
||
|
b = append(b, v.buf...)
|
||
|
b = append(b, buf...)
|
||
|
buf = b
|
||
|
}
|
||
|
if err == nil {
|
||
|
err = v.err
|
||
|
}
|
||
|
return string(buf), err
|
||
|
}
|
||
|
|
||
|
// FirstOf is a message type that prints the first message in the sequence that
|
||
|
// resolves to a match for the given substitution arguments.
|
||
|
type FirstOf []Message
|
||
|
|
||
|
// Compile implements Message.
|
||
|
func (s FirstOf) Compile(e *Encoder) error {
|
||
|
e.EncodeMessageType(msgFirst)
|
||
|
err := ErrIncomplete
|
||
|
for i, m := range s {
|
||
|
if err == nil {
|
||
|
return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
|
||
|
}
|
||
|
err = e.EncodeMessage(m)
|
||
|
}
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// Var defines a message that can be substituted for a placeholder of the same
|
||
|
// name. If an expression does not result in a string after evaluation, Name is
|
||
|
// used as the substitution. For example:
|
||
|
// Var{
|
||
|
// Name: "minutes",
|
||
|
// Message: plural.Select(1, "one", "minute"),
|
||
|
// }
|
||
|
// will resolve to minute for singular and minutes for plural forms.
|
||
|
type Var struct {
|
||
|
Name string
|
||
|
Message Message
|
||
|
}
|
||
|
|
||
|
// errIsVar is the error returned when a Var is compiled as if it were a
// message.
var errIsVar = errors.New("catmsg: variable used as message")
|
||
|
|
||
|
// Compile implements Message.
|
||
|
//
|
||
|
// Note that this method merely registers a variable; it does not create an
|
||
|
// encoded message.
|
||
|
func (v *Var) Compile(e *Encoder) error {
|
||
|
if err := e.addVar(v.Name, v.Message); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
// Using a Var by itself is an error. If it is in a sequence followed by
|
||
|
// other messages referring to it, this error will be ignored.
|
||
|
return errIsVar
|
||
|
}
|
||
|
|
||
|
// Raw is a message made up of a single format string, which is handed to the
// Renderer as is.
//
// Note that a Renderer may still do its own variable substitution.
type Raw string
|
||
|
|
||
|
// Compile implements Message.
|
||
|
func (r Raw) Compile(e *Encoder) (err error) {
|
||
|
e.EncodeMessageType(msgRaw)
|
||
|
// Special case: raw strings don't have a size encoding and so don't use
|
||
|
// EncodeString.
|
||
|
e.buf = append(e.buf, r...)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// String is a message consisting of a single format string which contains
// placeholders that may be substituted with variables.
//
// Variable substitutions are marked with placeholders and a variable name of
// the form ${name}. Any other substitutions such as Go templates or
// printf-style substitutions are left to be done by the Renderer.
//
// When evaluating a string interpolation, a Renderer will receive separate
// calls for each placeholder and interstitial string. For example, for the
// message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
// is:
//
//	d.Render("%[1]v ")
//	d.Arg(1)
//	d.Render(resultOfInvites)
//	d.Render(" %[2]v to ")
//	d.Arg(2)
//	d.Render(resultOfTheir)
//	d.Render(" party.")
//
// where the messages for "invites" and "their" both use a plural.Select
// referring to the first argument.
//
// Strings may also invoke macros. Macros are essentially variables that can
// be reused. Macros may, for instance, be used to make selections between
// different conjugations of a verb. See the catalog package description for
// an overview of macros.
type String string
|
||
|
|
||
|
// Compile implements Message. It parses the placeholder formats and returns
|
||
|
// any error.
|
||
|
func (s String) Compile(e *Encoder) (err error) {
|
||
|
msg := string(s)
|
||
|
const subStart = "${"
|
||
|
hasHeader := false
|
||
|
p := 0
|
||
|
b := []byte{}
|
||
|
for {
|
||
|
i := strings.Index(msg[p:], subStart)
|
||
|
if i == -1 {
|
||
|
break
|
||
|
}
|
||
|
b = append(b, msg[p:p+i]...)
|
||
|
p += i + len(subStart)
|
||
|
if i = strings.IndexByte(msg[p:], '}'); i == -1 {
|
||
|
b = append(b, "$!(MISSINGBRACE)"...)
|
||
|
err = fmt.Errorf("catmsg: missing '}'")
|
||
|
p = len(msg)
|
||
|
break
|
||
|
}
|
||
|
name := strings.TrimSpace(msg[p : p+i])
|
||
|
if q := strings.IndexByte(name, '('); q == -1 {
|
||
|
if !hasHeader {
|
||
|
hasHeader = true
|
||
|
e.EncodeMessageType(msgString)
|
||
|
}
|
||
|
e.EncodeString(string(b))
|
||
|
e.EncodeSubstitution(name)
|
||
|
b = b[:0]
|
||
|
} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
|
||
|
// TODO: what should the error be?
|
||
|
b = append(b, "$!(MISSINGPAREN)"...)
|
||
|
err = fmt.Errorf("catmsg: missing ')'")
|
||
|
} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
|
||
|
// TODO: handle more than one argument
|
||
|
b = append(b, "$!(BADNUM)"...)
|
||
|
err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
|
||
|
} else {
|
||
|
if !hasHeader {
|
||
|
hasHeader = true
|
||
|
e.EncodeMessageType(msgString)
|
||
|
}
|
||
|
e.EncodeString(string(b))
|
||
|
e.EncodeSubstitution(name[:q], int(x))
|
||
|
b = b[:0]
|
||
|
}
|
||
|
p += i + 1
|
||
|
}
|
||
|
b = append(b, msg[p:]...)
|
||
|
if !hasHeader {
|
||
|
// Simplify string to a raw string.
|
||
|
Raw(string(b)).Compile(e)
|
||
|
} else if len(b) > 0 {
|
||
|
e.EncodeString(string(b))
|
||
|
}
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// Affix is a message that adds a prefix and suffix to another message.
|
||
|
// This is mostly used add back whitespace to a translation that was stripped
|
||
|
// before sending it out.
|
||
|
type Affix struct {
|
||
|
Message Message
|
||
|
Prefix string
|
||
|
Suffix string
|
||
|
}
|
||
|
|
||
|
// Compile implements Message.
|
||
|
func (a Affix) Compile(e *Encoder) (err error) {
|
||
|
// TODO: consider adding a special message type that just adds a single
|
||
|
// return. This is probably common enough to handle the majority of cases.
|
||
|
// Get some stats first, though.
|
||
|
e.EncodeMessageType(msgAffix)
|
||
|
e.EncodeString(a.Prefix)
|
||
|
e.EncodeString(a.Suffix)
|
||
|
e.EncodeMessage(a.Message)
|
||
|
return nil
|
||
|
}
|