mirror of https://github.com/gohugoio/hugo
261 lines
7.0 KiB
Go
261 lines
7.0 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package template
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// endsWithCSSKeyword reports whether b ends with an ident that
|
|
// case-insensitively matches the lower-case kw.
|
|
func endsWithCSSKeyword(b []byte, kw string) bool {
|
|
i := len(b) - len(kw)
|
|
if i < 0 {
|
|
// Too short.
|
|
return false
|
|
}
|
|
if i != 0 {
|
|
r, _ := utf8.DecodeLastRune(b[:i])
|
|
if isCSSNmchar(r) {
|
|
// Too long.
|
|
return false
|
|
}
|
|
}
|
|
// Many CSS keywords, such as "!important" can have characters encoded,
|
|
// but the URI production does not allow that according to
|
|
// https://www.w3.org/TR/css3-syntax/#TOK-URI
|
|
// This does not attempt to recognize encoded keywords. For example,
|
|
// given "\75\72\6c" and "url" this return false.
|
|
return string(bytes.ToLower(b[i:])) == kw
|
|
}
|
|
|
|
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production but ignores multi-rune escape
// sequences.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z',
		'A' <= r && r <= 'Z',
		'0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	// Non-ASCII ranges.
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		return true
	}
	return false
}
|
|
|
|
// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
|
|
// If there is no change, it returns the input, otherwise it returns a slice
|
|
// backed by a new array.
|
|
// https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
|
|
func decodeCSS(s []byte) []byte {
|
|
i := bytes.IndexByte(s, '\\')
|
|
if i == -1 {
|
|
return s
|
|
}
|
|
// The UTF-8 sequence for a codepoint is never longer than 1 + the
|
|
// number hex digits need to represent that codepoint, so len(s) is an
|
|
// upper bound on the output length.
|
|
b := make([]byte, 0, len(s))
|
|
for len(s) != 0 {
|
|
i := bytes.IndexByte(s, '\\')
|
|
if i == -1 {
|
|
i = len(s)
|
|
}
|
|
b, s = append(b, s[:i]...), s[i:]
|
|
if len(s) < 2 {
|
|
break
|
|
}
|
|
// https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
|
|
// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
|
|
if isHex(s[1]) {
|
|
// https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
|
|
// unicode ::= '\' [0-9a-fA-F]{1,6} wc?
|
|
j := 2
|
|
for j < len(s) && j < 7 && isHex(s[j]) {
|
|
j++
|
|
}
|
|
r := hexDecode(s[1:j])
|
|
if r > unicode.MaxRune {
|
|
r, j = r/16, j-1
|
|
}
|
|
n := utf8.EncodeRune(b[len(b):cap(b)], r)
|
|
// The optional space at the end allows a hex
|
|
// sequence to be followed by a literal hex.
|
|
// string(decodeCSS([]byte(`\A B`))) == "\nB"
|
|
b, s = b[:len(b)+n], skipCSSSpace(s[j:])
|
|
} else {
|
|
// `\\` decodes to `\` and `\"` to `"`.
|
|
_, n := utf8.DecodeRune(s[1:])
|
|
b, s = append(b, s[1:1+n]...), s[1+n:]
|
|
}
|
|
}
|
|
return b
|
|
}
|
|
|
|
// isHex reports whether c is an ASCII hexadecimal digit of either case.
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
|
|
|
|
// hexDecode decodes a short hex digit sequence: "10" -> 16.
// An empty sequence decodes to 0. It panics if s contains a byte that is
// not a hex digit.
func hexDecode(s []byte) rune {
	var n rune
	for _, c := range s {
		var digit byte
		switch {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for the next nibble, then fold it in.
		n = n<<4 | rune(digit)
	}
	return n
}
|
|
|
|
// skipCSSSpace returns the suffix of c that remains after skipping at most
// one leading CSS whitespace sequence. A CRLF pair counts as one sequence.
func skipCSSSpace(c []byte) []byte {
	if len(c) == 0 {
		return c
	}
	// wc ::= #x9 | #xA | #xC | #xD | #x20
	skip := 0
	switch first := c[0]; {
	case first == '\r':
		// This differs from CSS3's wc production because it contains a
		// probable spec error whereby wc contains all the single byte
		// sequences in nl (newline) but not CRLF.
		skip = 1
		if len(c) >= 2 && c[1] == '\n' {
			skip = 2
		}
	case first == '\t' || first == '\n' || first == '\f' || first == ' ':
		skip = 1
	}
	return c[skip:]
}
|
|
|
|
// isCSSSpace reports whether b is one of the CSS whitespace bytes of the
// wc production: tab, line feed, form feed, carriage return, or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
|
|
|
|
// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
|
|
func cssEscaper(args ...any) string {
|
|
s, _ := stringify(args...)
|
|
var b strings.Builder
|
|
r, w, written := rune(0), 0, 0
|
|
for i := 0; i < len(s); i += w {
|
|
// See comment in htmlEscaper.
|
|
r, w = utf8.DecodeRuneInString(s[i:])
|
|
var repl string
|
|
switch {
|
|
case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
|
|
repl = cssReplacementTable[r]
|
|
default:
|
|
continue
|
|
}
|
|
if written == 0 {
|
|
b.Grow(len(s))
|
|
}
|
|
b.WriteString(s[written:i])
|
|
b.WriteString(repl)
|
|
written = i + w
|
|
if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
|
|
b.WriteByte(' ')
|
|
}
|
|
}
|
|
if written == 0 {
|
|
return s
|
|
}
|
|
b.WriteString(s[written:])
|
|
return b.String()
|
|
}
|
|
|
|
// cssReplacementTable maps a code point, used as the slice index, to the
// CSS escape that replaces it. Code points with an empty entry, or beyond
// the end of the table, have no replacement.
var cssReplacementTable = []string{
	// NUL and whitespace control characters.
	0:    `\0`,
	'\t': `\9`,
	'\n': `\a`,
	'\f': `\c`,
	'\r': `\d`,

	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\22`,
	'&':  `\26`,
	'\'': `\27`,
	'(':  `\28`,
	')':  `\29`,
	'+':  `\2b`,
	'/':  `\2f`,
	':':  `\3a`,
	';':  `\3b`,
	'<':  `\3c`,
	'>':  `\3e`,

	// The backslash is the one entry that is not a hex escape.
	'\\': `\\`,

	'{': `\7b`,
	'}': `\7d`,
}
|
|
|
|
// Lower-case keywords that cssValueFilter searches for in the identifier
// characters of a value in order to reject it.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
|
|
|
|
// cssValueFilter allows innocuous CSS values in the output including CSS
|
|
// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
|
|
// (inherit, blue), and colors (#888).
|
|
// It filters out unsafe values, such as those that affect token boundaries,
|
|
// and anything that might execute scripts.
|
|
func cssValueFilter(args ...any) string {
|
|
s, t := stringify(args...)
|
|
if t == contentTypeCSS {
|
|
return s
|
|
}
|
|
b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
|
|
|
|
// CSS3 error handling is specified as honoring string boundaries per
|
|
// https://www.w3.org/TR/css3-syntax/#error-handling :
|
|
// Malformed declarations. User agents must handle unexpected
|
|
// tokens encountered while parsing a declaration by reading until
|
|
// the end of the declaration, while observing the rules for
|
|
// matching pairs of (), [], {}, "", and '', and correctly handling
|
|
// escapes. For example, a malformed declaration may be missing a
|
|
// property, colon (:) or value.
|
|
// So we need to make sure that values do not have mismatched bracket
|
|
// or quote characters to prevent the browser from restarting parsing
|
|
// inside a string that might embed JavaScript source.
|
|
for i, c := range b {
|
|
switch c {
|
|
case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>':
|
|
return filterFailsafe
|
|
case '-':
|
|
// Disallow <!-- or -->.
|
|
// -- should not appear in valid identifiers.
|
|
if i != 0 && b[i-1] == '-' {
|
|
return filterFailsafe
|
|
}
|
|
default:
|
|
if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
|
|
id = append(id, c)
|
|
}
|
|
}
|
|
}
|
|
id = bytes.ToLower(id)
|
|
if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
|
|
return filterFailsafe
|
|
}
|
|
return string(b)
|
|
}
|