mirror of https://github.com/caddyserver/caddy
380 lines
10 KiB
Go
380 lines
10 KiB
Go
// Copyright 2015 Matthew Holt and The Caddy Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package caddyfile
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
type (
|
|
// lexer is a utility which can get values, token by
|
|
// token, from a Reader. A token is a word, and tokens
|
|
// are separated by whitespace. A word can be enclosed
|
|
// in quotes if it contains whitespace.
|
|
lexer struct {
|
|
reader *bufio.Reader
|
|
token Token
|
|
line int
|
|
skippedLines int
|
|
}
|
|
|
|
// Token represents a single parsable unit.
|
|
Token struct {
|
|
File string
|
|
imports []string
|
|
Line int
|
|
Text string
|
|
wasQuoted rune // enclosing quote character, if any
|
|
heredocMarker string
|
|
snippetName string
|
|
}
|
|
)
|
|
|
|
// Tokenize takes bytes as input and lexes it into
|
|
// a list of tokens that can be parsed as a Caddyfile.
|
|
// Also takes a filename to fill the token's File as
|
|
// the source of the tokens, which is important to
|
|
// determine relative paths for `import` directives.
|
|
func Tokenize(input []byte, filename string) ([]Token, error) {
|
|
l := lexer{}
|
|
if err := l.load(bytes.NewReader(input)); err != nil {
|
|
return nil, err
|
|
}
|
|
var tokens []Token
|
|
for {
|
|
found, err := l.next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !found {
|
|
break
|
|
}
|
|
l.token.File = filename
|
|
tokens = append(tokens, l.token)
|
|
}
|
|
return tokens, nil
|
|
}
|
|
|
|
// load prepares the lexer to scan an input for tokens.
|
|
// It discards any leading byte order mark.
|
|
func (l *lexer) load(input io.Reader) error {
|
|
l.reader = bufio.NewReader(input)
|
|
l.line = 1
|
|
|
|
// discard byte order mark, if present
|
|
firstCh, _, err := l.reader.ReadRune()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if firstCh != 0xFEFF {
|
|
err := l.reader.UnreadRune()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// next loads the next token into the lexer.
|
|
// A token is delimited by whitespace, unless
|
|
// the token starts with a quotes character (")
|
|
// in which case the token goes until the closing
|
|
// quotes (the enclosing quotes are not included).
|
|
// Inside quoted strings, quotes may be escaped
|
|
// with a preceding \ character. No other chars
|
|
// may be escaped. The rest of the line is skipped
|
|
// if a "#" character is read in. Returns true if
|
|
// a token was loaded; false otherwise.
|
|
func (l *lexer) next() (bool, error) {
|
|
var val []rune
|
|
var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool
|
|
var heredocMarker string
|
|
|
|
makeToken := func(quoted rune) bool {
|
|
l.token.Text = string(val)
|
|
l.token.wasQuoted = quoted
|
|
l.token.heredocMarker = heredocMarker
|
|
return true
|
|
}
|
|
|
|
for {
|
|
// Read a character in; if err then if we had
|
|
// read some characters, make a token. If we
|
|
// reached EOF, then no more tokens to read.
|
|
// If no EOF, then we had a problem.
|
|
ch, _, err := l.reader.ReadRune()
|
|
if err != nil {
|
|
if len(val) > 0 {
|
|
if inHeredoc {
|
|
return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker)
|
|
}
|
|
|
|
return makeToken(0), nil
|
|
}
|
|
if err == io.EOF {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
|
|
// detect whether we have the start of a heredoc
|
|
if !(quoted || btQuoted) && !(inHeredoc || heredocEscaped) &&
|
|
len(val) > 1 && string(val[:2]) == "<<" {
|
|
// a space means it's just a regular token and not a heredoc
|
|
if ch == ' ' {
|
|
return makeToken(0), nil
|
|
}
|
|
|
|
// skip CR, we only care about LF
|
|
if ch == '\r' {
|
|
continue
|
|
}
|
|
|
|
// after hitting a newline, we know that the heredoc marker
|
|
// is the characters after the two << and the newline.
|
|
// we reset the val because the heredoc is syntax we don't
|
|
// want to keep.
|
|
if ch == '\n' {
|
|
if len(val) == 2 {
|
|
return false, fmt.Errorf("missing opening heredoc marker on line #%d; must contain only alpha-numeric characters, dashes and underscores; got empty string", l.line)
|
|
}
|
|
|
|
// check if there's too many <
|
|
if string(val[:3]) == "<<<" {
|
|
return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example <<END", l.line)
|
|
}
|
|
|
|
heredocMarker = string(val[2:])
|
|
if !heredocMarkerRegexp.Match([]byte(heredocMarker)) {
|
|
return false, fmt.Errorf("heredoc marker on line #%d must contain only alpha-numeric characters, dashes and underscores; got '%s'", l.line, heredocMarker)
|
|
}
|
|
|
|
inHeredoc = true
|
|
l.skippedLines++
|
|
val = nil
|
|
continue
|
|
}
|
|
val = append(val, ch)
|
|
continue
|
|
}
|
|
|
|
// if we're in a heredoc, all characters are read as-is
|
|
if inHeredoc {
|
|
val = append(val, ch)
|
|
|
|
if ch == '\n' {
|
|
l.skippedLines++
|
|
}
|
|
|
|
// check if we're done, i.e. that the last few characters are the marker
|
|
if len(val) > len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) {
|
|
// set the final value
|
|
val, err = l.finalizeHeredoc(val, heredocMarker)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
// set the line counter, and make the token
|
|
l.line += l.skippedLines
|
|
l.skippedLines = 0
|
|
return makeToken('<'), nil
|
|
}
|
|
|
|
// stay in the heredoc until we find the ending marker
|
|
continue
|
|
}
|
|
|
|
// track whether we found an escape '\' for the next
|
|
// iteration to be contextually aware
|
|
if !escaped && !btQuoted && ch == '\\' {
|
|
escaped = true
|
|
continue
|
|
}
|
|
|
|
if quoted || btQuoted {
|
|
if quoted && escaped {
|
|
// all is literal in quoted area,
|
|
// so only escape quotes
|
|
if ch != '"' {
|
|
val = append(val, '\\')
|
|
}
|
|
escaped = false
|
|
} else {
|
|
if (quoted && ch == '"') || (btQuoted && ch == '`') {
|
|
return makeToken(ch), nil
|
|
}
|
|
}
|
|
// allow quoted text to wrap continue on multiple lines
|
|
if ch == '\n' {
|
|
l.line += 1 + l.skippedLines
|
|
l.skippedLines = 0
|
|
}
|
|
// collect this character as part of the quoted token
|
|
val = append(val, ch)
|
|
continue
|
|
}
|
|
|
|
if unicode.IsSpace(ch) {
|
|
// ignore CR altogether, we only actually care about LF (\n)
|
|
if ch == '\r' {
|
|
continue
|
|
}
|
|
// end of the line
|
|
if ch == '\n' {
|
|
// newlines can be escaped to chain arguments
|
|
// onto multiple lines; else, increment the line count
|
|
if escaped {
|
|
l.skippedLines++
|
|
escaped = false
|
|
} else {
|
|
l.line += 1 + l.skippedLines
|
|
l.skippedLines = 0
|
|
}
|
|
// comments (#) are single-line only
|
|
comment = false
|
|
}
|
|
// any kind of space means we're at the end of this token
|
|
if len(val) > 0 {
|
|
return makeToken(0), nil
|
|
}
|
|
continue
|
|
}
|
|
|
|
// comments must be at the start of a token,
|
|
// in other words, preceded by space or newline
|
|
if ch == '#' && len(val) == 0 {
|
|
comment = true
|
|
}
|
|
if comment {
|
|
continue
|
|
}
|
|
|
|
if len(val) == 0 {
|
|
l.token = Token{Line: l.line}
|
|
if ch == '"' {
|
|
quoted = true
|
|
continue
|
|
}
|
|
if ch == '`' {
|
|
btQuoted = true
|
|
continue
|
|
}
|
|
}
|
|
|
|
if escaped {
|
|
// allow escaping the first < to skip the heredoc syntax
|
|
if ch == '<' {
|
|
heredocEscaped = true
|
|
} else {
|
|
val = append(val, '\\')
|
|
}
|
|
escaped = false
|
|
}
|
|
|
|
val = append(val, ch)
|
|
}
|
|
}
|
|
|
|
// finalizeHeredoc takes the runes read as the heredoc text and the marker,
|
|
// and processes the text to strip leading whitespace, returning the final
|
|
// value without the leading whitespace.
|
|
func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) {
|
|
stringVal := string(val)
|
|
|
|
// find the last newline of the heredoc, which is where the contents end
|
|
lastNewline := strings.LastIndex(stringVal, "\n")
|
|
|
|
// collapse the content, then split into separate lines
|
|
lines := strings.Split(stringVal[:lastNewline+1], "\n")
|
|
|
|
// figure out how much whitespace we need to strip from the front of every line
|
|
// by getting the string that precedes the marker, on the last line
|
|
paddingToStrip := stringVal[lastNewline+1 : len(stringVal)-len(marker)]
|
|
|
|
// iterate over each line and strip the whitespace from the front
|
|
var out string
|
|
for lineNum, lineText := range lines[:len(lines)-1] {
|
|
// find an exact match for the padding
|
|
index := strings.Index(lineText, paddingToStrip)
|
|
|
|
// if the padding doesn't match exactly at the start then we can't safely strip
|
|
if index != 0 {
|
|
return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip)
|
|
}
|
|
|
|
// strip, then append the line, with the newline, to the output.
|
|
// also removes all "\r" because Windows.
|
|
out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "")
|
|
}
|
|
|
|
// Remove the trailing newline from the loop
|
|
if len(out) > 0 && out[len(out)-1] == '\n' {
|
|
out = out[:len(out)-1]
|
|
}
|
|
|
|
// return the final value
|
|
return []rune(out), nil
|
|
}
|
|
|
|
func (t Token) Quoted() bool {
|
|
return t.wasQuoted > 0
|
|
}
|
|
|
|
// NumLineBreaks counts how many line breaks are in the token text.
|
|
func (t Token) NumLineBreaks() int {
|
|
lineBreaks := strings.Count(t.Text, "\n")
|
|
if t.wasQuoted == '<' {
|
|
// heredocs have an extra linebreak because the opening
|
|
// delimiter is on its own line and is not included in the
|
|
// token Text itself, and the trailing newline is removed.
|
|
lineBreaks += 2
|
|
}
|
|
return lineBreaks
|
|
}
|
|
|
|
var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$")
|
|
|
|
// isNextOnNewLine tests whether t2 is on a different line from t1
|
|
func isNextOnNewLine(t1, t2 Token) bool {
|
|
// If the second token is from a different file,
|
|
// we can assume it's from a different line
|
|
if t1.File != t2.File {
|
|
return true
|
|
}
|
|
|
|
// If the second token is from a different import chain,
|
|
// we can assume it's from a different line
|
|
if len(t1.imports) != len(t2.imports) {
|
|
return true
|
|
}
|
|
for i, im := range t1.imports {
|
|
if im != t2.imports[i] {
|
|
return true
|
|
}
|
|
}
|
|
|
|
// If the first token (incl line breaks) ends
|
|
// on a line earlier than the next token,
|
|
// then the second token is on a new line
|
|
return t1.Line+t1.NumLineBreaks() < t2.Line
|
|
}
|