285 lines
8.0 KiB
JavaScript
285 lines
8.0 KiB
JavaScript
'use strict'
|
|
|
|
/*
|
|
Port of a subset of the features of CPython's shlex module, which provides a
|
|
shell-like lexer. Original code by Eric S. Raymond and other contributors.
|
|
*/
|
|
|
|
/**
 * Shell-like lexer. Iterating an instance yields the words of the input
 * string one at a time, honouring quotes and escape characters. The public
 * properties set in the constructor may be changed before iteration to tune
 * the lexing rules.
 */
class Shlexer {
  /**
   * @param {String} string Input to split into tokens.
   */
  constructor (string) {
    // Index of the next character that readChar() will return
    this.i = 0
    this.string = string

    /**
     * Characters that will be considered whitespace and skipped. Whitespace
     * bounds tokens. By default, includes space, tab, linefeed and carriage
     * return.
     */
    this.whitespace = ' \t\r\n'

    /**
     * Characters that will be considered string quotes. The token accumulates
     * until the same quote is encountered again (thus, different quote types
     * protect each other as in the shell.) By default, includes ASCII single
     * and double quotes.
     */
    this.quotes = `'"`

    /**
     * Characters that will be considered as escape. Just `\` by default.
     */
    this.escapes = '\\'

    /**
     * The subset of quote types that allow escaped characters. Just `"` by
     * default.
     */
    this.escapedQuotes = '"'

    /**
     * Whether to support ANSI C-style $'' quotes
     * https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
     */
    this.ansiCQuotes = true

    /**
     * Whether to support localized $"" quotes
     * https://www.gnu.org/software/bash/manual/html_node/Locale-Translation.html
     *
     * The behavior is as if the current locale is set to C or POSIX, i.e., the
     * contents are not translated.
     */
    this.localeQuotes = true

    /**
     * When true, the iterator logs its state to the console for every
     * character it reads.
     */
    this.debug = false
  }

  /**
   * Returns the next character of the input and advances the read position.
   *
   * @returns {String} A single character, or '' once the input is exhausted.
   */
  readChar () {
    return this.string.charAt(this.i++)
  }

  /**
   * Resolves the escape sequences found in the body of ONE quoted segment.
   *
   * @param {String} string Raw text between the quotes, escape characters
   *   still in place.
   * @param {String} quote The quote character that delimited the segment.
   * @param {Boolean} isAnsiCQuote Whether the segment was an ANSI C $'' quote.
   * @returns {String} The text with escape sequences replaced. Returned
   *   unchanged when the quote type does not support escapes.
   */
  processEscapes (string, quote, isAnsiCQuote) {
    if (!isAnsiCQuote && !this.escapedQuotes.includes(quote)) {
      // This quote type doesn't support escape sequences
      return string
    }

    // Character class matching any one of the escape characters. Only the
    // characters that are special inside [...] get a backslash; prefixing
    // every character would turn e.g. `b` into `\b` (backspace).
    const anyEscape = '[' + this.escapes.replace(/[\\\]^-]/g, '\\$&') + ']'

    // In regular quoted strings, we can only escape an escape character, and
    // the quote character itself.
    if (!isAnsiCQuote) {
      const quotePattern = quote.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
      const re = new RegExp(
        anyEscape + '(' + anyEscape + '|' + quotePattern + ')', 'g')
      return string.replace(re, '$1')
    }

    // ANSI C quoted strings support a wide variety of escape sequences. Each
    // entry pairs the pattern that follows the escape character with the
    // function that decodes its captured groups.
    const sequences = [
      // Literal characters
      ['([\\\\\'"?])', (x) => x],

      // Non-printable ASCII characters
      ['a', () => '\x07'],
      ['b', () => '\x08'],
      ['e|E', () => '\x1b'],
      ['f', () => '\x0c'],
      ['n', () => '\x0a'],
      ['r', () => '\x0d'],
      ['t', () => '\x09'],
      ['v', () => '\x0b'],

      // Octal bytes
      ['([0-7]{1,3})', (x) => String.fromCharCode(parseInt(x, 8))],

      // Hexadecimal bytes
      ['x([0-9a-fA-F]{1,2})', (x) => String.fromCharCode(parseInt(x, 16))],

      // Unicode code units
      ['u([0-9a-fA-F]{1,4})', (x) => String.fromCharCode(parseInt(x, 16))],

      // Unicode code points. fromCharCode would truncate to 16 bits, so use
      // fromCodePoint; values beyond the Unicode range (for which
      // fromCodePoint throws) decode to U+FFFD REPLACEMENT CHARACTER.
      ['U([0-9a-fA-F]{1,8})', (x) => {
        const codePoint = parseInt(x, 16)
        return codePoint <= 0x10FFFF
          ? String.fromCodePoint(codePoint)
          : '\uFFFD'
      }],

      // Control characters
      // https://en.wikipedia.org/wiki/Control_character#How_control_characters_map_to_keyboards
      ['c(.)', (x) => {
        if (x === '?') {
          return '\x7f'
        } else if (x === '@') {
          return '\x00'
        } else {
          return String.fromCharCode(x.charCodeAt(0) & 31)
        }
      }]
    ].map(([source, decode]) => ({
      source,
      decode,
      // The non-capturing group keeps the anchors applied to the WHOLE
      // pattern; a bare '^e|E$' would match anything that ends in "E".
      exact: new RegExp('^(?:' + source + ')$')
    }))

    // Construct an uber-RegEx that catches all of the above patterns
    const re = new RegExp(
      anyEscape + '(' + sequences.map((s) => s.source).join('|') + ')', 'g')

    // For each match, figure out which subpattern matched, and apply the
    // corresponding function
    return string.replace(re, (match, body) => {
      for (const sequence of sequences) {
        const parts = sequence.exact.exec(body)
        if (parts !== null) {
          return sequence.decode(...parts.slice(1))
        }
      }

      // Not expected to happen; keep the text rather than emit "undefined"
      return match
    })
  }

  /**
   * Lexes the input from the current read position, yielding one token per
   * shell word.
   *
   * @yields {String} The next token, with quotes removed and escapes resolved.
   * @throws {Error} If the input ends inside a quoted string or right after
   *   an escape character.
   */
  * [Symbol.iterator] () {
    let inQuote = false // the open quote character, or false
    let inDollarQuote = false // the quote character of an open $'' / $"", or false
    let escaped = false // the pending escape character, or false
    let lastDollar = -2 // position of last dollar sign we saw
    let quoteStart = 0 // offset in `token` where the open quote's body begins
    let token

    if (this.debug) {
      console.log('full input:', '>' + this.string + '<')
    }

    while (true) {
      const pos = this.i
      const char = this.readChar()

      if (this.debug) {
        console.log(
          'position:', pos,
          'input:', '>' + char + '<',
          'accumulated:', token,
          'inQuote:', inQuote,
          'inDollarQuote:', inDollarQuote,
          'lastDollar:', lastDollar,
          'escaped:', escaped
        )
      }

      // Ran out of characters, we're done
      if (char === '') {
        if (inQuote) { throw new Error('Got EOF while in a quoted string') }
        if (escaped) { throw new Error('Got EOF while in an escape sequence') }
        if (token !== undefined) { yield token }
        return
      }

      // We were in an escape sequence, complete it
      if (escaped) {
        if (char === '\n') {
          // An escaped newline just means to continue the command on the next
          // line. We just need to ignore it.
        } else if (inQuote) {
          // If we are in a quote, just accumulate the whole escape sequence,
          // as we will interpret escape sequences later.
          token = (token || '') + escaped + char
        } else {
          // Just use the literal character
          token = (token || '') + char
        }

        escaped = false
        continue
      }

      if (this.escapes.includes(char)) {
        if (!inQuote || inDollarQuote !== false || this.escapedQuotes.includes(inQuote)) {
          // We encountered an escape character, which is going to affect how
          // we treat the next character.
          escaped = char
          continue
        } else {
          // This string type doesn't use escape characters. Ignore for now.
        }
      }

      // We were in a string
      if (inQuote !== false) {
        // String is finished. Don't grab the quote character.
        if (char === inQuote) {
          // Only the text of this quoted segment still carries raw escape
          // sequences; whatever was accumulated before the opening quote is
          // already final and must not be unescaped a second time.
          const before = token.slice(0, quoteStart)
          const quoted = token.slice(quoteStart)
          token = before +
            this.processEscapes(quoted, inQuote, inDollarQuote === '\'')
          inQuote = false
          inDollarQuote = false
          continue
        }

        // String isn't finished yet, accumulate the character
        token = (token || '') + char
        continue
      }

      // This is the start of a new string, don't accumulate the quotation mark
      if (this.quotes.includes(char)) {
        inQuote = char
        if (lastDollar === pos - 1) {
          if (char === '\'' && !this.ansiCQuotes) {
            // Feature not enabled
          } else if (char === '"' && !this.localeQuotes) {
            // Feature not enabled
          } else {
            inDollarQuote = char
          }
        }

        token = (token || '') // fixes blank string

        if (inDollarQuote !== false) {
          // Drop the opening $ we captured before
          token = token.slice(0, -1)
        }

        // Remember where the quoted text starts within the token
        quoteStart = token.length
        continue
      }

      // This is a dollar sign, record that we saw it in case it's the start of
      // an ANSI C or localized string
      if (inQuote === false && char === '$') {
        lastDollar = pos
      }

      // This is whitespace, so yield the token if we have one
      if (this.whitespace.includes(char)) {
        if (token !== undefined) { yield token }
        token = undefined
        continue
      }

      // Otherwise, accumulate the character
      token = (token || '') + char
    }
  }
}
|
|
|
|
|
|
/**
|
|
* Splits a given string using shell-like syntax.
|
|
*
|
|
* @param {String} s String to split.
|
|
* @returns {String[]}
|
|
*/
|
|
exports.split = function (s) {
|
|
return Array.from(new Shlexer(s))
|
|
}
|
|
|
|
/**
|
|
* Escapes a potentially shell-unsafe string using quotes.
|
|
*
|
|
* @param {String} s String to quote
|
|
* @returns {String}
|
|
*/
|
|
exports.quote = function (s) {
|
|
if (s === '') { return '\'\'' }
|
|
|
|
var unsafeRe = /[^\w@%\-+=:,./]/
|
|
if (!unsafeRe.test(s)) { return s }
|
|
|
|
return '\'' + s.replace(/'/g, '\'"\'"\'') + '\''
|
|
}
|