initial update
This commit is contained in:
902
node_modules/graphql/language/lexer.mjs
generated
vendored
Normal file
902
node_modules/graphql/language/lexer.mjs
generated
vendored
Normal file
@@ -0,0 +1,902 @@
|
||||
import { syntaxError } from '../error/syntaxError.mjs';
|
||||
import { Token } from './ast.mjs';
|
||||
import { dedentBlockStringLines } from './blockString.mjs';
|
||||
import { isDigit, isNameContinue, isNameStart } from './characterClasses.mjs';
|
||||
import { TokenKind } from './tokenKind.mjs';
|
||||
/**
 * Stateful token stream over a Source object.
 *
 * Every call to `advance()` moves to the next non-ignored token of the source
 * and returns it. Once a token of kind EOF has been reached, that same EOF
 * token is returned again on every subsequent call.
 */
export class Lexer {
  /**
   * Sets up the lexer state for `source`:
   *  - `source`: the Source being lexed.
   *  - `lastToken`: the previously focused non-ignored token.
   *  - `token`: the currently focused non-ignored token.
   *  - `line`: the (1-indexed) line containing the current token.
   *  - `lineStart`: the character offset at which the current line begins.
   *
   * Both token slots start out pointing at one shared start-of-file token.
   */
  constructor(source) {
    const sofToken = new Token(TokenKind.SOF, 0, 0, 0, 0);

    this.source = source;
    this.lastToken = sofToken;
    this.token = sofToken;
    this.line = 1;
    this.lineStart = 0;
  }

  get [Symbol.toStringTag]() {
    return 'Lexer';
  }

  /**
   * Advances the token stream to the next non-ignored token and returns it.
   */
  advance() {
    this.lastToken = this.token;
    this.token = this.lookahead();
    return this.token;
  }

  /**
   * Returns the next non-ignored token without changing which token the
   * lexer is focused on. Tokens read along the way are linked into the token
   * list through `next` / `prev`, so they are not lexed a second time.
   */
  lookahead() {
    let current = this.token;

    // Nothing follows EOF; keep handing back the same token.
    if (current.kind === TokenKind.EOF) {
      return current;
    }

    do {
      let following = current.next;

      if (!following) {
        // Read the next token and form a link in the token linked-list.
        following = readNextToken(this, current.end);
        // @ts-expect-error next is only mutable during parsing.
        current.next = following;
        // @ts-expect-error prev is only mutable during parsing.
        following.prev = current;
      }

      current = following;
    } while (current.kind === TokenKind.COMMENT); // comments are skipped

    return current;
  }
}
|
||||
/**
 * Reports whether `kind` is one of the punctuator token kinds
 * (! $ & ( ) ... : = @ [ ] { | }).
 *
 * @internal
 */
export function isPunctuatorTokenKind(kind) {
  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;

    default:
      return false;
  }
}
|
||||
/**
 * Tells whether `code` is a Unicode scalar value, that is any code point
 * outside the surrogate range: 0x0000-0xD7FF or 0xE000-0x10FFFF (inclusive).
 *
 * SourceCharacter ::
 *   - "Any Unicode scalar value"
 */
function isUnicodeScalarValue(code) {
  // Upper range, above the surrogates.
  if (code >= 0xe000) {
    return code <= 0x10ffff;
  }

  // Lower range, below the surrogates.
  return code >= 0x0000 && code <= 0xd7ff;
}
|
||||
/**
 * Tells whether the two UTF-16 code units starting at `location` in `body`
 * form a surrogate pair.
 *
 * GraphQL source text is a sequence of Unicode scalar values, whereas a
 * JavaScript string is a sequence of UTF-16 code units that may contain
 * surrogates. A leading surrogate followed by a trailing surrogate encodes a
 * supplementary code point (above U+FFFF) and is a valid source character;
 * an unpaired surrogate is not.
 */
function isSupplementaryCodePoint(body, location) {
  if (!isLeadingSurrogate(body.charCodeAt(location))) {
    return false;
  }

  return isTrailingSurrogate(body.charCodeAt(location + 1));
}
|
||||
|
||||
/** True when `code` lies in the leading (high) surrogate range 0xD800-0xDBFF. */
function isLeadingSurrogate(code) {
  if (code < 0xd800) {
    return false;
  }

  return code <= 0xdbff;
}
|
||||
|
||||
/** True when `code` lies in the trailing (low) surrogate range 0xDC00-0xDFFF. */
function isTrailingSurrogate(code) {
  if (code < 0xdc00) {
    return false;
  }

  return code <= 0xdfff;
}
|
||||
/**
 * Formats the code point found at `location` in the lexer's source body for
 * use in error messages.
 *
 * - Past the end of the body: returns `TokenKind.EOF`.
 * - Printable ASCII (U+0020 to U+007E): the character in double quotes,
 *   except the double quote itself, which is wrapped in single quotes.
 * - Anything else: Unicode code point form (ie. U+1234), upper-case hex
 *   padded to at least four digits.
 */
function printCodePointAt(lexer, location) {
  const codePoint = lexer.source.body.codePointAt(location);

  if (codePoint === undefined) {
    return TokenKind.EOF;
  }

  const isPrintableAscii = codePoint >= 0x0020 && codePoint <= 0x007e;

  if (!isPrintableAscii) {
    // Unicode code point
    return 'U+' + codePoint.toString(16).toUpperCase().padStart(4, '0');
  }

  const char = String.fromCodePoint(codePoint);
  return char === '"' ? "'\"'" : `"${char}"`;
}
|
||||
/**
 * Builds a Token of the given kind spanning `start`..`end`, stamped with the
 * lexer's current line and a 1-indexed column computed from the offset at
 * which the current line starts.
 */
function createToken(lexer, kind, start, end, value) {
  const { line, lineStart } = lexer;
  return new Token(kind, start, end, line, 1 + start - lineStart, value);
}
|
||||
/**
 * Reads the next token from the lexer's source, scanning from `start`.
 *
 * Ignored characters (BOM, tab, space, comma and line terminators) are
 * skipped; each line terminator bumps `lexer.line` and moves
 * `lexer.lineStart`. Punctuators are turned into tokens right here, while
 * comments, strings, numbers and names are delegated to their dedicated
 * readers. Reaching the end of the body yields an EOF token. Any other
 * character results in a thrown syntax error.
 *
 * Note that comments are returned as tokens by this function.
 */
function readNextToken(lexer, start) {
  const { body } = lexer.source;
  const bodyLength = body.length;
  let cursor = start;

  // Every punctuator except `...` is exactly one code unit wide.
  const singleCharToken = (kind) =>
    createToken(lexer, kind, cursor, cursor + 1);

  while (cursor < bodyLength) {
    const code = body.charCodeAt(cursor); // SourceCharacter

    // Ignored ::
    //   - UnicodeBOM
    //   - WhiteSpace
    //   - LineTerminator
    //   - Comment
    //   - Comma
    //
    // UnicodeBOM :: "Byte Order Mark (U+FEFF)"
    //
    // WhiteSpace ::
    //   - "Horizontal Tab (U+0009)"
    //   - "Space (U+0020)"
    //
    // Comma :: ,
    if (
      code === 0xfeff || // <BOM>
      code === 0x0009 || // \t
      code === 0x0020 || // <space>
      code === 0x002c // ,
    ) {
      cursor += 1;
      continue;
    }

    // LineTerminator ::
    //   - "New Line (U+000A)"
    //   - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
    //   - "Carriage Return (U+000D)" "New Line (U+000A)"
    if (code === 0x000a || code === 0x000d) {
      const isCrLf =
        code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;
      lexer.line += 1;
      lexer.lineStart = cursor;
      continue;
    }

    // Token ::
    //   - Punctuator
    //   - Name
    //   - IntValue
    //   - FloatValue
    //   - StringValue
    //
    // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
    switch (code) {
      case 0x0023: // #
        return readComment(lexer, cursor);

      case 0x0021: // !
        return singleCharToken(TokenKind.BANG);

      case 0x0024: // $
        return singleCharToken(TokenKind.DOLLAR);

      case 0x0026: // &
        return singleCharToken(TokenKind.AMP);

      case 0x0028: // (
        return singleCharToken(TokenKind.PAREN_L);

      case 0x0029: // )
        return singleCharToken(TokenKind.PAREN_R);

      case 0x002e: {
        // . (only valid as part of `...`)
        const isSpread =
          body.charCodeAt(cursor + 1) === 0x002e &&
          body.charCodeAt(cursor + 2) === 0x002e;

        if (isSpread) {
          return createToken(lexer, TokenKind.SPREAD, cursor, cursor + 3);
        }

        break;
      }

      case 0x003a: // :
        return singleCharToken(TokenKind.COLON);

      case 0x003d: // =
        return singleCharToken(TokenKind.EQUALS);

      case 0x0040: // @
        return singleCharToken(TokenKind.AT);

      case 0x005b: // [
        return singleCharToken(TokenKind.BRACKET_L);

      case 0x005d: // ]
        return singleCharToken(TokenKind.BRACKET_R);

      case 0x007b: // {
        return singleCharToken(TokenKind.BRACE_L);

      case 0x007c: // |
        return singleCharToken(TokenKind.PIPE);

      case 0x007d: // }
        return singleCharToken(TokenKind.BRACE_R);

      case 0x0022: {
        // " (StringValue): three quotes in a row open a block string.
        const opensBlockString =
          body.charCodeAt(cursor + 1) === 0x0022 &&
          body.charCodeAt(cursor + 2) === 0x0022;

        return opensBlockString
          ? readBlockString(lexer, cursor)
          : readString(lexer, cursor);
      }
    }

    // IntValue | FloatValue (Digit | -)
    if (isDigit(code) || code === 0x002d) {
      return readNumber(lexer, cursor, code);
    }

    // Name
    if (isNameStart(code)) {
      return readName(lexer, cursor);
    }

    // Nothing matched: report the offending character.
    let message;

    if (code === 0x0027) {
      message =
        'Unexpected single quote character (\'), did you mean to use a double quote (")?';
    } else if (
      isUnicodeScalarValue(code) ||
      isSupplementaryCodePoint(body, cursor)
    ) {
      message = `Unexpected character: ${printCodePointAt(lexer, cursor)}.`;
    } else {
      message = `Invalid character: ${printCodePointAt(lexer, cursor)}.`;
    }

    throw syntaxError(lexer.source, cursor, message);
  }

  return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength);
}
|
||||
/**
 * Reads a comment token whose `#` sits at `start`.
 *
 * ```
 * Comment :: # CommentChar* [lookahead != CommentChar]
 *
 * CommentChar :: SourceCharacter but not LineTerminator
 * ```
 *
 * Scanning stops at the first line terminator, at the first code unit that
 * is not a valid source character, or at the end of the body. The token
 * value is the comment text without the leading `#`.
 */
function readComment(lexer, start) {
  const { body } = lexer.source;
  const bodyLength = body.length;
  let end = start + 1;

  while (end < bodyLength) {
    const code = body.charCodeAt(end);

    // LineTerminator (\n | \r)
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    // SourceCharacter: one code unit, or two for a surrogate pair.
    let width = 0;

    if (isUnicodeScalarValue(code)) {
      width = 1;
    } else if (isSupplementaryCodePoint(body, end)) {
      width = 2;
    }

    if (width === 0) {
      break;
    }

    end += width;
  }

  const text = body.slice(start + 1, end);
  return createToken(lexer, TokenKind.COMMENT, start, end, text);
}
|
||||
/**
 * Reads a number token starting at `start`, whose first code unit is
 * `firstCode`. The token is a FloatValue when a FractionalPart or an
 * ExponentPart is present and an IntValue otherwise; its value is the raw
 * source text of the number.
 *
 * ```
 * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
 *
 * IntegerPart ::
 *   - NegativeSign? 0
 *   - NegativeSign? NonZeroDigit Digit*
 *
 * NegativeSign :: -
 *
 * NonZeroDigit :: Digit but not `0`
 *
 * FloatValue ::
 *   - IntegerPart FractionalPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart FractionalPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *
 * FractionalPart :: . Digit+
 *
 * ExponentPart :: ExponentIndicator Sign? Digit+
 *
 * ExponentIndicator :: one of `e` `E`
 *
 * Sign :: one of + -
 * ```
 *
 * Throws a syntax error when a digit follows a leading zero, when a digit is
 * missing where one is required, or when the number is directly followed by
 * `.` or a NameStart character.
 */
function readNumber(lexer, start, firstCode) {
  const { body } = lexer.source;
  let cursor = start;
  let code = firstCode;
  let hasFractionOrExponent = false;

  // NegativeSign (-)
  if (code === 0x002d) {
    cursor += 1;
    code = body.charCodeAt(cursor);
  }

  if (code !== 0x0030) {
    // NonZeroDigit Digit*
    cursor = readDigits(lexer, cursor, code);
    code = body.charCodeAt(cursor);
  } else {
    // Zero (0) must stand alone in the integer part.
    cursor += 1;
    code = body.charCodeAt(cursor);

    if (isDigit(code)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid number, unexpected digit after 0: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  // Full stop (.)
  if (code === 0x002e) {
    hasFractionOrExponent = true;
    cursor += 1;
    code = body.charCodeAt(cursor);
    cursor = readDigits(lexer, cursor, code);
    code = body.charCodeAt(cursor);
  }

  // E e
  if (code === 0x0045 || code === 0x0065) {
    hasFractionOrExponent = true;
    cursor += 1;
    code = body.charCodeAt(cursor);

    // + -
    if (code === 0x002b || code === 0x002d) {
      cursor += 1;
      code = body.charCodeAt(cursor);
    }

    cursor = readDigits(lexer, cursor, code);
    code = body.charCodeAt(cursor);
  }

  // Numbers cannot be followed by . or NameStart
  if (code === 0x002e || isNameStart(code)) {
    throw syntaxError(
      lexer.source,
      cursor,
      `Invalid number, expected digit but got: ${printCodePointAt(
        lexer,
        cursor,
      )}.`,
    );
  }

  const kind = hasFractionOrExponent ? TokenKind.FLOAT : TokenKind.INT;
  return createToken(lexer, kind, start, cursor, body.slice(start, cursor));
}
|
||||
/**
 * Consumes one or more digits beginning at `start` and returns the position
 * just after the last one. `firstCode` is the code unit at `start`; when it
 * is not a digit a syntax error is thrown.
 */
function readDigits(lexer, start, firstCode) {
  if (!isDigit(firstCode)) {
    const found = printCodePointAt(lexer, start);

    throw syntaxError(
      lexer.source,
      start,
      `Invalid number, expected digit but got: ${found}.`,
    );
  }

  const { body } = lexer.source;

  // The digit at `start` has already been validated through firstCode.
  let end = start + 1;

  for (; isDigit(body.charCodeAt(end)); end += 1) {
    // keep walking while digits continue
  }

  return end;
}
|
||||
/**
 * Reads a regular (non-block) string token whose opening double quote sits
 * at `start`.
 *
 * ```
 * StringValue ::
 *   - `""` [lookahead != `"`]
 *   - `"` StringCharacter+ `"`
 *
 * StringCharacter ::
 *   - SourceCharacter but not `"` or `\` or LineTerminator
 *   - `\u` EscapedUnicode
 *   - `\` EscapedCharacter
 *
 * EscapedUnicode ::
 *   - `{` HexDigit+ `}`
 *   - HexDigit HexDigit HexDigit HexDigit
 *
 * EscapedCharacter :: one of `"` `\` `/` `b` `f` `n` `r` `t`
 * ```
 *
 * The token value is the string content with escape sequences decoded.
 * A syntax error is thrown for an invalid source character, and for a
 * string still open at a line terminator or at the end of the body.
 */
function readString(lexer, start) {
  const { body } = lexer.source;
  const bodyLength = body.length;
  let cursor = start + 1;
  // Start of the run of literal characters not yet copied into `text`.
  let pendingFrom = cursor;
  let text = '';

  while (cursor < bodyLength) {
    const code = body.charCodeAt(cursor);

    // Closing Quote (")
    if (code === 0x0022) {
      text += body.slice(pendingFrom, cursor);
      return createToken(lexer, TokenKind.STRING, start, cursor + 1, text);
    }

    // Escape Sequence (\)
    if (code === 0x005c) {
      text += body.slice(pendingFrom, cursor);

      // Each escape reader yields the decoded string value together with
      // the lexed size of the escape sequence.
      let escape;

      if (body.charCodeAt(cursor + 1) !== 0x0075) {
        // not `u`
        escape = readEscapedCharacter(lexer, cursor);
      } else if (body.charCodeAt(cursor + 2) === 0x007b) {
        // `\u{`
        escape = readEscapedUnicodeVariableWidth(lexer, cursor);
      } else {
        escape = readEscapedUnicodeFixedWidth(lexer, cursor);
      }

      text += escape.value;
      cursor += escape.size;
      pendingFrom = cursor;
      continue;
    }

    // LineTerminator (\n | \r)
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
|
||||
|
||||
/**
 * Decodes a variable-width Unicode escape (`\u{` HexDigit+ `}`) whose
 * backslash sits at `position`.
 *
 * Returns `{ value, size }`: the decoded character and the number of code
 * units the escape sequence occupies. Throws a syntax error when the
 * sequence has no digits, contains a non-hex character, is not closed
 * within 12 code units (\u{00000000}), or does not encode a Unicode scalar
 * value.
 */
function readEscapedUnicodeVariableWidth(lexer, position) {
  const { body } = lexer.source;
  let codePoint = 0;
  // `\u{` has already been recognized by the caller.
  let consumed = 3;

  // Cannot be larger than 12 chars (\u{00000000}).
  while (consumed < 12) {
    const code = body.charCodeAt(position + consumed);
    consumed += 1;

    if (code !== 0x007d) {
      // Append this hex digit to the code point. An invalid digit (-1)
      // turns the accumulated value negative.
      codePoint = (codePoint << 4) | readHexDigit(code);

      if (codePoint < 0) {
        break;
      }

      continue;
    }

    // Closing Brace (}): must be at least 5 chars (\u{0}) and encode a
    // Unicode scalar value.
    if (consumed >= 5 && isUnicodeScalarValue(codePoint)) {
      return {
        value: String.fromCodePoint(codePoint),
        size: consumed,
      };
    }

    break;
  }

  const sequence = body.slice(position, position + consumed);

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${sequence}".`,
  );
}
|
||||
|
||||
/**
 * Decodes a fixed-width Unicode escape (`\u` followed by four hex digits)
 * whose backslash sits at `position`.
 *
 * Returns `{ value, size }`: the decoded text and the number of code units
 * consumed (6 for a single escape, 12 for a surrogate pair written as two
 * consecutive escapes). Throws a syntax error for anything else, including
 * an unpaired surrogate.
 */
function readEscapedUnicodeFixedWidth(lexer, position) {
  const { body } = lexer.source;
  const code = read16BitHexCode(body, position + 2);

  if (isUnicodeScalarValue(code)) {
    return {
      value: String.fromCodePoint(code),
      size: 6,
    };
  }

  // GraphQL allows JSON-style surrogate pair escape sequences, but only when
  // a valid pair is formed: a leading surrogate immediately followed by a
  // second `\u` escape holding a trailing surrogate.
  const hasFollowingEscape =
    isLeadingSurrogate(code) &&
    body.charCodeAt(position + 6) === 0x005c && // backslash
    body.charCodeAt(position + 7) === 0x0075; // u

  if (hasFollowingEscape) {
    const trailingCode = read16BitHexCode(body, position + 8);

    if (isTrailingSurrogate(trailingCode)) {
      // JavaScript strings are sequences of UTF-16 code units and represent
      // code points above U+FFFF as a surrogate pair of code units, so both
      // codes go into the string value as they are. No decoding of the pair
      // into the supplementary code point is needed.
      return {
        value: String.fromCodePoint(code, trailingCode),
        size: 12,
      };
    }
  }

  const sequence = body.slice(position, position + 6);

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${sequence}".`,
  );
}
|
||||
/**
 * Reads the four hexadecimal characters starting at `position` in `body` and
 * returns the 16-bit integer they spell out. For example, "000f" gives 15
 * and "dead" gives 57005.
 *
 * The result is negative when any of the four characters is not a valid
 * hexadecimal digit.
 */
function read16BitHexCode(body, position) {
  let value = 0;

  // Most significant digit first: shift the running value up by four bits
  // and OR in the next digit. readHexDigit() returns -1 on error, and ORing
  // a negative value with any other value always produces a negative value,
  // so an invalid digit keeps the result negative through the later shifts.
  for (let offset = 0; offset < 4; offset += 1) {
    value = (value << 4) | readHexDigit(body.charCodeAt(position + offset));
  }

  return value;
}
|
||||
/**
 * Converts the character code of one hexadecimal digit into its numeric
 * value (0-15).
 *
 * '0' becomes 0, '9' becomes 9
 * 'A' becomes 10, 'F' becomes 15
 * 'a' becomes 10, 'f' becomes 15
 *
 * Returns -1 if the provided character code was not a valid hexadecimal digit.
 *
 * HexDigit :: one of
 *   - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
 *   - `A` `B` `C` `D` `E` `F`
 *   - `a` `b` `c` `d` `e` `f`
 */
function readHexDigit(code) {
  // 0-9
  if (code >= 0x0030 && code <= 0x0039) {
    return code - 0x0030;
  }

  // A-F
  if (code >= 0x0041 && code <= 0x0046) {
    return code - 0x0037;
  }

  // a-f
  if (code >= 0x0061 && code <= 0x0066) {
    return code - 0x0057;
  }

  return -1;
}
|
||||
/**
 * Decodes a two-character escape sequence (a backslash followed by one
 * character) whose backslash sits at `position`. Returns `{ value, size }`
 * with the decoded character and a size of 2, or throws a syntax error when
 * the character after the backslash is not listed below.
 *
 * | Escaped Character | Code Point | Character Name               |
 * | ----------------- | ---------- | ---------------------------- |
 * | `"`               | U+0022     | double quote                 |
 * | `\`               | U+005C     | reverse solidus (back slash) |
 * | `/`               | U+002F     | solidus (forward slash)      |
 * | `b`               | U+0008     | backspace                    |
 * | `f`               | U+000C     | form feed                    |
 * | `n`               | U+000A     | line feed (new line)         |
 * | `r`               | U+000D     | carriage return              |
 * | `t`               | U+0009     | horizontal tab               |
 */
function readEscapedCharacter(lexer, position) {
  const { body } = lexer.source;
  let value;

  switch (body.charCodeAt(position + 1)) {
    case 0x0022: // "
      value = '\u0022';
      break;

    case 0x005c: // \
      value = '\u005c';
      break;

    case 0x002f: // /
      value = '\u002f';
      break;

    case 0x0062: // b
      value = '\u0008';
      break;

    case 0x0066: // f
      value = '\u000c';
      break;

    case 0x006e: // n
      value = '\u000a';
      break;

    case 0x0072: // r
      value = '\u000d';
      break;

    case 0x0074: // t
      value = '\u0009';
      break;

    default:
      throw syntaxError(
        lexer.source,
        position,
        `Invalid character escape sequence: "${body.slice(
          position,
          position + 2,
        )}".`,
      );
  }

  return { value, size: 2 };
}
|
||||
/**
 * Reads a block string token whose opening triple-quote sits at `start`.
 *
 * ```
 * StringValue ::
 *   - `"""` BlockStringCharacter* `"""`
 *
 * BlockStringCharacter ::
 *   - SourceCharacter but not `"""` or `\"""`
 *   - `\"""`
 * ```
 *
 * The raw content is split into lines at each line terminator, passed
 * through `dedentBlockStringLines`, and joined back with U+000A to form the
 * token value. After the token has been created, `lexer.line` is moved
 * forward by the number of line terminators seen inside the string and
 * `lexer.lineStart` is set to the offset where the string's last line
 * begins. Throws a syntax error on an invalid source character or when the
 * closing triple-quote is missing.
 */
function readBlockString(lexer, start) {
  const { body } = lexer.source;
  const bodyLength = body.length;
  const lines = [];
  let lastLineStart = lexer.lineStart;
  let cursor = start + 3;
  // Start of the text not yet copied into `lineText`.
  let pendingFrom = cursor;
  let lineText = '';

  const isTripleQuoteAt = (index) =>
    body.charCodeAt(index) === 0x0022 &&
    body.charCodeAt(index + 1) === 0x0022 &&
    body.charCodeAt(index + 2) === 0x0022;

  while (cursor < bodyLength) {
    const code = body.charCodeAt(cursor);

    // Closing Triple-Quote (""")
    if (isTripleQuoteAt(cursor)) {
      lineText += body.slice(pendingFrom, cursor);
      lines.push(lineText);

      // The token is created before the lexer's line bookkeeping is
      // updated, so it carries the line/column of the opening quotes.
      const token = createToken(
        lexer,
        TokenKind.BLOCK_STRING,
        start,
        cursor + 3,
        // Return a string of the lines joined with U+000A.
        dedentBlockStringLines(lines).join('\n'),
      );

      lexer.line += lines.length - 1;
      lexer.lineStart = lastLineStart;
      return token;
    }

    // Escaped Triple-Quote (\""")
    if (code === 0x005c && isTripleQuoteAt(cursor + 1)) {
      lineText += body.slice(pendingFrom, cursor);
      pendingFrom = cursor + 1; // skip only slash
      cursor += 4;
      continue;
    }

    // LineTerminator
    if (code === 0x000a || code === 0x000d) {
      lineText += body.slice(pendingFrom, cursor);
      lines.push(lineText);

      const isCrLf =
        code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;

      lineText = '';
      pendingFrom = cursor;
      lastLineStart = cursor;
      continue;
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
|
||||
/**
 * Reads a name token beginning at `start`. The caller has already checked
 * that the character at `start` is a NameStart; this function extends the
 * token over every following NameContinue character.
 *
 * ```
 * Name ::
 *   - NameStart NameContinue* [lookahead != NameContinue]
 * ```
 */
function readName(lexer, start) {
  const { body } = lexer.source;
  const bodyLength = body.length;
  let end = start + 1;

  while (end < bodyLength && isNameContinue(body.charCodeAt(end))) {
    end += 1;
  }

  const name = body.slice(start, end);
  return createToken(lexer, TokenKind.NAME, start, end, name);
}
|
||||
Reference in New Issue
Block a user