Skip to content

Commit c60ea56

Browse files
committed
module: detect ESM syntax by trying to recompile as SourceTextModule
Instead of using an async function wrapper, just try compiling code with unknown module format as SourceTextModule when it cannot be compiled as CJS and the error message indicates that it's worth a retry. If it can be parsed as SourceTextModule then it's considered ESM. Also, move shouldRetryAsESM() to C++ completely so that we can reuse it in the CJS module loader for require(esm). Drive-by: move methods that don't belong to ContextifyContext out as static methods and move GetHostDefinedOptions to ModuleWrap. PR-URL: nodejs#52413 Reviewed-By: Geoffrey Booth <[email protected]> Reviewed-By: Jacob Smith <[email protected]>
1 parent 43101ad commit c60ea56

File tree

7 files changed

+306
-358
lines changed

7 files changed

+306
-358
lines changed

lib/internal/modules/esm/get_format.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
114114
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
115115
if (getOptionValue('--experimental-detect-module')) {
116116
const format = source ?
117-
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
117+
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
118118
null;
119119
if (format === 'module') {
120120
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
@@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
158158
if (!source) { return null; }
159159
const format = getFormatOfExtensionlessFile(url);
160160
if (format === 'module') {
161-
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
161+
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
162162
}
163163
return format;
164164
}

lib/internal/modules/helpers.js

-35
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22-
const {
23-
shouldRetryAsESM: contextifyShouldRetryAsESM,
24-
constants: {
25-
syntaxDetectionErrors: {
26-
esmSyntaxErrorMessages,
27-
throwsOnlyInCommonJSErrorMessages,
28-
},
29-
},
30-
} = internalBinding('contextify');
3122
const { validateString } = require('internal/validators');
3223
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
3324
const internalFS = require('internal/fs/utils');
@@ -338,31 +329,6 @@ function urlToFilename(url) {
338329
return url;
339330
}
340331

341-
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
342-
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
343-
/**
344-
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
345-
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
346-
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
347-
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
348-
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
349-
* @param {string} source Module contents
350-
*/
351-
function shouldRetryAsESM(errorMessage, source) {
352-
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
353-
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
354-
return true;
355-
}
356-
357-
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
358-
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
359-
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
360-
}
361-
362-
return false;
363-
}
364-
365-
366332
// Whether we have started executing any user-provided CJS code.
367333
// This is set right before we call the wrapped CJS code (not after,
368334
// in case we are half-way in the execution when internals check this).
@@ -396,7 +362,6 @@ module.exports = {
396362
loadBuiltinModule,
397363
makeRequireFunction,
398364
normalizeReferrerURL,
399-
shouldRetryAsESM,
400365
stripBOM,
401366
toRealPath,
402367
hasStartedUserCJSExecution() {

lib/internal/modules/run_main.js

+16-10
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
'use strict';
22

33
const {
4+
ObjectGetPrototypeOf,
45
StringPrototypeEndsWith,
6+
SyntaxErrorPrototype,
57
} = primordials;
68

79
const { getNearestParentPackageJSONType } = internalBinding('modules');
@@ -162,27 +164,29 @@ function runEntryPointWithESMLoader(callback) {
162164
function executeUserEntryPoint(main = process.argv[1]) {
163165
const resolvedMain = resolveMainPath(main);
164166
const useESMLoader = shouldUseESMLoader(resolvedMain);
165-
167+
let mainURL;
166168
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
167169
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
168170
let retryAsESM = false;
169171
if (!useESMLoader) {
170172
const cjsLoader = require('internal/modules/cjs/loader');
171173
const { Module } = cjsLoader;
172174
if (getOptionValue('--experimental-detect-module')) {
175+
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
173176
try {
174177
// Module._load is the monkey-patchable CJS module loader.
175178
Module._load(main, null, true);
176179
} catch (error) {
177-
const source = cjsLoader.entryPointSource;
178-
const { shouldRetryAsESM } = require('internal/modules/helpers');
179-
retryAsESM = shouldRetryAsESM(error.message, source);
180-
// In case the entry point is a large file, such as a bundle,
181-
// ensure no further references can prevent it being garbage-collected.
182-
cjsLoader.entryPointSource = undefined;
180+
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
181+
const { shouldRetryAsESM } = internalBinding('contextify');
182+
const mainPath = resolvedMain || main;
183+
mainURL = pathToFileURL(mainPath).href;
184+
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
185+
// In case the entry point is a large file, such as a bundle,
186+
// ensure no further references can prevent it being garbage-collected.
187+
cjsLoader.entryPointSource = undefined;
188+
}
183189
if (!retryAsESM) {
184-
const { enrichCJSError } = require('internal/modules/esm/translators');
185-
enrichCJSError(error, source, resolvedMain);
186190
throw error;
187191
}
188192
}
@@ -193,7 +197,9 @@ function executeUserEntryPoint(main = process.argv[1]) {
193197

194198
if (useESMLoader || retryAsESM) {
195199
const mainPath = resolvedMain || main;
196-
const mainURL = pathToFileURL(mainPath).href;
200+
if (mainURL === undefined) {
201+
mainURL = pathToFileURL(mainPath).href;
202+
}
197203

198204
runEntryPointWithESMLoader((cascadedLoader) => {
199205
// Note that if the graph contains unfinished TLA, this may never resolve

src/module_wrap.cc

+100-52
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,17 @@ ModuleWrap* ModuleWrap::GetFromModule(Environment* env,
102102
return nullptr;
103103
}
104104

105-
// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
105+
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
106+
Isolate* isolate, Local<Symbol> id_symbol) {
107+
Local<PrimitiveArray> host_defined_options =
108+
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
109+
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
110+
return host_defined_options;
111+
}
112+
113+
// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
106114
// new ModuleWrap(url, context, source, lineOffset, columOffset,
107-
// hostDefinedOption)
115+
// idSymbol);
108116
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
109117
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
110118
CHECK(args.IsConstructCall());
@@ -143,9 +151,10 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
143151
// cjsModule])
144152
CHECK(args[3]->IsFunction());
145153
} else {
146-
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
154+
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
155+
// cachedData]);
147156
// new ModuleWrap(url, context, source, lineOffset, columOffset,
148-
// hostDefinedOption)
157+
// idSymbol);
149158
CHECK(args[2]->IsString());
150159
CHECK(args[3]->IsNumber());
151160
line_offset = args[3].As<Int32>()->Value();
@@ -159,7 +168,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
159168
} else {
160169
id_symbol = Symbol::New(isolate, url);
161170
}
162-
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
171+
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);
163172

164173
if (that->SetPrivate(context,
165174
realm->isolate_data()->host_defined_option_symbol(),
@@ -192,50 +201,34 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
192201
module = Module::CreateSyntheticModule(isolate, url, export_names,
193202
SyntheticModuleEvaluationStepsCallback);
194203
} else {
195-
ScriptCompiler::CachedData* cached_data = nullptr;
196-
bool used_cache_from_user = false;
204+
// When we are compiling for the default loader, this will be
205+
// std::nullopt, and CompileSourceTextModule() should use
206+
// on-disk cache.
207+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
208+
if (id_symbol !=
209+
realm->isolate_data()->source_text_module_default_hdo()) {
210+
user_cached_data = nullptr;
211+
}
197212
if (args[5]->IsArrayBufferView()) {
198-
DCHECK(!can_use_builtin_cache); // We don't use this option internally.
199-
used_cache_from_user = true;
213+
CHECK(!can_use_builtin_cache); // We don't use this option internally.
200214
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
201215
uint8_t* data =
202216
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
203-
cached_data =
217+
user_cached_data =
204218
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
205219
cached_data_buf->ByteLength());
206220
}
207-
208221
Local<String> source_text = args[2].As<String>();
209-
ScriptOrigin origin(isolate,
210-
url,
211-
line_offset,
212-
column_offset,
213-
true, // is cross origin
214-
-1, // script id
215-
Local<Value>(), // source map URL
216-
false, // is opaque (?)
217-
false, // is WASM
218-
true, // is ES Module
219-
host_defined_options);
220-
221-
CompileCacheEntry* cache_entry = nullptr;
222-
if (can_use_builtin_cache && realm->env()->use_compile_cache()) {
223-
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
224-
source_text, url, CachedCodeType::kESM);
225-
}
226-
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
227-
// source will take ownership of cached_data.
228-
cached_data = cache_entry->CopyCache();
229-
}
230222

231-
ScriptCompiler::Source source(source_text, origin, cached_data);
232-
ScriptCompiler::CompileOptions options;
233-
if (source.GetCachedData() == nullptr) {
234-
options = ScriptCompiler::kNoCompileOptions;
235-
} else {
236-
options = ScriptCompiler::kConsumeCodeCache;
237-
}
238-
if (!ScriptCompiler::CompileModule(isolate, &source, options)
223+
bool cache_rejected = false;
224+
if (!CompileSourceTextModule(realm,
225+
source_text,
226+
url,
227+
line_offset,
228+
column_offset,
229+
host_defined_options,
230+
user_cached_data,
231+
&cache_rejected)
239232
.ToLocal(&module)) {
240233
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
241234
CHECK(!try_catch.Message().IsEmpty());
@@ -249,18 +242,8 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
249242
return;
250243
}
251244

252-
bool cache_rejected = false;
253-
if (options == ScriptCompiler::kConsumeCodeCache) {
254-
cache_rejected = source.GetCachedData()->rejected;
255-
}
256-
257-
if (cache_entry != nullptr) {
258-
realm->env()->compile_cache_handler()->MaybeSave(
259-
cache_entry, module, cache_rejected);
260-
}
261-
262-
// If the cache comes from builtin compile cache, fail silently.
263-
if (cache_rejected && used_cache_from_user) {
245+
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
246+
cache_rejected) {
264247
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
265248
realm, "cachedData buffer was rejected");
266249
try_catch.ReThrow();
@@ -303,6 +286,71 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
303286
args.GetReturnValue().Set(that);
304287
}
305288

289+
MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
290+
Realm* realm,
291+
Local<String> source_text,
292+
Local<String> url,
293+
int line_offset,
294+
int column_offset,
295+
Local<PrimitiveArray> host_defined_options,
296+
std::optional<ScriptCompiler::CachedData*> user_cached_data,
297+
bool* cache_rejected) {
298+
Isolate* isolate = realm->isolate();
299+
EscapableHandleScope scope(isolate);
300+
ScriptOrigin origin(isolate,
301+
url,
302+
line_offset,
303+
column_offset,
304+
true, // is cross origin
305+
-1, // script id
306+
Local<Value>(), // source map URL
307+
false, // is opaque (?)
308+
false, // is WASM
309+
true, // is ES Module
310+
host_defined_options);
311+
ScriptCompiler::CachedData* cached_data = nullptr;
312+
CompileCacheEntry* cache_entry = nullptr;
313+
// When compiling for the default loader, user_cached_data is std::nullptr.
314+
// When compiling for vm.Module, it's either nullptr or a pointer to the
315+
// cached data.
316+
if (user_cached_data.has_value()) {
317+
cached_data = user_cached_data.value();
318+
} else if (realm->env()->use_compile_cache()) {
319+
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
320+
source_text, url, CachedCodeType::kESM);
321+
}
322+
323+
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
324+
// source will take ownership of cached_data.
325+
cached_data = cache_entry->CopyCache();
326+
}
327+
328+
ScriptCompiler::Source source(source_text, origin, cached_data);
329+
ScriptCompiler::CompileOptions options;
330+
if (cached_data == nullptr) {
331+
options = ScriptCompiler::kNoCompileOptions;
332+
} else {
333+
options = ScriptCompiler::kConsumeCodeCache;
334+
}
335+
336+
Local<Module> module;
337+
if (!ScriptCompiler::CompileModule(isolate, &source, options)
338+
.ToLocal(&module)) {
339+
return scope.EscapeMaybe(MaybeLocal<Module>());
340+
}
341+
342+
if (options == ScriptCompiler::kConsumeCodeCache) {
343+
*cache_rejected = source.GetCachedData()->rejected;
344+
}
345+
346+
if (cache_entry != nullptr) {
347+
realm->env()->compile_cache_handler()->MaybeSave(
348+
cache_entry, module, *cache_rejected);
349+
}
350+
351+
return scope.Escape(module);
352+
}
353+
306354
static Local<Object> createImportAttributesContainer(
307355
Realm* realm,
308356
Isolate* isolate,

src/module_wrap.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
55

6-
#include <unordered_map>
6+
#include <optional>
77
#include <string>
8+
#include <unordered_map>
89
#include <vector>
910
#include "base_object.h"
11+
#include "v8-script.h"
1012

1113
namespace node {
1214

@@ -68,6 +70,23 @@ class ModuleWrap : public BaseObject {
6870
return true;
6971
}
7072

73+
static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
74+
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);
75+
76+
// When user_cached_data is not std::nullopt, use the code cache if it's not
77+
// nullptr, otherwise don't use code cache.
78+
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
79+
// See: https://github.com/nodejs/node/issues/47472
80+
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
81+
Realm* realm,
82+
v8::Local<v8::String> source_text,
83+
v8::Local<v8::String> url,
84+
int line_offset,
85+
int column_offset,
86+
v8::Local<v8::PrimitiveArray> host_defined_options,
87+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
88+
bool* cache_rejected);
89+
7190
private:
7291
ModuleWrap(Realm* realm,
7392
v8::Local<v8::Object> object,

0 commit comments

Comments
 (0)