-
Notifications
You must be signed in to change notification settings - Fork 8
fix(wasm): resolve incremental edge loss, unnecessary reparses, and V8 crash #938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6ad8b74
9798c85
dc448dd
6cccdf5
36ea21a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -374,24 +374,73 @@ function purgeAndAddReverseDeps( | |
| // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670) | ||
| if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { | ||
| ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined); | ||
| // Native path still reparses reverse-deps (works correctly with native edge builder) | ||
| for (const relPath of reverseDeps) { | ||
| const absPath = path.join(rootDir, relPath); | ||
| ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); | ||
| } | ||
| } else { | ||
| // WASM/JS path: save edges from reverse-dep files → changed files BEFORE | ||
| // purge, then reconnect them to new node IDs after insertNodes (#932, #933). | ||
| // | ||
| // purgeFilesFromGraph deletes edges in BOTH directions for changed files, | ||
| // which already removes the reverse-dep → changed-file edges. The old | ||
| // approach then over-deleted ALL outgoing edges from reverse-dep files and | ||
| // reparsed them to rebuild everything — expensive (87 extra parses) and | ||
| // lossy (442 missing edges due to imperfect resolution on rebuild). | ||
| // | ||
| // New approach: save the edge topology, let purge handle deletion, then | ||
| // reconnect using new node IDs. No reparse needed. | ||
| if (hasReverseDeps && hasPurge) { | ||
| const changePathSet = new Set(changePaths); | ||
| const saveEdgesStmt = db.prepare(` | ||
| SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind, | ||
| n_tgt.file AS tgt_file, n_tgt.line AS tgt_line, | ||
| e.kind AS edge_kind, e.confidence, e.dynamic, | ||
| n_src.file AS src_file | ||
| FROM edges e | ||
| JOIN nodes n_src ON e.source_id = n_src.id | ||
| JOIN nodes n_tgt ON e.target_id = n_tgt.id | ||
| WHERE n_tgt.file = ? AND n_src.file != n_tgt.file | ||
| `); | ||
| for (const changedPath of changePaths) { | ||
| for (const row of saveEdgesStmt.all(changedPath) as Array<{ | ||
| source_id: number; | ||
| tgt_name: string; | ||
| tgt_kind: string; | ||
| tgt_file: string; | ||
| tgt_line: number; | ||
| edge_kind: string; | ||
| confidence: number; | ||
| dynamic: number; | ||
| src_file: string; | ||
| }>) { | ||
| // Skip edges whose source is also being purged — buildEdges will | ||
| // re-create them with correct new IDs. | ||
| if (changePathSet.has(row.src_file)) continue; | ||
| ctx.savedReverseDepEdges.push({ | ||
| sourceId: row.source_id, | ||
| tgtName: row.tgt_name, | ||
| tgtKind: row.tgt_kind, | ||
| tgtFile: row.tgt_file, | ||
| tgtLine: row.tgt_line, | ||
| edgeKind: row.edge_kind, | ||
| confidence: row.confidence, | ||
| dynamic: row.dynamic, | ||
| }); | ||
| } | ||
| } | ||
| debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`); | ||
| } | ||
|
Comment on lines
+394
to
+434
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The save query captures ALL edges whose target is in a changed file ( Add const changePathSet = new Set(changePaths);
const saveEdgesStmt = db.prepare(`
SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind,
n_tgt.file AS tgt_file, n_tgt.line AS tgt_line,
e.kind AS edge_kind, e.confidence, e.dynamic,
n_src.file AS src_file
FROM edges e
JOIN nodes n_src ON e.source_id = n_src.id
JOIN nodes n_tgt ON e.target_id = n_tgt.id
WHERE n_tgt.file = ? AND n_src.file != n_tgt.file
`);
for (const changedPath of changePaths) {
for (const row of saveEdgesStmt.all(changedPath) as Array<{
source_id: number; tgt_name: string; tgt_kind: string;
tgt_file: string; tgt_line: number; edge_kind: string;
confidence: number; dynamic: number; src_file: string;
}>) {
// Skip edges whose source is also being purged — buildEdges will
// re-create them with correct new IDs.
if (changePathSet.has(row.src_file)) continue;
ctx.savedReverseDepEdges.push({
sourceId: row.source_id,
tgtName: row.tgt_name,
tgtKind: row.tgt_kind,
tgtFile: row.tgt_file,
tgtLine: row.tgt_line,
edgeKind: row.edge_kind,
confidence: row.confidence,
dynamic: row.dynamic,
});
}
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed -- added
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed -- added changePathSet filter to skip edges whose source file is also in changePaths. The save query now selects n_src.file AS src_file and the loop does if (changePathSet.has(row.src_file)) continue before pushing to savedReverseDepEdges. This prevents stale source IDs from producing dangling edges after purge. |
||
|
|
||
| if (hasPurge) { | ||
| purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false }); | ||
| } | ||
| if (hasReverseDeps) { | ||
| const deleteOutgoingEdgesForFile = db.prepare( | ||
| 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', | ||
| ); | ||
| for (const relPath of reverseDepList) { | ||
| deleteOutgoingEdgesForFile.run(relPath); | ||
| } | ||
| } | ||
| // No outgoing-edge deletion for reverse-deps — purge already removed | ||
| // edges targeting the changed files, and other outgoing edges are valid. | ||
| // No reverse-deps added to parseChanges — no reparse needed. | ||
| } | ||
| } | ||
| for (const relPath of reverseDeps) { | ||
| const absPath = path.join(rootDir, relPath); | ||
| ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); | ||
| } | ||
| } | ||
|
|
||
| function detectHasEmbeddings(db: BetterSqlite3Database, nativeDb?: NativeDatabase): boolean { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error-path cleanup correctly calls
tree.delete()on WASM objects before GC can touch them. But on the success path the samectx.allSymbolstrees stay allocated until the GC decides to collectctx. For large builds with many parsed files this is a long-lived WASM memory hold. Clearing the trees right afterbuildEdges(or at the start ofrunAnalyses) would release WASM memory deterministically on both paths and remove any risk of the same crash pattern appearing on slow-GC runs.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed -- added deterministic WASM tree cleanup on the success path, right after runAnalyses completes (trees are still needed during analysis). Same cleanup pattern as the error-path catch block: iterates ctx.allSymbols, calls tree.delete(), and sets _tree to undefined.