Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/ast-analysis/visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* so individual visitors don't need to track traversal state themselves.
*/

import { debug } from '../infrastructure/logger.js';
import type {
ScopeEntry,
TreeSitterNode,
Expand Down Expand Up @@ -132,6 +133,16 @@ function dispatchExitFunction(
}
}

/**
 * Maximum AST depth before walk() bails out. WASM-backed tree-sitter parsers
 * have a smaller stack than native V8 (~64-256 KB). Deep recursion in
 * generated code, deeply nested object literals, or pathologically large
 * switch statements can exhaust that stack, causing a V8 fatal error /
 * segfault (#931). 500 levels is well beyond any realistic hand-written
 * code while staying safely inside the WASM stack budget.
 *
 * When the limit is hit, walk() logs a debug message and returns — the
 * offending subtree is silently truncated rather than aborting the build.
 */
const MAX_WALK_DEPTH = 500;

/** Collect finish() results from all visitors into a name-keyed map. */
function collectResults(visitors: Visitor[]): WalkResults {
const results: WalkResults = {};
Expand Down Expand Up @@ -181,6 +192,10 @@ export function walkWithVisitors(

function walk(node: TreeSitterNode | null, depth: number): void {
if (!node) return;
if (depth > MAX_WALK_DEPTH) {
debug(`walkWithVisitors: AST depth limit (${MAX_WALK_DEPTH}) hit — subtree truncated`);
return;
}

const type = node.type;
const isFuncBoundary = allFuncTypes.has(type);
Expand Down
17 changes: 17 additions & 0 deletions src/domain/graph/builder/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ export class PipelineContext {
nodesByName!: Map<string, NodeRow[]>;
nodesByNameAndFile!: Map<string, NodeRow[]>;

// ── Reverse-dep edge reconnection (set by detectChanges) ───────────
/**
 * Edges from reverse-dep files to changed files, saved before purge so they
 * can be reconnected to new node IDs after insertNodes (#932, #933).
 * Eliminates the need to reparse reverse-dep files entirely.
 *
 * Populated by detectChanges (WASM/JS path only); consumed by
 * reconnectReverseDepEdges in buildEdges, which resolves each target's new
 * node ID by (name, kind, file) with the line used as a tie-breaker.
 */
savedReverseDepEdges: Array<{
  sourceId: number; // source node ID — still valid (reverse-dep files are not purged)
  tgtName: string; // target node attributes captured before the target row was deleted
  tgtKind: string;
  tgtFile: string;
  tgtLine: number; // tie-breaker when (name, kind, file) is ambiguous
  edgeKind: string;
  confidence: number;
  dynamic: number;
}> = [];

// ── Misc state ─────────────────────────────────────────────────────
hasEmbeddings: boolean = false;
lineCountMap!: Map<string, number>;
Expand Down
38 changes: 36 additions & 2 deletions src/domain/graph/builder/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,23 @@ async function runPipelineStages(ctx: PipelineContext): Promise<void> {

await runAnalyses(ctx);

// Release WASM trees deterministically on the success path — same cleanup
// as the error-path catch block. Without this, trees stay allocated until
// GC collects ctx, holding WASM memory for the rest of the build (#931).
if (ctx.allSymbols?.size > 0) {
for (const [, symbols] of ctx.allSymbols) {
const tree = symbols._tree as { delete?: () => void } | undefined;
if (tree && typeof tree.delete === 'function') {
try {
tree.delete();
} catch {
/* ignore cleanup errors */
}
}
symbols._tree = undefined;
}
}

// Flush Rust WAL writes (AST, complexity, CFG, dataflow) so the JS
// connection and any post-build readers can see them. One TRUNCATE
// here replaces the N per-feature resumeJsDb checkpoints (#checkpoint-opt).
Expand Down Expand Up @@ -842,8 +859,25 @@ export async function buildGraph(

await runPipelineStages(ctx);
} catch (err) {
if (!ctx.earlyExit && ctx.db) {
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
if (!ctx.earlyExit) {
// Release WASM trees before closing DB to prevent V8 crash during
// GC cleanup of orphaned WASM objects (#931).
if (ctx.allSymbols?.size > 0) {
for (const [, symbols] of ctx.allSymbols) {
const tree = symbols._tree as { delete?: () => void } | undefined;
if (tree && typeof tree.delete === 'function') {
try {
tree.delete();
} catch {
/* ignore cleanup errors */
}
}
symbols._tree = undefined;
}
}
if (ctx.db) {
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
}
}
throw err;
}
Comment on lines +865 to 883
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 WASM trees not freed on the success path

The error-path cleanup correctly calls tree.delete() on WASM objects before GC can touch them. But on the success path the same ctx.allSymbols trees stay allocated until the GC decides to collect ctx. For large builds with many parsed files this is a long-lived WASM memory hold. Clearing the trees right after buildEdges (or at the start of runAnalyses) would release WASM memory deterministically on both paths and remove any risk of the same crash pattern appearing on slow-GC runs.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added deterministic WASM tree cleanup on the success path, right after runAnalyses completes (the trees are still needed during analysis). Same cleanup pattern as the error-path catch block: iterates ctx.allSymbols, calls tree.delete(), and sets _tree to undefined.

Expand Down
72 changes: 72 additions & 0 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,69 @@ function buildClassHierarchyEdges(
}
}

// ── Reverse-dep edge reconnection (#932, #933) ─────────────────────────

/**
 * Re-create edges that were captured before the changed-file purge.
 *
 * Each saved edge carries a still-valid sourceId (reverse-dep files were not
 * purged) plus the identifying attributes of its target (name, kind, file,
 * line). The target row was deleted and re-inserted with a fresh ID by
 * insertNodes, so we resolve the new ID by (name, kind, file) — tie-broken
 * by closest line — and insert the edge again. Targets that no longer
 * resolve were removed or renamed; their edges are dropped as stale.
 */
function reconnectReverseDepEdges(ctx: PipelineContext): void {
  const { db } = ctx;
  // Closest-line ordering disambiguates duplicate (name, kind, file) tuples.
  const lookupNewTargetId = db.prepare(
    'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? ORDER BY ABS(line - ?) LIMIT 1',
  );

  const rows: EdgeRowTuple[] = [];
  let staleCount = 0;

  for (const edge of ctx.savedReverseDepEdges) {
    const hit = lookupNewTargetId.get(
      edge.tgtName,
      edge.tgtKind,
      edge.tgtFile,
      edge.tgtLine,
    ) as { id: number } | undefined;
    if (!hit) {
      // Target was removed or renamed in the changed file — edge is stale
      staleCount += 1;
      continue;
    }
    rows.push([edge.sourceId, hit.id, edge.edgeKind, edge.confidence, edge.dynamic]);
  }

  if (rows.length > 0) {
    // Prefer the native bulk insert; fall back to the JS batcher when the
    // native handle is absent or the bulk insert reports failure.
    let insertedNatively = false;
    if (ctx.nativeDb?.bulkInsertEdges) {
      insertedNatively = ctx.nativeDb.bulkInsertEdges(
        rows.map((r) => ({
          sourceId: r[0],
          targetId: r[1],
          kind: r[2],
          confidence: r[3],
          dynamic: r[4],
        })),
      );
    }
    if (!insertedNatively) {
      batchInsertEdges(db, rows);
    }
  }

  debug(
    `Reconnected ${rows.length} reverse-dep edges` +
      (staleCount > 0 ? ` (${staleCount} dropped — targets removed/renamed)` : ''),
  );
}

// ── Main entry point ────────────────────────────────────────────────────

/**
Expand Down Expand Up @@ -860,5 +923,14 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
}
}

// Phase 3: Reconnect saved reverse-dep edges (#932, #933).
// When the WASM/JS path purged changed files, edges FROM reverse-dep files TO
// those files were deleted (target-side). The reverse-dep files were NOT
// reparsed — instead we saved the edge topology before purge and now reconnect
// each edge to the new node IDs created by insertNodes.
if (ctx.savedReverseDepEdges.length > 0) {
reconnectReverseDepEdges(ctx);
}

ctx.timing.edgesMs = performance.now() - t0;
}
73 changes: 61 additions & 12 deletions src/domain/graph/builder/stages/detect-changes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -374,24 +374,73 @@ function purgeAndAddReverseDeps(
// Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670)
if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) {
ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined);
// Native path still reparses reverse-deps (works correctly with native edge builder)
for (const relPath of reverseDeps) {
const absPath = path.join(rootDir, relPath);
ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
}
} else {
// WASM/JS path: save edges from reverse-dep files → changed files BEFORE
// purge, then reconnect them to new node IDs after insertNodes (#932, #933).
//
// purgeFilesFromGraph deletes edges in BOTH directions for changed files,
// which already removes the reverse-dep → changed-file edges. The old
// approach then over-deleted ALL outgoing edges from reverse-dep files and
// reparsed them to rebuild everything — expensive (87 extra parses) and
// lossy (442 missing edges due to imperfect resolution on rebuild).
//
// New approach: save the edge topology, let purge handle deletion, then
// reconnect using new node IDs. No reparse needed.
if (hasReverseDeps && hasPurge) {
const changePathSet = new Set(changePaths);
const saveEdgesStmt = db.prepare(`
SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind,
n_tgt.file AS tgt_file, n_tgt.line AS tgt_line,
e.kind AS edge_kind, e.confidence, e.dynamic,
n_src.file AS src_file
FROM edges e
JOIN nodes n_src ON e.source_id = n_src.id
JOIN nodes n_tgt ON e.target_id = n_tgt.id
WHERE n_tgt.file = ? AND n_src.file != n_tgt.file
`);
for (const changedPath of changePaths) {
for (const row of saveEdgesStmt.all(changedPath) as Array<{
source_id: number;
tgt_name: string;
tgt_kind: string;
tgt_file: string;
tgt_line: number;
edge_kind: string;
confidence: number;
dynamic: number;
src_file: string;
}>) {
// Skip edges whose source is also being purged — buildEdges will
// re-create them with correct new IDs.
if (changePathSet.has(row.src_file)) continue;
ctx.savedReverseDepEdges.push({
sourceId: row.source_id,
tgtName: row.tgt_name,
tgtKind: row.tgt_kind,
tgtFile: row.tgt_file,
tgtLine: row.tgt_line,
edgeKind: row.edge_kind,
confidence: row.confidence,
dynamic: row.dynamic,
});
}
}
debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`);
}
Comment on lines +394 to +434
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Stale source IDs saved for edges between two co-changed files

The save query captures ALL edges whose target is in a changed file (n_tgt.file = ?), including edges whose source is also a file in changePaths. After purgeFilesFromGraph runs, those source nodes are deleted and their IDs are gone. reconnectReverseDepEdges in build-edges.ts then calls batchInsertEdges with the stale sourceId, creating a dangling edge. Since FK constraints are off by default in SQLite, the insert silently succeeds. buildEdges then also creates the correct A_new → B_new edge — leaving a duplicate dangling edge in the graph for every pair of co-changed interdependent files that also have a reverse-dep.

Add n_src.file AS src_file to the SELECT and filter it out in the push loop:

const changePathSet = new Set(changePaths);
const saveEdgesStmt = db.prepare(`
  SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind,
         n_tgt.file AS tgt_file, n_tgt.line AS tgt_line,
         e.kind AS edge_kind, e.confidence, e.dynamic,
         n_src.file AS src_file
  FROM edges e
  JOIN nodes n_src ON e.source_id = n_src.id
  JOIN nodes n_tgt ON e.target_id = n_tgt.id
  WHERE n_tgt.file = ? AND n_src.file != n_tgt.file
`);
for (const changedPath of changePaths) {
  for (const row of saveEdgesStmt.all(changedPath) as Array<{
    source_id: number; tgt_name: string; tgt_kind: string;
    tgt_file: string; tgt_line: number; edge_kind: string;
    confidence: number; dynamic: number; src_file: string;
  }>) {
    // Skip edges whose source is also being purged — buildEdges will
    // re-create them with correct new IDs.
    if (changePathSet.has(row.src_file)) continue;
    ctx.savedReverseDepEdges.push({
      sourceId: row.source_id,
      tgtName: row.tgt_name,
      tgtKind: row.tgt_kind,
      tgtFile: row.tgt_file,
      tgtLine: row.tgt_line,
      edgeKind: row.edge_kind,
      confidence: row.confidence,
      dynamic: row.dynamic,
    });
  }
}

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed -- added changePathSet filter to skip edges whose source file is also in changePaths. The save query now selects n_src.file AS src_file and the loop does if (changePathSet.has(row.src_file)) continue before pushing to savedReverseDepEdges. This prevents stale source IDs from producing dangling edges after purge.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed -- added changePathSet filter to skip edges whose source file is also in changePaths. The save query now selects n_src.file AS src_file and the loop does if (changePathSet.has(row.src_file)) continue before pushing to savedReverseDepEdges. This prevents stale source IDs from producing dangling edges after purge.


if (hasPurge) {
purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false });
}
if (hasReverseDeps) {
const deleteOutgoingEdgesForFile = db.prepare(
'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
);
for (const relPath of reverseDepList) {
deleteOutgoingEdgesForFile.run(relPath);
}
}
// No outgoing-edge deletion for reverse-deps — purge already removed
// edges targeting the changed files, and other outgoing edges are valid.
// No reverse-deps added to parseChanges — no reparse needed.
}
}
for (const relPath of reverseDeps) {
const absPath = path.join(rootDir, relPath);
ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
}
}

function detectHasEmbeddings(db: BetterSqlite3Database, nativeDb?: NativeDatabase): boolean {
Expand Down
31 changes: 5 additions & 26 deletions src/domain/graph/builder/stages/run-analyses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,18 @@
* Stage: runAnalyses
*
* Dispatches to the unified AST analysis engine (AST nodes, complexity, CFG, dataflow).
* Filters out reverse-dep files for incremental builds.
* Reverse-dep files are no longer in allSymbols (they are not reparsed since #932/#933),
* so no filtering is needed here.
*/
import { debug, warn } from '../../../../infrastructure/logger.js';
import type { ExtractorOutput } from '../../../../types.js';
import { warn } from '../../../../infrastructure/logger.js';
import type { PipelineContext } from '../context.js';

export async function runAnalyses(ctx: PipelineContext): Promise<void> {
const { db, allSymbols, rootDir, opts, engineOpts, isFullBuild, filesToParse } = ctx;

// For incremental builds, exclude reverse-dep-only files
let astComplexitySymbols: Map<string, ExtractorOutput> = allSymbols;
if (!isFullBuild) {
const reverseDepFiles = new Set(
filesToParse
.filter((item) => (item as { _reverseDepOnly?: boolean })._reverseDepOnly)
.map((item) => item.relPath),
);
if (reverseDepFiles.size > 0) {
astComplexitySymbols = new Map();
for (const [relPath, symbols] of allSymbols) {
if (!reverseDepFiles.has(relPath)) {
astComplexitySymbols.set(relPath, symbols);
}
}
debug(
`AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`,
);
}
}
const { db, allSymbols, rootDir, opts, engineOpts } = ctx;

const { runAnalyses: runAnalysesFn } = await import('../../../../ast-analysis/engine.js');
try {
const analysisTiming = await runAnalysesFn(db, astComplexitySymbols, rootDir, opts, engineOpts);
const analysisTiming = await runAnalysesFn(db, allSymbols, rootDir, opts, engineOpts);
ctx.timing.astMs = analysisTiming.astMs;
ctx.timing.complexityMs = analysisTiming.complexityMs;
ctx.timing.cfgMs = analysisTiming.cfgMs;
Expand Down
Loading