All files / server/explain duckdb-parser.ts

93.22% Statements 55/59
93.87% Branches 46/49
100% Functions 3/3
92.85% Lines 52/56

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192                                                                      52x 52x 52x       52x   52x   108x 101x     98x 98x   95x 47x   95x 11x       95x 1x       95x     95x 19x     95x   52x     43x 43x 28x   43x     95x       52x                 52x                       95x 95x 351x 256x   95x     95x               98x         98x                     96x 96x   1x       95x       95x                       95x 95x 98x 98x 98x       98x     11x 11x 11x 11x     95x               95x    
/**
 * DuckDB EXPLAIN output parser
 * DuckDB uses a similar format to PostgreSQL but with some differences
 * Parses text format EXPLAIN output and normalizes to common structure
 */
 
import type { ExplainOperation, ExplainResult, ExplainSummary } from '../types/executor'
 
/**
 * Parse DuckDB EXPLAIN output into the normalized explain structure.
 *
 * Two layouts are handled. The boxed layout:
 * "┌─────────────────────────────────┐"
 * "│           QUERY PLAN            │"
 * "├─────────────────────────────────┤"
 * "│  HASH_JOIN                      │"
 * "│  ├──SEQ_SCAN employees          │"
 * "│  │  (cost=100.0 rows=1000)      │"
 * "│  └──SEQ_SCAN departments        │"
 * "│     (cost=50.0 rows=500)        │"
 * "└─────────────────────────────────┘"
 *
 * And the plain tree layout:
 * "HASH_JOIN"
 * "├──SEQ_SCAN employees"
 * "└──SEQ_SCAN departments"
 *
 * Lines made only of box-drawing characters, and lines containing
 * "EXPLANATION" or "QUERY PLAN", are skipped. A standalone
 * "(cost=… rows=…)" line is attached to the operation parsed immediately
 * before it (without overwriting values that operation already carries).
 * Nesting is derived from the length of each line's leading run of
 * spaces and tree characters.
 *
 * @param rawOutput - EXPLAIN output, one entry per line.
 * @param sqlQuery - The query that was explained; returned unchanged as `sql`.
 * @returns The operation tree, a summary (total cost taken from the first
 *   operation, sequential-scan flag, de-duplicated index names), the raw
 *   text joined with newlines, and the original query.
 */
export function parseDuckDBExplain(
  rawOutput: string[],
  sqlQuery: { sql: string; params?: unknown[] }
): ExplainResult {
  // Hoisted so the patterns are not rebuilt for every line
  const decorativeLine = /^[┌├└│─┐┤┘]+$/
  const headerLine = /EXPLANATION|QUERY PLAN/i
  const boxCharacters = /[┌├└│─┐┤┘]/g
  const costAnnotation = /^\(cost=([\d.]+)\s+rows=(\d+)\)$/i

  const operations: ExplainOperation[] = []
  const usedIndexes = new Set<string>()
  let hasSequentialScans = false
  let totalCost: number | undefined

  // Ancestors of the line currently being processed, innermost last
  const stack: { indent: number; op: ExplainOperation }[] = []
  // Most recently parsed operation; target for a following cost annotation
  let lastOperation: ExplainOperation | undefined

  for (const line of rawOutput) {
    // Skip decorative lines (box drawing characters) and headers
    if (decorativeLine.test(line.trim())) continue
    if (headerLine.test(line)) continue

    // A cost annotation on its own line belongs to the operation above it
    const annotation = line.replace(boxCharacters, '').trim().match(costAnnotation)
    if (annotation) {
      if (lastOperation) {
        if (lastOperation.estimatedCost === undefined) {
          lastOperation.estimatedCost = parseFloat(annotation[1])
        }
        if (lastOperation.estimatedRows === undefined) {
          lastOperation.estimatedRows = parseInt(annotation[2], 10)
        }
        // The first operation is the root; its cost is the plan's total cost
        if (lastOperation === operations[0] && totalCost === undefined) {
          totalCost = lastOperation.estimatedCost
        }
      }
      continue
    }

    const operation = parseDuckDBOperationLine(line)
    if (!operation) continue

    // Track sequential scans and indexes
    if (operation.type.includes('SEQ_SCAN') || operation.type.includes('TABLE_SCAN')) {
      hasSequentialScans = true
    }
    if (operation.type.includes('INDEX_SCAN') && operation.index) {
      usedIndexes.add(operation.index)
    }

    // Track total cost (from root operation, when given inline)
    if (operations.length === 0 && operation.estimatedCost !== undefined) {
      totalCost = operation.estimatedCost
    }

    const indent = countTreeIndent(line)

    // Drop siblings and deeper entries until only true ancestors remain
    while (stack.length > 0 && stack[stack.length - 1].indent >= indent) {
      stack.pop()
    }

    if (stack.length === 0) {
      // Root level operation
      operations.push(operation)
    } else {
      // Child operation
      const parent = stack[stack.length - 1].op
      if (!parent.children) {
        parent.children = []
      }
      parent.children.push(operation)
    }

    stack.push({ indent, op: operation })
    lastOperation = operation
  }

  const summary: ExplainSummary = {
    database: 'duckdb',
    planningTime: undefined,
    executionTime: undefined,
    totalCost,
    hasSequentialScans,
    usedIndexes: [...usedIndexes],
  }

  return {
    operations,
    summary,
    raw: rawOutput.join('\n'),
    sql: sqlQuery,
  }
}
 
/**
 * Measure how deeply a line is nested in the plan tree.
 *
 * @param line - One raw line of EXPLAIN output.
 * @returns The number of leading characters that are a space or one of
 *   the tree characters '│', '├', '└', '─'; 0 when the line starts with
 *   anything else or is empty.
 */
function countTreeIndent(line: string): number {
  // The pattern can match the empty string, so exec never returns null here;
  // the fallback only satisfies the type checker.
  const leadingRun = /^[ │├└─]*/.exec(line)
  return leadingRun ? leadingRun[0].length : 0
}
 
/**
 * Parse a single DuckDB EXPLAIN line into an operation.
 *
 * Box and tree drawing characters are removed first. Recognised shapes:
 * - "HASH_JOIN"
 * - "SEQ_SCAN employees"
 * - "INDEX_SCAN idx_employees_org on employees"
 * - "FILTER (organisation_id = 'org-1')"
 * - an operator followed by an inline "(cost=100.0 rows=1000)"
 *
 * @param line - One raw line of EXPLAIN output.
 * @returns The parsed operation, or `null` when the line is blank, is a
 *   standalone "(cost=… rows=…)" annotation, or does not start with an
 *   operator name (letters and underscores).
 */
function parseDuckDBOperationLine(line: string): ExplainOperation | null {
  // Remove tree drawing characters and trim
  const trimmed = line.replace(/[┌├└│─┐┤┘]/g, '').trim()

  if (!trimmed) return null

  // A bare cost line is an annotation, not an operation
  if (/^\(cost=[\d.]+\s+rows=\d+\)$/i.test(trimmed)) {
    return null
  }

  // FILTER has to be tested before the generic pattern: the generic pattern
  // also matches "FILTER (...)" and would report the first word of the
  // predicate as a table name instead of keeping the predicate.
  const filterMatch = trimmed.match(/^FILTER\s+(.+)$/i)
  if (filterMatch) {
    return {
      type: 'FILTER',
      filter: filterMatch[1],
    }
  }

  // Operator, optional name, optional "on <name>", optional inline cost.
  // The first name may not start with "(" so that an inline cost directly
  // after the operator is not mistaken for a table name.
  const opMatch = trimmed.match(
    /^([A-Z_]+)(?:\s+([^\s(]\S*))?(?:\s+on\s+(\S+))?(?:\s+\(cost=([\d.]+)\s+rows=(\d+)\))?/i
  )

  if (!opMatch) {
    return null
  }

  const type = opMatch[1].toUpperCase()
  const firstName = opMatch[2] || undefined
  const nameAfterOn = opMatch[3] || undefined
  const cost = opMatch[4] ? parseFloat(opMatch[4]) : undefined
  const rows = opMatch[5] ? parseInt(opMatch[5], 10) : undefined

  // "INDEX_SCAN <index_name> [on <table_name>]" puts the index first and the
  // table after "on"; every other operator treats the first name as the table.
  const isIndexScan = type === 'INDEX_SCAN'

  const operation: ExplainOperation = {
    type,
    table: isIndexScan ? nameAfterOn : firstName,
    index: isIndexScan ? firstName : nameAfterOn,
    estimatedRows: rows,
    estimatedCost: cost,
  }

  return operation
}