All files / server join-path-resolver.ts

84.25% Statements 91/108
84.31% Branches 43/51
93.75% Functions 15/16
83.65% Lines 87/104

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385                                                                            83x           83x                                 541x         541x 541x 541x 403x       138x     138x   138x 163x 163x   163x 36x       127x   174x 174x   174x 18x     156x                 156x   107x 107x     49x 49x         31x 31x                                                                 235x 235x   1x       234x 298x                   298x 313x     298x 1x           314x 298x         298x   298x     310x         234x 68x 64x 60x     234x                                     235x       235x 235x       235x 781x     781x       781x 781x 447x       334x 936x 936x   936x 92x     844x                 844x   298x     546x 546x 546x         235x                     989x   433x 524x 524x 78x       355x                               236x     236x       237x       237x         237x 237x     236x                                                                             541x 541x       541x 541x 138x   403x       138x      
/**
 * JoinPathResolver - Handles join path finding and connectivity analysis
 *
 * Extracted from QueryPlanner for single responsibility.
 * Uses BFS algorithm to find shortest paths between cubes.
 * Includes connectivity caching for performance optimization.
 */
 
import type { Cube, CubeJoin } from './types'
import { resolveCubeReference, isolateSqlExpression } from './cube-utils'
import { eq, and, sql, type SQL } from 'drizzle-orm'
 
/**
 * Internal representation of a join path step
 * Used during path finding - simpler than the public JoinPathStep analysis type
 */
export interface InternalJoinPathStep {
  /** Source cube name */
  fromCube: string
  /** Target cube name */
  toCube: string
  /** The join definition from the source cube */
  joinDef: CubeJoin
}
 
/**
 * Cache entry for connectivity between cubes
 * Simple cache without TTL since cube config is set once at startup
 */
interface ConnectivityCacheEntry {
  path: InternalJoinPathStep[] | null
}
 
/**
 * Resolves join paths between cubes and manages connectivity caching
 */
export class JoinPathResolver {
  private cubes: Map<string, Cube>
  private connectivityCache: Map<string, ConnectivityCacheEntry> = new Map()
 
  /**
   * @param cubes Map of cube name to cube definition
   */
  constructor(cubes: Map<string, Cube>) {
    this.cubes = cubes
  }
 
  /**
   * Find the shortest join path from source cube to target cube
   * Uses BFS algorithm for optimal path discovery
   *
   * @param fromCube Source cube name
   * @param toCube Target cube name
   * @param alreadyProcessed Set of cubes to exclude from path finding
   * @returns Array of join steps or null if no path exists
   */
  findPath(
    fromCube: string,
    toCube: string,
    alreadyProcessed: Set<string> = new Set()
  ): InternalJoinPathStep[] | null {
    Iif (fromCube === toCube) {
      return []
    }
 
    // Check cache first
    const cacheKey = this.getCacheKey(fromCube, toCube, alreadyProcessed)
    const cached = this.getFromCache(cacheKey)
    if (cached !== undefined) {
      return cached
    }
 
    // BFS to find shortest path
    const queue: Array<{ cube: string; path: InternalJoinPathStep[] }> = [
      { cube: fromCube, path: [] }
    ]
    const visited = new Set([fromCube, ...alreadyProcessed])
 
    while (queue.length > 0) {
      const { cube: currentCube, path } = queue.shift()!
      const cubeDefinition = this.cubes.get(currentCube)
 
      if (!cubeDefinition?.joins) {
        continue
      }
 
      // Check all joins from current cube - resolve lazy references
      for (const [, joinDef] of Object.entries(cubeDefinition.joins)) {
        // Resolve cube reference to get actual cube name
        const resolvedTargetCube = resolveCubeReference(joinDef.targetCube)
        const actualTargetName = resolvedTargetCube.name
 
        if (visited.has(actualTargetName)) {
          continue
        }
 
        const newPath: InternalJoinPathStep[] = [
          ...path,
          {
            fromCube: currentCube,
            toCube: actualTargetName,
            joinDef
          }
        ]
 
        if (actualTargetName === toCube) {
          // Cache successful path
          this.setInCache(cacheKey, newPath)
          return newPath
        }
 
        visited.add(actualTargetName)
        queue.push({ cube: actualTargetName, path: newPath })
      }
    }
 
    // No path found - cache the negative result
    this.setInCache(cacheKey, null)
    return null
  }
 
  /**
   * Find path that prefers going through specified cubes when possible
   * Used when certain cubes have measures in the query - ensures joins go through
   * the semantically correct path (e.g., through junction tables when their measures are used)
   *
   * IMPORTANT: This method allows paths to go THROUGH already-processed cubes (as intermediate
   * steps) but won't return them as new cubes to add. This is crucial for preferring paths
   * through cubes that have measures (like junction tables).
   *
   * Path scoring priority (highest to lowest):
   * 1. Paths using joins with `preferredFor` that includes the target cube (score +10)
   * 2. Paths going through cubes with measures in the query (score +1 per cube)
   * 3. Paths reusing already-processed cubes
   * 4. Shorter paths
   *
   * @param fromCube Source cube name
   * @param toCube Target cube name
   * @param preferredCubes Set of cube names to prefer in the path (usually cubes with measures)
   * @param alreadyProcessed Set of cubes already in the join plan (can be used as intermediates)
   * @returns Array of join steps or null if no path exists
   */
  findPathPreferring(
    fromCube: string,
    toCube: string,
    preferredCubes: Set<string>,
    alreadyProcessed: Set<string> = new Set()
  ): InternalJoinPathStep[] | null {
    // Find ALL paths WITHOUT excluding already-processed cubes from intermediate steps
    // This allows us to find paths through cubes that are already in the join plan
    // (e.g., going through EmployeeTeams to reach Teams)
    const allPaths = this.findAllPaths(fromCube, toCube, new Set()) // Note: empty exclusion set
    if (allPaths.length === 0) {
      // Fall back to standard path finding with exclusions
      return this.findPath(fromCube, toCube, alreadyProcessed)
    }
 
    // Score paths with multiple criteria
    const scored = allPaths.map(path => {
      let score = 0
 
      // +10 for paths using joins with preferredFor that includes the target cube
      // ONLY apply this bonus when the preferredFor is on the FIRST hop from the starting cube.
      // This ensures preferredFor only matters when we're actually routing FROM the cube
      // that defines the preference, not when we're just passing through it.
      // Example: Employees.joins.EmployeeTeams has preferredFor: ['Teams']
      // - When routing FROM Employees TO Teams: preferredFor applies (first hop)
      // - When routing FROM Departments TO Teams: preferredFor should NOT apply
      //   even if the path goes through Employees
      const usesPreferredJoin = path.some((step, index) =>
        step.joinDef.preferredFor?.includes(toCube) && index === 0
      )
 
      if (usesPreferredJoin) {
        score += 10
      }
 
      // +1 for each preferred cube (cubes with measures) in the path
      // BUT penalize longer paths to prevent unnecessarily routing through preferred cubes
      // when a shorter direct path exists
      const preferredCubesInPath = path.filter(step => preferredCubes.has(step.toCube)).length
      score += preferredCubesInPath
 
      // Penalize path length: subtract (length - 1) to favor shorter paths
      // This ensures a direct path (length 1, penalty 0) beats a path through
      // preferred cubes (e.g., length 4, +1 for preferred, -3 for length = -2)
      score -= (path.length - 1)
 
      return {
        path,
        score,
        usesProcessed: path.some(step => alreadyProcessed.has(step.toCube))
      }
    })
 
    // Sort: prefer higher score, then prefer paths using already-processed cubes, then shorter
    scored.sort((a, b) => {
      if (b.score !== a.score) return b.score - a.score
      if (a.usesProcessed !== b.usesProcessed) return a.usesProcessed ? -1 : 1
      return a.path.length - b.path.length
    })
 
    return scored[0].path
  }
 
  /**
   * Find all possible paths between two cubes (up to maxDepth)
   * Used by findPathPreferring to evaluate multiple paths
   *
   * @param fromCube Source cube name
   * @param toCube Target cube name
   * @param alreadyProcessed Set of cubes to exclude from path finding
   * @param maxDepth Maximum path length to search (default 4 to avoid explosion)
   * @returns Array of all valid paths
   */
  private findAllPaths(
    fromCube: string,
    toCube: string,
    alreadyProcessed: Set<string>,
    maxDepth: number = 4
  ): InternalJoinPathStep[][] {
    Iif (fromCube === toCube) {
      return [[]]
    }
 
    const allPaths: InternalJoinPathStep[][] = []
    const queue: Array<{ cube: string; path: InternalJoinPathStep[]; visited: Set<string> }> = [
      { cube: fromCube, path: [], visited: new Set([fromCube, ...alreadyProcessed]) }
    ]
 
    while (queue.length > 0) {
      const { cube: currentCube, path, visited } = queue.shift()!
 
      // Stop if path is too long
      Iif (path.length >= maxDepth) {
        continue
      }
 
      const cubeDefinition = this.cubes.get(currentCube)
      if (!cubeDefinition?.joins) {
        continue
      }
 
      // Check all joins from current cube
      for (const [, joinDef] of Object.entries(cubeDefinition.joins)) {
        const resolvedTargetCube = resolveCubeReference(joinDef.targetCube)
        const actualTargetName = resolvedTargetCube.name
 
        if (visited.has(actualTargetName)) {
          continue
        }
 
        const newPath: InternalJoinPathStep[] = [
          ...path,
          {
            fromCube: currentCube,
            toCube: actualTargetName,
            joinDef
          }
        ]
 
        if (actualTargetName === toCube) {
          // Found a path - add to results but don't stop (collect all paths)
          allPaths.push(newPath)
        } else {
          // Continue searching - create new visited set for this branch
          const newVisited = new Set(visited)
          newVisited.add(actualTargetName)
          queue.push({ cube: actualTargetName, path: newPath, visited: newVisited })
        }
      }
    }
 
    return allPaths
  }
 
  /**
   * Check if a cube can reach all other cubes in the list via joins
   *
   * @param fromCube Starting cube name
   * @param allCubes List of all cubes that must be reachable
   * @returns true if all cubes are reachable
   */
  canReachAll(fromCube: string, allCubes: string[]): boolean {
    const otherCubes = allCubes.filter(name => name !== fromCube)
 
    for (const targetCube of otherCubes) {
      const path = this.findPath(fromCube, targetCube, new Set())
      if (!path || path.length === 0) {
        return false
      }
    }
 
    return true
  }
 
  /**
   * Build SQL join condition from join definition
   *
   * @param joinDef The cube join definition
   * @param sourceAlias Optional alias for source table (null uses actual column)
   * @param targetAlias Optional alias for target table (null uses actual column)
   * @returns SQL condition for the join
   */
  buildJoinCondition(
    joinDef: CubeJoin,
    sourceAlias: string | null,
    targetAlias: string | null
  ): SQL {
    const conditions: SQL[] = []
 
    // Process array of join conditions
    for (const joinOn of joinDef.on) {
      // Use actual column objects instead of aliases for regular table joins
      // Apply SQL isolation when using raw column objects to prevent mutation issues
      // (See CLAUDE.md SQL Object Isolation Pattern)
      const sourceCol = sourceAlias
        ? sql`${sql.identifier(sourceAlias)}.${sql.identifier(joinOn.source.name)}`
        : isolateSqlExpression(joinOn.source)
 
      const targetCol = targetAlias
        ? sql`${sql.identifier(targetAlias)}.${sql.identifier(joinOn.target.name)}`
        : isolateSqlExpression(joinOn.target)
 
      // Use custom comparator or default to eq
      const comparator = joinOn.as || eq
      conditions.push(comparator(sourceCol as any, targetCol as any))
    }
 
    return and(...conditions)!
  }
 
  /**
   * Get all reachable cubes from a starting cube
   * Useful for analyzing cube connectivity
   *
   * @param fromCube Starting cube name
   * @returns Set of all reachable cube names
   */
  getReachableCubes(fromCube: string): Set<string> {
    const reachable = new Set<string>([fromCube])
    const queue = [fromCube]
 
    while (queue.length > 0) {
      const currentCube = queue.shift()!
      const cubeDefinition = this.cubes.get(currentCube)
 
      if (!cubeDefinition?.joins) {
        continue
      }
 
      for (const [, joinDef] of Object.entries(cubeDefinition.joins)) {
        const resolvedTargetCube = resolveCubeReference(joinDef.targetCube)
        const targetName = resolvedTargetCube.name
 
        if (!reachable.has(targetName)) {
          reachable.add(targetName)
          queue.push(targetName)
        }
      }
    }
 
    return reachable
  }
 
  // Private cache management
 
  private getCacheKey(fromCube: string, toCube: string, excluded: Set<string>): string {
    const excludedStr = Array.from(excluded).sort().join(',')
    return `${fromCube}:${toCube}:${excludedStr}`
  }
 
  private getFromCache(key: string): InternalJoinPathStep[] | null | undefined {
    const entry = this.connectivityCache.get(key)
    if (!entry) {
      return undefined
    }
    return entry.path
  }
 
  private setInCache(key: string, path: InternalJoinPathStep[] | null): void {
    this.connectivityCache.set(key, { path })
  }
}