All files / server cte-builder.ts

93.83% Statements 137/146
79.83% Branches 95/119
87.5% Functions 7/8
94.44% Lines 136/144

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418                                                                        133x                                   162x 162x     162x     162x   162x 162x       162x 1513x   151x                 162x 108x 108x     108x 108x             162x 162x   162x             162x 206x 206x 206x 206x   206x         162x 118x 146x 146x 12x 12x 12x           162x 24x 28x 28x 23x 23x 23x           162x         162x                 162x   184x     162x 
    162x     162x 24x 28x 28x 9x   9x 9x 9x 9x             162x 67x   78x 62x 62x     62x 9x   9x 3x 3x 3x 3x                     162x 43x 43x       43x 43x             162x 162x 162x   162x   162x 162x     162x           162x 162x     162x 270x 270x 162x 162x 108x             162x 162x 162x           162x 108x 108x 108x 108x             162x 118x 146x 146x 12x 12x 12x           162x 24x 28x 28x 23x 23x 23x         162x 162x     162x                             173x 162x       162x     162x   162x 162x 162x     162x                                   43x 43x     43x     43x 43x         43x 41x       2x     2x 2x         2x       43x         43x         43x   43x   43x 43x         43x                                          
/**
 * CTE (Common Table Expression) Builder
 * Handles pre-aggregation CTE generation for hasMany relationships
 * Extracted from QueryExecutor for single-responsibility
 */
 
import {
  and,
  eq,
  sql,
  SQL
} from 'drizzle-orm'
 
import type {
  SemanticQuery,
  QueryContext,
  QueryPlan,
  PropagatingFilter
} from './types'
 
import { resolveSqlExpression } from './cube-utils'
import type { QueryBuilder } from './query-builder'
 
/**
 * CTE information type extracted from QueryPlan.
 *
 * This is the type of one entry of `QueryPlan['preAggregationCTEs']`
 * (the property type with `null`/`undefined` removed, indexed at `0`).
 * Within this file the following members of an entry are read:
 * `cube`, `cteAlias`, `joinKeys`, `measures`, and the optional
 * `downstreamJoinKeys` and `propagatingFilters`.
 */
export type CTEInfo = NonNullable<QueryPlan['preAggregationCTEs']>[0]
 
/**
 * CTEBuilder handles the construction of Common Table Expressions
 * for pre-aggregation in hasMany relationship queries.
 *
 * This enables efficient aggregation of "many" side data before joining,
 * preventing the Cartesian product explosion that would occur with direct JOINs.
 *
 * All SQL fragments are produced through the injected QueryBuilder and the
 * drizzle-orm helpers (`sql`, `and`, `eq`); this class only decides which
 * fragments go into the SELECT, WHERE and GROUP BY parts of the CTE.
 */
export class CTEBuilder {
  constructor(private queryBuilder: QueryBuilder) {}

  /**
   * Build pre-aggregation CTE for hasMany relationships
   *
   * Creates a CTE that:
   * 1. Selects join keys and aggregated measures
   * 2. Applies security context filtering
   * 3. Groups by join keys and requested dimensions
   * 4. Handles propagating filters from related cubes
   *
   * @param cteInfo - Description of the CTE to build (cube, alias, join keys, measures, ...)
   * @param query - The semantic query; its dimensions, timeDimensions and filters are inspected
   * @param context - Query context; provides `db` and is passed to `cube.sql(...)`
   * @param queryPlan - Full query plan; a copy without this cube's CTE entry is passed to the WHERE builder
   * @param preBuiltFilterMap - Optional pre-built filter SQL, forwarded unchanged to the WHERE builder
   * @returns The value of `context.db.$with(alias).as(query)`, or `null` when there is nothing to select
   */
  buildPreAggregationCTE(
    cteInfo: CTEInfo,
    query: SemanticQuery,
    context: QueryContext,
    queryPlan: QueryPlan,
    preBuiltFilterMap?: Map<string, SQL[]>
  ): any {
    const cube = cteInfo.cube
    const cubeBase = cube.sql(context) // Gets security filtering!

    // SELECT list: join keys, downstream join keys, measures, dimensions, time dimensions
    const cteSelections = this.buildCTESelections(cteInfo, query, context)

    // Ensure we have at least one selection
    if (Object.keys(cteSelections).length === 0) {
      return null
    }

    // Build CTE query with security context applied
    let cteQuery = context.db
      .select(cteSelections)
      .from(cubeBase.from)

    // IMPORTANT: Only dimension filters belong in the CTE WHERE clause.
    // Measure filters should only be applied in the HAVING clause of the main query.

    // Create a modified query plan that doesn't skip filters for the current CTE cube:
    // this cube's own entry is removed from the list of pre-aggregation CTEs.
    const cteQueryPlan = queryPlan ? {
      ...queryPlan,
      preAggregationCTEs: queryPlan.preAggregationCTEs?.filter((cte: any) => cte.cube.name !== cube.name)
    } : undefined

    const whereConditions = this.queryBuilder.buildWhereConditions(cube, query, context, cteQueryPlan, preBuiltFilterMap)

    // Date-range conditions and propagating-filter subqueries for this cube
    const extraConditions = this.buildCTEExtraConditions(cteInfo, query, context)

    // Combine security context, regular WHERE conditions and the extra conditions.
    // IMPORTANT: Must combine all conditions in a single WHERE call to avoid overriding
    const allCteConditions: any[] = []
    if (cubeBase.where) {
      allCteConditions.push(cubeBase.where)
    }
    allCteConditions.push(...whereConditions, ...extraConditions)

    if (allCteConditions.length > 0) {
      const combinedWhere = allCteConditions.length === 1
        ? allCteConditions[0]
        : and(...allCteConditions)
      cteQuery = cteQuery.where(combinedWhere)
    }

    // All CTEs use GROUP BY for pre-aggregation.
    // Post-aggregation window functions are applied in the outer query, not in CTEs.
    const groupByFields = this.buildCTEGroupByFields(cteInfo, query, context)
    if (groupByFields.length > 0) {
      cteQuery = cteQuery.groupBy(...groupByFields)
    }

    return context.db.$with(cteInfo.cteAlias).as(cteQuery)
  }

  /**
   * Build join condition for CTE
   *
   * Creates the ON clause for joining a CTE to the main query.
   * Uses stored column objects for type-safe joins.
   *
   * @param joinCube - The join cube whose CTE is being joined
   * @param cteAlias - Alias under which the CTE is referenced in the main query
   * @param queryPlan - Query plan holding the pre-aggregation CTE descriptions
   * @returns A single equality, or all equalities combined with AND
   * @throws Error when the plan has no CTE entry for the cube, or the entry has no join keys
   */
  buildCTEJoinCondition(
    joinCube: QueryPlan['joinCubes'][0],
    cteAlias: string,
    queryPlan: QueryPlan
  ): SQL {
    // Find the pre-aggregation info for this join cube
    const cteInfo = queryPlan.preAggregationCTEs?.find((cte: any) => cte.cube.name === joinCube.cube.name)
    if (!cteInfo) {
      throw new Error(`CTE info not found for cube ${joinCube.cube.name}`)
    }

    const conditions: SQL[] = []

    // Build join conditions using join keys
    for (const joinKey of cteInfo.joinKeys) {
      // Use the stored source column object if available, otherwise fall back to identifier
      const sourceCol = joinKey.sourceColumnObj || sql.identifier(joinKey.sourceColumn)
      const cteCol = sql`${sql.identifier(cteAlias)}.${sql.identifier(joinKey.targetColumn)}` // CTE column
      conditions.push(eq(sourceCol as any, cteCol))
    }

    // Without join keys there is no ON clause to build. Fail loudly here instead of
    // handing `undefined` back to the caller under a `SQL` return type.
    if (conditions.length === 0) {
      throw new Error(`No join keys found for CTE ${cteAlias} (cube ${joinCube.cube.name})`)
    }

    // `and(...)` is given at least two conditions here, so the assertion is safe
    return conditions.length === 1 ? conditions[0] : and(...conditions)!
  }

  /**
   * Build a subquery filter for propagating filters from related cubes.
   *
   * This generates: cteCube.FK IN (SELECT sourceCube.PK FROM sourceCube WHERE filters...)
   *
   * Example: For Productivity CTE with Employees.createdAt filter:
   * employee_id IN (SELECT id FROM employees WHERE organisation_id = $1 AND created_at >= $date)
   *
   * For composite keys, uses EXISTS instead of IN for better database compatibility:
   * EXISTS (SELECT 1 FROM source WHERE source.pk1 = cte.fk1 AND source.pk2 = cte.fk2 AND <filters>)
   *
   * @param propFilter - Source cube, its filters (optionally pre-built) and the join conditions
   * @param context - Query context; provides `db` and is passed to `sourceCube.sql(...)`
   * @returns The IN / EXISTS condition, or `null` when the source cube yields no filter conditions
   */
  buildPropagatingFilterSubquery(
    propFilter: PropagatingFilter,
    context: QueryContext
  ): SQL | null {
    const sourceCube = propFilter.sourceCube
    const cubeBase = sourceCube.sql(context) // Gets security context filtering

    // Build filter conditions for the source cube
    const filterConditions: SQL[] = []

    // Add security context (already in cubeBase.where)
    if (cubeBase.where) {
      filterConditions.push(cubeBase.where)
    }

    // Use pre-built filter SQL if available (for parameter deduplication)
    // Otherwise fall back to building fresh
    if (propFilter.preBuiltFilterSQL) {
      filterConditions.push(propFilter.preBuiltFilterSQL)
    } else {
      // Fallback: Create a synthetic query with just the propagating filters
      // and use buildWhereConditions to process them
      const syntheticQuery: SemanticQuery = {
        filters: propFilter.filters
      }
      const cubeMap = new Map([[sourceCube.name, sourceCube]])
      const filterSQL = this.queryBuilder.buildWhereConditions(
        cubeMap,
        syntheticQuery,
        context
      )
      filterConditions.push(...filterSQL)
    }

    // If no filter conditions, no subquery needed
    if (filterConditions.length === 0) {
      return null
    }

    // Build the combined WHERE condition from filters
    const combinedWhere = filterConditions.length === 1
      ? filterConditions[0]
      : and(...filterConditions)

    const joinConditions = propFilter.joinConditions

    if (joinConditions.length === 1) {
      // Single key: use simple IN clause
      const { source: sourcePK, target: cteFK } = joinConditions[0]
      const subquery = context.db
        .select({ pk: sourcePK })
        .from(cubeBase.from)
        .where(combinedWhere!)

      return sql`${cteFK} IN ${subquery}`
    }

    // Composite keys: use EXISTS with all join conditions
    // Build join condition: source.pk1 = cte.fk1 AND source.pk2 = cte.fk2 ...
    const joinEqualityConditions = joinConditions.map(jc => eq(jc.source, jc.target))

    // Combine join conditions with filter conditions
    const existsWhere = and(
      ...joinEqualityConditions,
      combinedWhere!
    )

    const existsSubquery = context.db
      .select({ one: sql`1` })
      .from(cubeBase.from)
      .where(existsWhere!)

    return sql`EXISTS ${existsSubquery}`
  }

  /**
   * Resolve a `Cube.field` member name to a dimension of the given cube.
   *
   * @returns The field name and dimension definition, or `undefined` when the
   *   member belongs to another cube or the cube has no such dimension
   */
  private findCubeDimension(cube: CTEInfo['cube'], member: string) {
    const [memberCubeName, fieldName] = member.split('.')
    if (memberCubeName !== cube.name || !cube.dimensions) {
      return undefined
    }
    const dimension = cube.dimensions[fieldName]
    return dimension ? { fieldName, dimension } : undefined
  }

  /**
   * Collect the SELECT list of the CTE, keyed by output column name.
   *
   * Entries are added in this order: join keys (plus dimension-named aliases),
   * downstream join keys, measures, requested dimensions, requested time dimensions.
   * A later entry with the same key replaces an earlier one.
   */
  private buildCTESelections(
    cteInfo: CTEInfo,
    query: SemanticQuery,
    context: QueryContext
  ): Record<string, any> {
    const cube = cteInfo.cube
    const cubeName = cube.name
    const selections: Record<string, any> = {}

    // Join key columns - only those with a stored Drizzle column object
    for (const joinKey of cteInfo.joinKeys) {
      if (!joinKey.targetColumnObj) {
        continue
      }
      selections[joinKey.targetColumn] = joinKey.targetColumnObj

      // Also add an aliased version if there's a matching dimension with a different name.
      // This allows the main query to reference it by dimension name.
      for (const [dimName, dimension] of Object.entries(cube.dimensions || {}) as Array<[string, any]>) {
        if (dimension.sql === joinKey.targetColumnObj && dimName !== joinKey.targetColumn) {
          // "column_name" as "dimensionName"
          selections[dimName] = sql`${joinKey.targetColumnObj}`.as(dimName)
        }
      }
    }

    // Downstream join keys for cubes that need to be joined through this CTE.
    // Example: If Teams.name is a dimension and EmployeeTeams has a join to Teams,
    // we need to include team_id in the CTE so Teams can be joined through it.
    for (const downstream of cteInfo.downstreamJoinKeys ?? []) {
      for (const joinKey of downstream.joinKeys) {
        // The source column is the FK column in the CTE cube that points to the downstream cube
        if (joinKey.sourceColumnObj) {
          selections[joinKey.sourceColumn] = joinKey.sourceColumnObj
        }
      }
    }

    // Measures with aggregation using the centralized helper
    const cubeMap = new Map([[cubeName, cube]])
    const resolvedMeasures = this.queryBuilder.buildResolvedMeasures(
      cteInfo.measures,
      cubeMap,
      context
    )
    for (const measureName of cteInfo.measures) {
      const [, fieldName] = measureName.split('.')
      const measureBuilder = resolvedMeasures.get(measureName)
      if (measureBuilder) {
        // Use just the field name as the column alias (SQL identifiers can't have dots)
        selections[fieldName] = sql`${measureBuilder()}`.as(fieldName)
      }
    }

    // Dimensions of this cube that are requested in the query
    for (const dimensionName of query.dimensions ?? []) {
      const match = this.findCubeDimension(cube, dimensionName)
      if (match) {
        const dimensionExpr = this.queryBuilder.buildMeasureExpression({ sql: match.dimension.sql, type: 'number' }, context)
        selections[match.fieldName] = sql`${dimensionExpr}`.as(match.fieldName)
      }
    }

    // Time dimensions of this cube that are requested in the query
    for (const timeDim of query.timeDimensions ?? []) {
      const match = this.findCubeDimension(cube, timeDim.dimension)
      if (match) {
        const timeExpr = this.queryBuilder.buildTimeDimensionExpression(match.dimension.sql, timeDim.granularity, context)
        selections[match.fieldName] = sql`${timeExpr}`.as(match.fieldName)
      }
    }

    return selections
  }

  /**
   * Collect the CTE-specific WHERE conditions that are built in this class
   * (as opposed to those returned by `queryBuilder.buildWhereConditions`):
   * 1. `dateRange` of time dimensions belonging to this cube
   * 2. simple `inDateRange` filters on dimensions of this cube
   * 3. subquery conditions for propagating filters from related cubes
   */
  private buildCTEExtraConditions(
    cteInfo: CTEInfo,
    query: SemanticQuery,
    context: QueryContext
  ): any[] {
    const cube = cteInfo.cube
    const conditions: any[] = []

    // Handle dateRange from timeDimensions property
    for (const timeDim of query.timeDimensions ?? []) {
      const match = this.findCubeDimension(cube, timeDim.dimension)
      if (match && timeDim.dateRange) {
        // Use the raw field expression for date filtering (not the truncated version)
        const fieldExpr = this.queryBuilder.buildMeasureExpression({ sql: match.dimension.sql, type: 'number' }, context)
        const dateCondition = this.queryBuilder.buildDateRangeCondition(fieldExpr, timeDim.dateRange)
        if (dateCondition) {
          conditions.push(dateCondition)
        }
      }
    }

    // Handle inDateRange filters from the filters array for dimensions of this cube
    for (const filter of query.filters ?? []) {
      // Only handle simple filter conditions (not logical AND/OR)
      if ('and' in filter || 'or' in filter || !('member' in filter) || !('operator' in filter)) {
        continue
      }
      const filterCondition = filter as any
      const match = this.findCubeDimension(cube, filterCondition.member)
      // Only the operator is checked here; the dimension's declared type is not inspected
      if (match && filterCondition.operator === 'inDateRange') {
        const fieldExpr = this.queryBuilder.buildMeasureExpression({ sql: match.dimension.sql, type: 'number' }, context)
        const dateCondition = this.queryBuilder.buildDateRangeCondition(fieldExpr, filterCondition.values)
        if (dateCondition) {
          conditions.push(dateCondition)
        }
      }
    }

    // Handle propagating filters from related cubes.
    // When cube A has filters and a hasMany relationship to this CTE cube B,
    // A's filters should propagate via subquery: B.FK IN (SELECT A.PK FROM A WHERE filters)
    for (const propFilter of cteInfo.propagatingFilters ?? []) {
      const subqueryCondition = this.buildPropagatingFilterSubquery(propFilter, context)
      if (subqueryCondition) {
        conditions.push(subqueryCondition)
      }
    }

    return conditions
  }

  /**
   * Collect the GROUP BY list of the CTE: join keys (essential for
   * pre-aggregation), downstream join keys, then requested dimensions and
   * time dimensions of this cube.
   *
   * Join-key columns are de-duplicated by their `name`; dimension and time
   * dimension expressions are appended as-is.
   */
  private buildCTEGroupByFields(
    cteInfo: CTEInfo,
    query: SemanticQuery,
    context: QueryContext
  ): any[] {
    const cube = cteInfo.cube
    const groupByFields: any[] = []
    const seenColumnNames = new Set<string>() // Track added columns to avoid duplicates

    // Add a column unless one with the same name was already added
    const addColumn = (col: any) => {
      const colName = col?.name || (typeof col === 'string' ? col : null)
      if (!colName) {
        // For expressions without a name, add directly
        groupByFields.push(col)
        return
      }
      if (!seenColumnNames.has(colName)) {
        seenColumnNames.add(colName)
        groupByFields.push(col)
      }
    }

    // Join key columns
    for (const joinKey of cteInfo.joinKeys) {
      if (joinKey.targetColumnObj) {
        addColumn(joinKey.targetColumnObj)
      }
    }

    // Downstream join keys - needed so downstream cubes can be joined through this CTE
    for (const downstream of cteInfo.downstreamJoinKeys ?? []) {
      for (const joinKey of downstream.joinKeys) {
        if (joinKey.sourceColumnObj) {
          addColumn(joinKey.sourceColumnObj)
        }
      }
    }

    // Dimensions of this cube that are requested in the query
    for (const dimensionName of query.dimensions ?? []) {
      const match = this.findCubeDimension(cube, dimensionName)
      if (match) {
        groupByFields.push(resolveSqlExpression(match.dimension.sql, context))
      }
    }

    // Time dimensions of this cube that are requested in the query
    for (const timeDim of query.timeDimensions ?? []) {
      const match = this.findCubeDimension(cube, timeDim.dimension)
      if (match) {
        groupByFields.push(
          this.queryBuilder.buildTimeDimensionExpression(match.dimension.sql, timeDim.granularity, context)
        )
      }
    }

    return groupByFields
  }
}