Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | 184x 885x 885x 885x 885x 885x 885x 885x 885x 885x 884x 884x 884x 884x 884x 883x 883x 1x 1x 1x 1x 1x 1x 1x 1x 1x 885x 885x 335x 380x 380x 380x 380x 380x 12x 12x 12x 12x 12x 368x 368x 368x 885x 128x 135x 135x 135x 135x 135x 23x 23x 23x 23x 23x 112x 112x 112x 885x | /**
* Group By Builder
* Handles GROUP BY clause construction:
* - Dimension grouping
* - Time dimension grouping with granularity
* - CTE-aware grouping
* - Aggregate vs window function detection
*/
import {
sql,
SQL,
type AnyColumn
} from 'drizzle-orm'
import type {
Cube,
SemanticQuery,
QueryContext,
JoinKeyInfo
} from '../types'
import { resolveSqlExpression } from '../cube-utils'
import { DateTimeBuilder } from './date-time-builder'
import { MeasureBuilder } from './measure-builder'
export class GroupByBuilder {
constructor(private dateTimeBuilder: DateTimeBuilder) {}
/**
* Check if a measure type is a window function
*/
isWindowFunctionType(measureType: string): boolean {
const windowTypes = ['lag', 'lead', 'rank', 'denseRank', 'rowNumber',
'ntile', 'firstValue', 'lastValue', 'movingAvg', 'movingSum']
return windowTypes.includes(measureType)
}
/**
* Check if a measure type is an aggregate function (requires GROUP BY)
*
* Note: 'number' is included because users commonly define measures with raw SQL
* aggregations (e.g., sql`COUNT(DISTINCT ...)`) and set type: 'number' for the output.
* These measures still require GROUP BY when used with time dimensions.
*/
isAggregateFunctionType(measureType: string): boolean {
const aggTypes = ['count', 'countDistinct', 'sum', 'avg', 'min', 'max',
'stddev', 'stddevSamp', 'variance', 'varianceSamp',
'median', 'p95', 'p99', 'percentile',
'number']
return aggTypes.includes(measureType)
}
/**
* Build GROUP BY fields from dimensions and time dimensions
* Works for both single and multi-cube queries
*
* NOTE: GROUP BY is only added when there are AGGREGATE measures.
* Window functions do not require GROUP BY and operate on individual rows.
*/
buildGroupByFields(
cubes: Map<string, Cube> | Cube,
query: SemanticQuery,
context: QueryContext,
queryPlan?: any // Optional QueryPlan for CTE handling
): (SQL | AnyColumn)[] {
const groupFields: (SQL | AnyColumn)[] = []
// Convert single cube to map for consistent handling
const cubeMap = cubes instanceof Map ? cubes : new Map([[cubes.name, cubes]])
// Determine if GROUP BY is needed:
// 1. When there are aggregate measures
// 2. When there are dimensions but NO measures (distinct values query)
// This also includes post-aggregation window functions that reference aggregate base measures
const hasDimensions = (query.dimensions && query.dimensions.length > 0) ||
(query.timeDimensions && query.timeDimensions.length > 0)
const hasMeasures = query.measures && query.measures.length > 0
// For dimension-only queries (no measures), we need GROUP BY for DISTINCT behavior
const isDimensionOnlyQuery = hasDimensions && !hasMeasures
let hasAggregateMeasures = false
for (const measureName of query.measures || []) {
const [cubeName, fieldName] = measureName.split('.')
const cube = cubeMap.get(cubeName)
Eif (cube && cube.measures && cube.measures[fieldName]) {
const measure = cube.measures[fieldName]
if (this.isAggregateFunctionType(measure.type) || measure.type === 'calculated') {
hasAggregateMeasures = true
break
}
// Check for post-aggregation window functions (e.g., RANK ordered by aggregated measure)
// These require GROUP BY because they operate on aggregated data
Eif (MeasureBuilder.isPostAggregationWindow(measure)) {
const baseMeasureName = MeasureBuilder.getWindowBaseMeasure(measure, cubeName)
Eif (baseMeasureName) {
const [baseCubeName, baseFieldName] = baseMeasureName.split('.')
const baseCube = cubeMap.get(baseCubeName)
const baseMeasure = baseCube?.measures?.[baseFieldName]
Eif (baseMeasure && this.isAggregateFunctionType(baseMeasure.type)) {
hasAggregateMeasures = true
break
}
}
}
}
}
// Skip GROUP BY only if we have measures that aren't aggregates (pure window functions)
Iif (!hasAggregateMeasures && !isDimensionOnlyQuery) {
return []
}
// Add dimensions to GROUP BY
if (query.dimensions) {
for (const dimensionName of query.dimensions) {
const [cubeName, fieldName] = dimensionName.split('.')
const cube = cubeMap.get(cubeName)
Eif (cube && cube.dimensions && cube.dimensions[fieldName]) {
// Check if this dimension is from a CTE cube
const isFromCTE = queryPlan?.preAggregationCTEs?.some((cte: any) => cte.cube.name === cubeName)
if (isFromCTE) {
// For dimensions from CTE cubes, check if this is a join key that maps to the main table
const cteInfo = queryPlan.preAggregationCTEs.find((cte: any) => cte.cube.name === cubeName)
const matchingJoinKey = cteInfo.joinKeys.find((jk: JoinKeyInfo) => jk.targetColumn === fieldName)
Iif (matchingJoinKey && matchingJoinKey.sourceColumnObj) {
// Use the source column from the main table for GROUP BY instead of the CTE dimension
groupFields.push(matchingJoinKey.sourceColumnObj)
} else {
// This dimension from CTE cube is not a join key - we need to reference it from the CTE
// But only if it was included in the CTE selections
const cteDimensionExpr = sql`${sql.identifier(cteInfo.cteAlias)}.${sql.identifier(fieldName)}`
groupFields.push(cteDimensionExpr)
}
} else {
// Regular dimension from non-CTE cube
const dimension = cube.dimensions[fieldName]
const dimensionExpr = resolveSqlExpression(dimension.sql, context)
groupFields.push(dimensionExpr)
}
}
}
}
// Add time dimensions to GROUP BY
if (query.timeDimensions) {
for (const timeDim of query.timeDimensions) {
const [cubeName, fieldName] = timeDim.dimension.split('.')
const cube = cubeMap.get(cubeName)
Eif (cube && cube.dimensions && cube.dimensions[fieldName]) {
// Check if this time dimension is from a CTE cube
const isFromCTE = queryPlan?.preAggregationCTEs?.some((cte: any) => cte.cube.name === cubeName)
if (isFromCTE) {
// For time dimensions from CTE cubes, check if this is a join key that maps to the main table
const cteInfo = queryPlan.preAggregationCTEs.find((cte: any) => cte.cube.name === cubeName)
const matchingJoinKey = cteInfo.joinKeys.find((jk: JoinKeyInfo) => jk.targetColumn === fieldName)
Iif (matchingJoinKey && matchingJoinKey.sourceColumnObj) {
// Use the source column from the main table for GROUP BY with time granularity
const timeExpr = this.dateTimeBuilder.buildTimeDimensionExpression(
matchingJoinKey.sourceColumnObj,
timeDim.granularity,
context
)
groupFields.push(timeExpr)
} else {
// This time dimension from CTE cube is not a join key - reference it from the CTE
// The CTE already has the time dimension expression applied, so just reference the column
const cteDimensionExpr = sql`${sql.identifier(cteInfo.cteAlias)}.${sql.identifier(fieldName)}`
groupFields.push(cteDimensionExpr)
}
} else {
// Regular time dimension from non-CTE cube
const dimension = cube.dimensions[fieldName]
const timeExpr = this.dateTimeBuilder.buildTimeDimensionExpression(
dimension.sql,
timeDim.granularity,
context
)
groupFields.push(timeExpr)
}
}
}
}
// Note: We used to add join keys from CTEs to GROUP BY, but this is unnecessary
// Join keys are only needed for the JOIN condition, not for grouping
// The GROUP BY should only contain columns that are actually selected or used for aggregation
return groupFields
}
}
|