builders group-by-builder.ts

91.42% Statements 64/70
81.35% Branches 48/59
90% Functions 9/10
90.62% Lines 58/64
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885x
 
 
 
885x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885x
 
 
885x
 
 
 
 
 
885x
 
885x
 
 
885x
 
885x
 
885x
884x
884x
884x
884x
884x
883x
883x
 
 
 
 
1x
1x
1x
1x
1x
1x
1x
1x
1x
 
 
 
 
 
 
 
885x
 
 
 
 
885x
335x
380x
380x
380x
 
 
380x
 
380x
 
12x
12x
 
12x
 
 
 
 
 
12x
12x
 
 
 
368x
368x
368x
 
 
 
 
 
 
885x
128x
135x
135x
135x
 
 
135x
 
135x
 
23x
23x
 
23x
 
 
 
 
 
 
 
 
 
 
23x
23x
 
 
 
112x
112x
 
 
 
 
112x
 
 
 
 
 
 
 
 
 
885x
 
 
  /**
 * Group By Builder
 * Handles GROUP BY clause construction:
 * - Dimension grouping
 * - Time dimension grouping with granularity
 * - CTE-aware grouping
 * - Aggregate vs window function detection
 */
 
import {
  sql,
  SQL,
  type AnyColumn
} from 'drizzle-orm'
 
import type {
  Cube,
  SemanticQuery,
  QueryContext,
  JoinKeyInfo
} from '../types'
 
import { resolveSqlExpression } from '../cube-utils'
import { DateTimeBuilder } from './date-time-builder'
import { MeasureBuilder } from './measure-builder'
 
export class GroupByBuilder {
  constructor(private dateTimeBuilder: DateTimeBuilder) {}
 
  /**
   * Check if a measure type is a window function
   */
  isWindowFunctionType(measureType: string): boolean {
    const windowTypes = ['lag', 'lead', 'rank', 'denseRank', 'rowNumber',
                         'ntile', 'firstValue', 'lastValue', 'movingAvg', 'movingSum']
    return windowTypes.includes(measureType)
  }
 
  /**
   * Check if a measure type is an aggregate function (requires GROUP BY)
   *
   * Note: 'number' is included because users commonly define measures with raw SQL
   * aggregations (e.g., sql`COUNT(DISTINCT ...)`) and set type: 'number' for the output.
   * These measures still require GROUP BY when used with time dimensions.
   */
  isAggregateFunctionType(measureType: string): boolean {
    const aggTypes = ['count', 'countDistinct', 'sum', 'avg', 'min', 'max',
                      'stddev', 'stddevSamp', 'variance', 'varianceSamp',
                      'median', 'p95', 'p99', 'percentile',
                      'number']
    return aggTypes.includes(measureType)
  }
 
  /**
   * Build GROUP BY fields from dimensions and time dimensions
   * Works for both single and multi-cube queries
   *
   * NOTE: GROUP BY is only added when there are AGGREGATE measures.
   * Window functions do not require GROUP BY and operate on individual rows.
   */
  buildGroupByFields(
    cubes: Map<string, Cube> | Cube,
    query: SemanticQuery,
    context: QueryContext,
    queryPlan?: any // Optional QueryPlan for CTE handling
  ): (SQL | AnyColumn)[] {
    const groupFields: (SQL | AnyColumn)[] = []
 
    // Convert single cube to map for consistent handling
    const cubeMap = cubes instanceof Map ? cubes : new Map([[cubes.name, cubes]])
 
    // Determine if GROUP BY is needed:
    // 1. When there are aggregate measures
    // 2. When there are dimensions but NO measures (distinct values query)
    // This also includes post-aggregation window functions that reference aggregate base measures
    const hasDimensions = (query.dimensions && query.dimensions.length > 0) ||
                          (query.timeDimensions && query.timeDimensions.length > 0)
    const hasMeasures = query.measures && query.measures.length > 0
 
    // For dimension-only queries (no measures), we need GROUP BY for DISTINCT behavior
    const isDimensionOnlyQuery = hasDimensions && !hasMeasures
 
    let hasAggregateMeasures = false
 
    for (const measureName of query.measures || []) {
      const [cubeName, fieldName] = measureName.split('.')
      const cube = cubeMap.get(cubeName)
      Eif (cube && cube.measures && cube.measures[fieldName]) {
        const measure = cube.measures[fieldName]
        if (this.isAggregateFunctionType(measure.type) || measure.type === 'calculated') {
          hasAggregateMeasures = true
          break
        }
 
        // Check for post-aggregation window functions (e.g., RANK ordered by aggregated measure)
        // These require GROUP BY because they operate on aggregated data
        Eif (MeasureBuilder.isPostAggregationWindow(measure)) {
          const baseMeasureName = MeasureBuilder.getWindowBaseMeasure(measure, cubeName)
          Eif (baseMeasureName) {
            const [baseCubeName, baseFieldName] = baseMeasureName.split('.')
            const baseCube = cubeMap.get(baseCubeName)
            const baseMeasure = baseCube?.measures?.[baseFieldName]
            Eif (baseMeasure && this.isAggregateFunctionType(baseMeasure.type)) {
              hasAggregateMeasures = true
              break
            }
          }
        }
      }
    }
 
    // Skip GROUP BY only if we have measures that aren't aggregates (pure window functions)
    Iif (!hasAggregateMeasures && !isDimensionOnlyQuery) {
      return []
    }
 
    // Add dimensions to GROUP BY
    if (query.dimensions) {
      for (const dimensionName of query.dimensions) {
        const [cubeName, fieldName] = dimensionName.split('.')
        const cube = cubeMap.get(cubeName)
        Eif (cube && cube.dimensions && cube.dimensions[fieldName]) {
 
          // Check if this dimension is from a CTE cube
          const isFromCTE = queryPlan?.preAggregationCTEs?.some((cte: any) => cte.cube.name === cubeName)
 
          if (isFromCTE) {
            // For dimensions from CTE cubes, check if this is a join key that maps to the main table
            const cteInfo = queryPlan.preAggregationCTEs.find((cte: any) => cte.cube.name === cubeName)
            const matchingJoinKey = cteInfo.joinKeys.find((jk: JoinKeyInfo) => jk.targetColumn === fieldName)
 
            Iif (matchingJoinKey && matchingJoinKey.sourceColumnObj) {
              // Use the source column from the main table for GROUP BY instead of the CTE dimension
              groupFields.push(matchingJoinKey.sourceColumnObj)
            } else {
              // This dimension from CTE cube is not a join key - we need to reference it from the CTE
              // But only if it was included in the CTE selections
              const cteDimensionExpr = sql`${sql.identifier(cteInfo.cteAlias)}.${sql.identifier(fieldName)}`
              groupFields.push(cteDimensionExpr)
            }
          } else {
            // Regular dimension from non-CTE cube
            const dimension = cube.dimensions[fieldName]
            const dimensionExpr = resolveSqlExpression(dimension.sql, context)
            groupFields.push(dimensionExpr)
          }
        }
      }
    }
 
    // Add time dimensions to GROUP BY
    if (query.timeDimensions) {
      for (const timeDim of query.timeDimensions) {
        const [cubeName, fieldName] = timeDim.dimension.split('.')
        const cube = cubeMap.get(cubeName)
        Eif (cube && cube.dimensions && cube.dimensions[fieldName]) {
 
          // Check if this time dimension is from a CTE cube
          const isFromCTE = queryPlan?.preAggregationCTEs?.some((cte: any) => cte.cube.name === cubeName)
 
          if (isFromCTE) {
            // For time dimensions from CTE cubes, check if this is a join key that maps to the main table
            const cteInfo = queryPlan.preAggregationCTEs.find((cte: any) => cte.cube.name === cubeName)
            const matchingJoinKey = cteInfo.joinKeys.find((jk: JoinKeyInfo) => jk.targetColumn === fieldName)
 
            Iif (matchingJoinKey && matchingJoinKey.sourceColumnObj) {
              // Use the source column from the main table for GROUP BY with time granularity
              const timeExpr = this.dateTimeBuilder.buildTimeDimensionExpression(
                matchingJoinKey.sourceColumnObj,
                timeDim.granularity,
                context
              )
              groupFields.push(timeExpr)
            } else {
              // This time dimension from CTE cube is not a join key - reference it from the CTE
              // The CTE already has the time dimension expression applied, so just reference the column
              const cteDimensionExpr = sql`${sql.identifier(cteInfo.cteAlias)}.${sql.identifier(fieldName)}`
              groupFields.push(cteDimensionExpr)
            }
          } else {
            // Regular time dimension from non-CTE cube
            const dimension = cube.dimensions[fieldName]
            const timeExpr = this.dateTimeBuilder.buildTimeDimensionExpression(
              dimension.sql,
              timeDim.granularity,
              context
            )
            groupFields.push(timeExpr)
          }
        }
      }
    }
 
    // Note: We used to add join keys from CTEs to GROUP BY, but this is unnecessary
    // Join keys are only needed for the JOIN condition, not for grouping
    // The GROUP BY should only contain columns that are actually selected or used for aggregation
 
    return groupFields
  }
}