discovery.ts

93.9% Statements 185/197
84.9% Branches 135/159
100% Functions 25/25
94.38% Lines 168/178
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42852x
 
42852x
546060x
 
 
42852x
301352x
 
 
42852x
503208x
3025500x
214820x
 
2810680x
 
 
 
 
 
 
 
 
42852x
 
 
 
 
 
 
45204x
45204x
 
 
45204x
 
 
44812x
 
 
42852x
42852x
78044x
78044x
 
 
 
42852x
42852x
42852x
 
42852x
 
 
 
 
 
 
2996x
2996x
7508x
7508x
108x
 
 
2996x
 
 
 
 
 
 
 
412x
 
 
 
 
 
 
 
 
 
 
 
 
412x
 
 
 
1464x
 
 
 
 
 
 
 
 
 
1064x
1064x
1064x
1064x
 
1064x
 
2484x
2484x
136x
136x
 
 
 
2484x
2484x
152x
152x
 
 
 
2484x
2480x
2480x
288x
288x
 
 
 
 
2484x
848x
1580x
1580x
264x
264x
 
 
 
 
 
2484x
6196x
 
 
6196x
6196x
 
 
6196x
 
 
6196x
2196x
 
 
 
6196x
1464x
 
 
6196x
696x
696x
696x
696x
 
 
 
 
2484x
6764x
 
 
6764x
6764x
 
 
6764x
 
 
6764x
 
 
 
 
6764x
1464x
 
 
6764x
132x
132x
132x
132x
 
 
 
 
 
1064x
 
 
1064x
328x
 
612x
 
1064x
36x
 
132x
 
1064x
 
 
 
 
 
 
 
808x
 
 
2368x
 
 
808x
1824x
 
 
 
 
 
808x
 
808x
 
 
 
 
 
 
 
 
 
 
 
404x
 
 
404x
284x
 
 
 
120x
 
 
120x
112x
112x
 
 
 
 
 
 
120x
440x
440x
48x
 
 
 
 
 
 
 
120x
 
 
120x
336x
112x
 
 
 
 
 
 
120x
440x
8x
 
 
 
 
 
 
 
120x
120x
440x
440x
 
 
 
 
 
 
112x
 
 
 
 
 
 
120x
 
 
 
 
 
 
 
 
 
 
404x
 
404x
284x
 
 
 
120x
40x
 
 
 
120x
112x
112x
 
 
 
120x
 
120x
 
 
 
 
 
 
 
 
 
416x
 
 
416x
416x
 
4x
12x
12x
12x
12x
 
12x
 
 
 
 
 
32x
32x
 
 
 
 
 
 
 
 
 
412x
412x
 
 
 
 
412x
412x
1064x
 
1064x
392x
392x
392x
 
 
392x
 
392x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412x
112x
 
 
 
 
 
 
 
 
 
 
 
60x
 
60x
 
100x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100x
100x
276x
276x
276x
276x
68x
 
 
276x
40x
 
 
 
 
 
60x
 
  /**
 * AI Discovery Engine
 * Schema-aware intelligence for discovering relevant cubes and fields
 */
 
import type { CubeMetadata } from '../types/metadata'
import { QUERY_SCHEMAS } from './schemas'
 
/**
 * Discovery result for a single cube.
 *
 * One entry is produced per cube returned by `discoverCubes`.
 */
export interface CubeDiscoveryResult {
  /** Cube name, copied from the cube metadata */
  cube: string
  /** Cube title, copied from the cube metadata */
  title: string
  /** Cube description, copied from the cube metadata when present */
  description?: string
  /** Relevance of the cube to the search, capped at 1; set to 1 when no search text was supplied */
  relevanceScore: number
  /** Which parts of the cube matched at least one keyword; empty when no search text was supplied */
  matchedOn: ('name' | 'title' | 'description' | 'exampleQuestions' | 'measures' | 'dimensions')[]
  /** Up to five measure names: the best-scoring ones, or the first five when no search text was supplied */
  suggestedMeasures: string[]
  /** Up to five dimension names: the best-scoring ones, or the first five when no search text was supplied */
  suggestedDimensions: string[]

  /** Analysis capabilities; `query` is always available */
  capabilities: {
    query: true
    funnel: boolean
    flow: boolean
    retention: boolean
  }

  /** Candidate dimensions for the advanced modes (absent when none of funnel/flow/retention is available) */
  analysisConfig?: {
    /** Dimensions that may identify the entity to track */
    candidateBindingKeys: Array<{
      dimension: string
      description?: string
    }>
    /** Dimensions that may serve as the time axis */
    candidateTimeDimensions: Array<{
      dimension: string
      description?: string
    }>
    /** String dimensions that may hold event types */
    candidateEventDimensions: Array<{
      dimension: string
      description?: string
    }>
  }

  /** Hints for AI on next steps (omitted when there are none) */
  hints?: string[]

  /** Query schemas (included when capabilities.funnel/flow/retention is true) */
  querySchemas?: typeof QUERY_SCHEMAS
}
 
/**
 * Discovery request options.
 *
 * `topic` and `intent` are joined with a space into one search text; when that
 * text is blank, `discoverCubes` lists cubes without scoring them.
 */
export interface DiscoveryOptions {
  /** Topic to search for */
  topic?: string
  /** Natural language intent, combined with `topic` */
  intent?: string
  /** Maximum number of results (defaults to 10) */
  limit?: number
  /** Minimum relevance score (0-1) a cube needs to be returned when searching (defaults to 0.1) */
  minScore?: number
}
 
/**
 * Compute the Levenshtein (edit) distance between two strings.
 *
 * Fills a full dynamic-programming table in which cell `[row][col]` is the
 * distance between the first `row` characters of `b` and the first `col`
 * characters of `a`. Comparison is per UTF-16 code unit (`charAt`).
 *
 * @param a - First string
 * @param b - Second string
 * @returns Minimum number of single-character insertions, deletions or
 *          substitutions needed to turn one string into the other
 */
function levenshteinDistance(a: string, b: string): number {
  const rows = b.length
  const cols = a.length

  // Seed the borders: distance from the empty prefix is just the prefix length
  const table: number[][] = Array.from({ length: rows + 1 }, (_, row) => [row])
  for (let col = 0; col <= cols; col++) {
    table[0][col] = col
  }

  for (let row = 1; row <= rows; row++) {
    const previous = table[row - 1]
    const current = table[row]
    for (let col = 1; col <= cols; col++) {
      const sameChar = b.charAt(row - 1) === a.charAt(col - 1)
      current[col] = sameChar
        ? previous[col - 1]
        : 1 + Math.min(previous[col - 1], current[col - 1], previous[col])
    }
  }

  return table[rows][cols]
}
 
/**
 * Calculate fuzzy match score between two strings (0-1, higher is better).
 *
 * Both inputs are lower-cased and trimmed first. Scoring tiers:
 * - `1.0` when the normalized strings are identical
 * - `0`   when the normalized query is empty (and the target is not)
 * - `0.9` when the target contains the query as a substring
 * - otherwise the Levenshtein similarity (`1 - distance / longerLength`),
 *   scaled by `0.7` when it exceeds `0.5`, else `0`
 *
 * @param query - Search term
 * @param target - Candidate text to compare against
 * @returns Match score in the range [0, 1]
 */
function fuzzyMatchScore(query: string, target: string): number {
  const q = query.toLowerCase().trim()
  const t = target.toLowerCase().trim()

  // Exact match
  if (q === t) return 1.0

  // An empty string is a substring of everything; without this guard a blank
  // query would score 0.9 against every target.
  if (q.length === 0) return 0

  // Contains match. This also covers whole-word and word-prefix hits: any word
  // of the target that equals or starts with the query makes the query a
  // substring of the target, so no separate word-level check is needed.
  if (t.includes(q)) return 0.9

  // Levenshtein-based fuzzy match
  const distance = levenshteinDistance(q, t)
  const maxLen = Math.max(q.length, t.length)
  const similarity = 1 - distance / maxLen

  return similarity > 0.5 ? similarity * 0.7 : 0
}
 
/**
 * Score a query against every candidate string (names, synonyms, etc.) and
 * keep the highest fuzzy match score.
 *
 * @param query - Search term
 * @param targets - Candidate strings
 * @returns Best score found, or 0 when `targets` is empty or nothing matches
 */
function matchAgainstArray(query: string, targets: string[]): number {
  return targets.reduce((best, candidate) => {
    const candidateScore = fuzzyMatchScore(query, candidate)
    return candidateScore > best ? candidateScore : best
  }, 0)
}
 
/**
 * Extract keywords from a natural language query.
 *
 * The text is lower-cased, punctuation is replaced by spaces, and the result
 * is split on whitespace. Tokens of two characters or fewer and common stop
 * words are dropped. Order and duplicates are preserved.
 *
 * @param text - Free-form search text
 * @returns Remaining keywords, in their original order
 */
function extractKeywords(text: string): string[] {
  // Common stop words to filter out
  const stopWords = new Set([
    'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'must', 'can', 'and', 'or', 'but', 'if',
    'then', 'else', 'when', 'where', 'why', 'how', 'what', 'which', 'who',
    'this', 'that', 'these', 'those', 'i', 'me', 'my', 'we', 'our', 'you',
    'your', 'he', 'she', 'it', 'they', 'them', 'their', 'in', 'on', 'at',
    'to', 'for', 'of', 'with', 'by', 'from', 'up', 'down', 'out', 'over',
    'under', 'about', 'into', 'through', 'during', 'before', 'after',
    'above', 'below', 'between', 'show', 'get', 'find', 'list',
    'give', 'tell', 'display', 'want', 'need', 'see', 'know'
  ])

  return text
    .toLowerCase()
    // Keep letters, combining marks and digits from any script plus "_";
    // an ASCII-only `\w` would cut words such as "café" down to "caf".
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter(word => word.length > 2 && !stopWords.has(word))
}
 
/**
 * Best score of one keyword against a single measure or dimension: the
 * maximum over its short name (cube prefix removed), title, description
 * (weighted 0.8) and synonyms.
 */
function scoreFieldAgainstKeyword(
  keyword: string,
  field: { name: string; title: string; description?: string | null; synonyms?: string[] | null }
): number {
  const shortName = field.name.split('.').pop() || field.name
  let best = Math.max(0, fuzzyMatchScore(keyword, shortName), fuzzyMatchScore(keyword, field.title))

  if (field.description) {
    best = Math.max(best, fuzzyMatchScore(keyword, field.description) * 0.8)
  }
  if (field.synonyms) {
    best = Math.max(best, matchAgainstArray(keyword, field.synonyms))
  }

  return best
}

/** Names of the `count` highest-scoring entries, best first. */
function topScoredNames(scores: Map<string, number>, count: number): string[] {
  return Array.from(scores.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, count)
    .map(([name]) => name)
}

/**
 * Score a cube against discovery criteria.
 *
 * Every keyword is matched against the cube name (weight 2), title (1.5),
 * description, example questions (1.5 each) and each measure and dimension.
 * The accumulated total is divided by `keywords.length * 2` and capped at 1.
 *
 * @param cube - Cube metadata to score
 * @param keywords - Search keywords; an empty list yields a score of 0
 * @returns The normalized score, the cube parts that matched, and up to five
 *          best-matching measure and dimension names
 */
function scoreCube(
  cube: CubeMetadata,
  keywords: string[]
): { score: number; matchedOn: CubeDiscoveryResult['matchedOn']; suggestedMeasures: string[]; suggestedDimensions: string[] } {
  let totalScore = 0
  const matchedOn: CubeDiscoveryResult['matchedOn'] = []
  const measureScores = new Map<string, number>()
  const dimensionScores = new Map<string, number>()

  // Record each match source once, in first-seen order
  const recordMatch = (source: CubeDiscoveryResult['matchedOn'][number]): void => {
    if (!matchedOn.includes(source)) matchedOn.push(source)
  }

  for (const keyword of keywords) {
    // Match cube name
    const nameScore = fuzzyMatchScore(keyword, cube.name)
    if (nameScore > 0.5) {
      totalScore += nameScore * 2 // Weight cube name matches higher
      recordMatch('name')
    }

    // Match cube title
    const titleScore = fuzzyMatchScore(keyword, cube.title)
    if (titleScore > 0.5) {
      totalScore += titleScore * 1.5
      recordMatch('title')
    }

    // Match cube description
    if (cube.description) {
      const descScore = fuzzyMatchScore(keyword, cube.description)
      if (descScore > 0.3) {
        totalScore += descScore
        recordMatch('description')
      }
    }

    // Match example questions
    if (cube.exampleQuestions) {
      for (const question of cube.exampleQuestions) {
        const qScore = fuzzyMatchScore(keyword, question)
        if (qScore > 0.3) {
          totalScore += qScore * 1.5 // Example questions are valuable
          recordMatch('exampleQuestions')
        }
      }
    }

    // Match measures, remembering each measure's best score across keywords
    for (const measure of cube.measures) {
      const measureScore = scoreFieldAgainstKeyword(keyword, measure)
      if (measureScore > 0.4) {
        totalScore += measureScore
        recordMatch('measures')
        measureScores.set(measure.name, Math.max(measureScores.get(measure.name) ?? 0, measureScore))
      }
    }

    // Match dimensions, remembering each dimension's best score across keywords
    for (const dimension of cube.dimensions) {
      const dimScore = scoreFieldAgainstKeyword(keyword, dimension)
      if (dimScore > 0.4) {
        totalScore += dimScore
        recordMatch('dimensions')
        dimensionScores.set(dimension.name, Math.max(dimensionScores.get(dimension.name) ?? 0, dimScore))
      }
    }
  }

  // Normalize score; guard the empty keyword list, which would otherwise be 0 / 0 = NaN
  const normalizedScore = keywords.length === 0
    ? 0
    : Math.min(1, totalScore / (keywords.length * 2))

  return {
    score: normalizedScore,
    matchedOn,
    suggestedMeasures: topScoredNames(measureScores, 5),
    suggestedDimensions: topScoredNames(dimensionScores, 5)
  }
}
 
/**
 * Detect analysis capabilities from cube metadata.
 *
 * Plain queries are always supported. Funnel, flow and retention are enabled
 * together when the cube either declares `meta.eventStream`, or has at least
 * one time dimension plus at least one dimension that could identify an
 * entity (short name contains "id", or type is `number`).
 *
 * @param cube - Cube metadata to inspect
 * @returns Capability flags for the cube
 */
function detectCapabilities(cube: CubeMetadata): CubeDiscoveryResult['capabilities'] {
  // Check if cube has explicit eventStream meta
  const hasEventStream = !!(cube.meta?.eventStream)

  // Check if cube has time dimensions (needed for analysis modes)
  const hasTimeDimension = cube.dimensions.some(d => d.type === 'time')

  // Check for potential binding keys (dimensions that could identify entities).
  // Only the part after the cube prefix is inspected, so a cube called e.g.
  // "Videos" does not turn every "Videos.*" dimension into an id candidate.
  // A configured eventStream binding key needs no check here: any eventStream
  // config already enables the analysis modes below.
  const hasPotentialBindingKey = cube.dimensions.some(d => {
    const shortName = d.name.split('.').pop()?.toLowerCase() || ''
    return shortName.includes('id') || d.type === 'number'
  })

  // Analysis modes available if explicit eventStream OR has needed dimensions
  const supportsAnalysisModes = hasEventStream || (hasTimeDimension && hasPotentialBindingKey)

  return {
    query: true,
    funnel: supportsAnalysisModes,
    flow: supportsAnalysisModes,
    retention: supportsAnalysisModes
  }
}
 
/**
 * Build the analysis config listing candidate dimensions for the advanced
 * analysis modes.
 *
 * Dimensions configured in `meta.eventStream` are listed first; the rest are
 * inferred from dimension types and short names (the part after the last ".").
 *
 * @param cube - Cube metadata to inspect
 * @returns Candidate binding-key, time and event dimensions, or `undefined`
 *          when the cube supports none of funnel, flow or retention
 */
function buildAnalysisConfig(cube: CubeMetadata): CubeDiscoveryResult['analysisConfig'] | undefined {
  const { funnel, flow, retention } = detectCapabilities(cube)
  if (!(funnel || flow || retention)) {
    return undefined
  }

  type Candidate = { dimension: string; description?: string }

  const shortNameOf = (fullName: string): string => fullName.split('.').pop()?.toLowerCase() || ''
  const isListed = (list: Candidate[], dimensionName: string): boolean =>
    list.some(entry => entry.dimension === dimensionName)

  // --- Binding keys: configured key first, then anything whose short name contains "id"
  const candidateBindingKeys: Candidate[] = []
  const configuredBindingKey = cube.meta?.eventStream?.bindingKey
  if (configuredBindingKey) {
    const configuredDim = cube.dimensions.find(d => d.name === configuredBindingKey)
    candidateBindingKeys.push({
      dimension: configuredBindingKey,
      description: configuredDim?.description || 'Configured binding key'
    })
  }
  cube.dimensions.forEach(dim => {
    if (shortNameOf(dim.name).includes('id') && !isListed(candidateBindingKeys, dim.name)) {
      candidateBindingKeys.push({
        dimension: dim.name,
        description: dim.description || 'Potential entity identifier'
      })
    }
  })

  // --- Time dimensions: configured one first, then every dimension of type "time"
  const candidateTimeDimensions: Candidate[] = []
  const configuredTimeDimension = cube.meta?.eventStream?.timeDimension
  if (configuredTimeDimension) {
    const configuredDim = cube.dimensions.find(d => d.name === configuredTimeDimension)
    candidateTimeDimensions.push({
      dimension: configuredTimeDimension,
      description: configuredDim?.description || 'Configured time dimension'
    })
  }
  cube.dimensions.forEach(dim => {
    if (dim.type === 'time' && !isListed(candidateTimeDimensions, dim.name)) {
      candidateTimeDimensions.push({
        dimension: dim.name,
        description: dim.description
      })
    }
  })

  // --- Event dimensions: string dimensions whose short name hints at an event type
  const eventNameMarkers = ['type', 'event', 'status', 'state', 'action']
  const candidateEventDimensions: Candidate[] = cube.dimensions
    .filter(dim => {
      const shortName = shortNameOf(dim.name)
      return dim.type === 'string' && eventNameMarkers.some(marker => shortName.includes(marker))
    })
    .map(dim => ({
      dimension: dim.name,
      description: dim.description || 'Potential event type dimension'
    }))

  return { candidateBindingKeys, candidateTimeDimensions, candidateEventDimensions }
}
 
/**
 * Generate next-step hints for an AI client.
 *
 * @param _cube - Cube the hints belong to (currently unused)
 * @param analysisConfig - Candidate dimensions for the advanced analysis modes
 * @returns An empty list when there is no analysis config; otherwise the
 *          general workflow hint, preceded by a binding-key hint when more
 *          than one candidate exists and an event-dimension hint when at
 *          least one candidate exists
 */
function generateHints(_cube: CubeMetadata, analysisConfig?: CubeDiscoveryResult['analysisConfig']): string[] {
  if (!analysisConfig) {
    return []
  }

  const { candidateBindingKeys, candidateEventDimensions } = analysisConfig

  // Several possible entities to track: the caller has to pick one
  const bindingKeyHint = candidateBindingKeys.length > 1
    ? ['Choose bindingKey based on what entity to track through the analysis']
    : []

  // Point at the first event-like dimension so its values can be explored
  const eventValuesHint = candidateEventDimensions.length > 0
    ? [`Query ${candidateEventDimensions[0].dimension} dimension to discover available values for funnel steps`]
    : []

  return [
    ...bindingKeyHint,
    ...eventValuesHint,
    // General workflow hint
    'Use /mcp/load with a standard query to discover dimension values before building analysis queries'
  ]
}
 
/**
 * Capability-related fields shared by every discovery result: detected
 * capabilities, analysis config, hints (omitted when empty) and the query
 * schemas (only when funnel, flow or retention is available).
 */
function buildCapabilityFields(
  cube: CubeMetadata
): Pick<CubeDiscoveryResult, 'capabilities' | 'analysisConfig' | 'hints' | 'querySchemas'> {
  const capabilities = detectCapabilities(cube)
  const analysisConfig = buildAnalysisConfig(cube)
  const hints = generateHints(cube, analysisConfig)

  // Only include schemas if analysis modes are available
  const hasAnalysisModes = capabilities.funnel || capabilities.flow || capabilities.retention

  return {
    capabilities,
    analysisConfig,
    hints: hints.length > 0 ? hints : undefined,
    querySchemas: hasAnalysisModes ? QUERY_SCHEMAS : undefined
  }
}

/**
 * Discover relevant cubes based on topic or intent.
 *
 * - With no topic or intent (or only whitespace), the first `limit` cubes are
 *   returned unscored: `relevanceScore` is 1, `matchedOn` is empty, and the
 *   first five measures and dimensions are suggested.
 * - Otherwise keywords are extracted from the combined text, every cube is
 *   scored against them, cubes scoring below `minScore` are dropped, and the
 *   remainder is sorted by descending score and cut to `limit`.
 * - If the text yields no keywords (stop words or very short words only), the
 *   result is empty.
 *
 * @param metadata - Metadata of all cubes to search
 * @param options - Search text, result limit (default 10) and minimum score (default 0.1)
 * @returns Discovery results, most relevant first when a search was performed
 */
export function discoverCubes(
  metadata: CubeMetadata[],
  options: DiscoveryOptions = {}
): CubeDiscoveryResult[] {
  const { topic, intent, limit = 10, minScore = 0.1 } = options

  // Combine topic and intent into search text
  const searchText = [topic, intent].filter(Boolean).join(' ')
  if (!searchText.trim()) {
    // Return all cubes with basic info if no search criteria
    return metadata.slice(0, limit).map(cube => ({
      cube: cube.name,
      title: cube.title,
      description: cube.description,
      relevanceScore: 1,
      matchedOn: [] as CubeDiscoveryResult['matchedOn'],
      suggestedMeasures: cube.measures.slice(0, 5).map(m => m.name),
      suggestedDimensions: cube.dimensions.slice(0, 5).map(d => d.name),
      ...buildCapabilityFields(cube)
    }))
  }

  // Extract keywords from search text
  const keywords = extractKeywords(searchText)
  if (keywords.length === 0) {
    return []
  }

  // Score each cube, keeping those that reach the minimum score
  const results: CubeDiscoveryResult[] = []
  for (const cube of metadata) {
    const { score, matchedOn, suggestedMeasures, suggestedDimensions } = scoreCube(cube, keywords)

    if (score >= minScore) {
      results.push({
        cube: cube.name,
        title: cube.title,
        description: cube.description,
        relevanceScore: score,
        matchedOn,
        suggestedMeasures,
        suggestedDimensions,
        ...buildCapabilityFields(cube)
      })
    }
  }

  // Sort by relevance and limit
  return results
    .sort((a, b) => b.relevanceScore - a.relevanceScore)
    .slice(0, limit)
}
 
/**
 * Find the best matching field across all cubes.
 *
 * Each candidate is scored by the best fuzzy match of `fieldName` against its
 * short name (cube prefix removed), title and synonyms. Only scores above 0.5
 * qualify. On equal scores the first candidate encountered wins; within a
 * cube, measures are examined before dimensions.
 *
 * @param metadata - Metadata of all cubes to search
 * @param fieldName - Field name (or approximation of one) to look up
 * @param fieldType - Restrict the search to measures or dimensions; both when omitted
 * @returns The best match with its cube, score and kind, or `null` when nothing scores above 0.5
 */
export function findBestFieldMatch(
  metadata: CubeMetadata[],
  fieldName: string,
  fieldType?: 'measure' | 'dimension'
): { field: string; cube: string; score: number; type: 'measure' | 'dimension' } | null {
  let bestMatch: { field: string; cube: string; score: number; type: 'measure' | 'dimension' } | null = null

  for (const cube of metadata) {
    // Collect the field groups to search, measures first
    const groups: Array<{
      type: 'measure' | 'dimension'
      fields: ReadonlyArray<{ name: string; title: string; synonyms?: string[] | null }>
    }> = []
    if (!fieldType || fieldType === 'measure') {
      groups.push({ type: 'measure', fields: cube.measures })
    }
    if (!fieldType || fieldType === 'dimension') {
      groups.push({ type: 'dimension', fields: cube.dimensions })
    }

    for (const { type, fields } of groups) {
      for (const field of fields) {
        const shortName = field.name.split('.').pop() || field.name
        let score = fuzzyMatchScore(fieldName, shortName)
        score = Math.max(score, fuzzyMatchScore(fieldName, field.title))
        if (field.synonyms) {
          score = Math.max(score, matchAgainstArray(fieldName, field.synonyms))
        }

        // Strict ">" keeps the earliest candidate on ties
        if (score > 0.5 && (!bestMatch || score > bestMatch.score)) {
          bestMatch = { field: field.name, cube: cube.name, score, type }
        }
      }
    }
  }

  return bestMatch
}