diff --git a/src/services/code-index/constants/index.ts b/src/services/code-index/constants/index.ts index 6f0e0fe7e6..1beb140992 100644 --- a/src/services/code-index/constants/index.ts +++ b/src/services/code-index/constants/index.ts @@ -24,6 +24,7 @@ export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate /**OpenAI Embedder */ export const MAX_BATCH_TOKENS = 100000 +export const MAX_BATCH_ITEMS = 32 // Maximum number of items per embedding API call (provider limit, fixes #335) export const MAX_ITEM_TOKENS = 8191 export const BATCH_PROCESSING_CONCURRENCY = 10 diff --git a/src/services/code-index/embedders/bedrock.ts b/src/services/code-index/embedders/bedrock.ts index b03d062cb7..a229823962 100644 --- a/src/services/code-index/embedders/bedrock.ts +++ b/src/services/code-index/embedders/bedrock.ts @@ -3,6 +3,7 @@ import { fromIni, fromNodeProviderChain } from "@aws-sdk/credential-providers" import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces" import { MAX_BATCH_TOKENS, + MAX_BATCH_ITEMS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES as MAX_RETRIES, INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS, @@ -83,7 +84,7 @@ export class BedrockEmbedder implements IEmbedder { continue } - if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) { + if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS && currentBatch.length < MAX_BATCH_ITEMS) { currentBatch.push(text) currentBatchTokens += itemTokens processedIndices.push(i) diff --git a/src/services/code-index/embedders/openai-compatible.ts b/src/services/code-index/embedders/openai-compatible.ts index 6eaf2b6c2c..6ae82ebf48 100644 --- a/src/services/code-index/embedders/openai-compatible.ts +++ b/src/services/code-index/embedders/openai-compatible.ts @@ -2,6 +2,7 @@ import { OpenAI } from "openai" import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder" import { MAX_BATCH_TOKENS, + MAX_BATCH_ITEMS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES as MAX_RETRIES, INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS, @@ -144,7 +145,7 @@ export class OpenAICompatibleEmbedder implements IEmbedder { continue } - if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) { + if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS && currentBatch.length < MAX_BATCH_ITEMS) { currentBatch.push(text) currentBatchTokens += itemTokens processedIndices.push(i) diff --git a/src/services/code-index/embedders/openai.ts b/src/services/code-index/embedders/openai.ts index b993e280d9..ed8c52943a 100644 --- a/src/services/code-index/embedders/openai.ts +++ b/src/services/code-index/embedders/openai.ts @@ -4,6 +4,7 @@ import { ApiHandlerOptions } from "../../../shared/api" import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces" import { MAX_BATCH_TOKENS, + MAX_BATCH_ITEMS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES as MAX_RETRIES, INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS, @@ -100,7 +101,7 @@ export class OpenAiEmbedder extends OpenAiNativeHandler implements IEmbedder { continue } - if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) { + if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS && currentBatch.length < MAX_BATCH_ITEMS) { currentBatch.push(text) currentBatchTokens += itemTokens processedIndices.push(i) diff --git a/src/services/code-index/embedders/openrouter.ts b/src/services/code-index/embedders/openrouter.ts index d98aaeeeb5..b2851d2be7 100644 --- a/src/services/code-index/embedders/openrouter.ts +++ b/src/services/code-index/embedders/openrouter.ts @@ -2,6 +2,7 @@ import { OpenAI } from "openai" import { IEmbedder, EmbeddingResponse, EmbedderInfo } from "../interfaces/embedder" import { MAX_BATCH_TOKENS, + MAX_BATCH_ITEMS, MAX_ITEM_TOKENS, MAX_BATCH_RETRIES as MAX_RETRIES, INITIAL_RETRY_DELAY_MS as INITIAL_DELAY_MS, @@ -148,7 +149,7 @@ export class OpenRouterEmbedder implements IEmbedder { continue } - if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS) { + if (currentBatchTokens + itemTokens <= MAX_BATCH_TOKENS && currentBatch.length < MAX_BATCH_ITEMS) { currentBatch.push(text) currentBatchTokens += itemTokens processedIndices.push(i)