Memory and Context Management Architecture
Overview
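The context management system prevents token overflow while preserving critical information. It divides the conversation into three sections (prior history, in-loop messages, and the current message), computes the token cost of each candidate representation of every section (full, tool-provided summary where possible, or summary), and selects a layout whose total fits within the context limit.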
The Engine: Context Trimming
File: src/server/repositories/engine/engine.ts:201
/**
 * Selects a context layout for the conversation and applies it section by section.
 *
 * The conversation is treated as three sections: `HISTORY` (`input.messages`),
 * `IN_LOOP` (`input.inLoop`) and `CURRENT_MESSAGE` (an assistant message built
 * here from `input.response`). A token-cost table is built for the three
 * representations of each section, `chooseOptimalLayout` picks one layout
 * for `input.contextLimit`, and the matching `HANDLER_MAP` handler is awaited
 * for each section, one after another.
 *
 * @param input - Engine input; `messages`, `response`, `inLoop` and
 *   `contextLimit` are read here, and the whole object is forwarded to every
 *   handler as `config`.
 * @param toolIterationInfo - Optional record; when provided, its `layout`
 *   property is overwritten with the chosen layout.
 * @returns The trimmed in-loop messages followed by the trimmed current
 *   message, the summary reported by the history handler (if any), and
 *   `shouldDumpSummaryInDB`.
 * @throws Error with message `LAYOUT_NOT_FOUND` when `chooseOptimalLayout`
 *   returns a falsy value.
 */
export const engine = async (
  input: TEngineInput,
  toolIterationInfo?: TToolIterationInfo,
): Promise<TEngineOutput> => {
  const { messages, response, inLoop, contextLimit } = input;

  // Token cost charged to any section represented by a summary.
  const summaryTokenBudget = 1024;

  // The latest assistant turn: its text/tool calls plus the tool results.
  const latestTurn = AssistantMessage({
    content: [
      {
        type: "TEXT",
        toolCalls: response.toolCalls,
        text: response.content,
        // Output and reasoning tokens are both charged to the text part.
        tokens: response.tokens.output + response.tokens.reasoning,
      },
      { type: "TOOL_RESULT", toolResults: response.toolResults },
    ],
    reasoning: response.reasoning,
  });

  // Per-section token costs, both in full and with the
  // "tool-provided summary if possible" flag set.
  const historyFullCost = getTokens(messages);
  const loopFullCost = calculateMessagesTokens(inLoop);
  const loopToolSummaryCost = calculateMessagesTokens(inLoop, true);
  const turnFullCost = calculateMessagesTokens([latestTurn]);
  const turnToolSummaryCost = calculateMessagesTokens([latestTurn], true);

  // Cost of every (section, representation) pair, used for layout selection.
  const costBySection: TTokenTable = {
    HISTORY: {
      FULL: historyFullCost,
      // Infinite cost keeps this representation from being picked for history.
      TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: Infinity,
      SUMMARY: summaryTokenBudget,
    },
    IN_LOOP: {
      FULL: loopFullCost,
      TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: loopToolSummaryCost,
      SUMMARY: summaryTokenBudget,
    },
    CURRENT_MESSAGE: {
      FULL: turnFullCost,
      TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: turnToolSummaryCost,
      SUMMARY: summaryTokenBudget,
    },
  };

  // Pick the layout for this context limit; no layout means we cannot proceed.
  const layout = chooseOptimalLayout(contextLimit, costBySection);
  if (!layout) {
    throw new Error("LAYOUT_NOT_FOUND");
  }

  // Expose the decision to the caller when it asked for iteration details.
  if (toolIterationInfo) {
    toolIterationInfo.layout = layout;
  }

  // Apply the chosen representation to each section, strictly in this order.
  const historyResult = await HANDLER_MAP[layout.HISTORY]({
    messages,
    config: input,
    section: "HISTORY",
  });
  const loopResult = await HANDLER_MAP[layout.IN_LOOP]({
    messages: inLoop,
    config: input,
    section: "IN_LOOP",
  });
  const turnResult = await HANDLER_MAP[layout.CURRENT_MESSAGE]({
    messages: [latestTurn],
    config: input,
    section: "CURRENT_MESSAGE",
  });

  // True when the full history plus the "summary if possible" costs of the
  // other two sections is over the context limit.
  const exceedsLimitWithFullHistory =
    historyFullCost + loopToolSummaryCost + turnToolSummaryCost > contextLimit;

  return {
    inLoop: [...loopResult.trimmedMessages, ...turnResult.trimmedMessages],
    summary: historyResult?.summary,
    shouldDumpSummaryInDB: exceedsLimitWithFullHistory,
  };
};