07-MemoryAndContext

Memory and Context Management Architecture

Overview

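The context management system prevents token overflow while preserving critical information. It divides the conversation into three sections — prior history, in-loop messages, and the current message — and, based on each section's token cost, chooses whether to keep that section in full or replace it with a summary so that the result fits within the context limit.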

The Engine: Context Trimming

File: src/server/repositories/engine/engine.ts:201

TypeScript

/**
 * Trims the conversation so that it fits within `input.contextLimit`.
 *
 * Steps, in order:
 * 1. Wrap the latest response (text, tool calls, tool results) in an assistant message.
 * 2. Measure the token cost of the HISTORY, IN_LOOP and CURRENT_MESSAGE sections.
 * 3. Ask `chooseOptimalLayout` which representation to use for each section.
 * 4. Run the handler mapped to each section's chosen representation.
 *
 * @param input - Engine input. `messages`, `inLoop`, `response` and `contextLimit`
 *   are read here; the whole object is also forwarded to every handler as `config`.
 * @param toolIterationInfo - Optional. When provided, its `layout` property is
 *   overwritten with the chosen layout.
 * @returns The trimmed in-loop messages followed by the trimmed current message,
 *   the summary produced by the history handler (if any), and a flag that is true
 *   when full history plus the tool-summary costs of the other two sections
 *   exceeds the context limit.
 * @throws Error("LAYOUT_NOT_FOUND") when `chooseOptimalLayout` returns a falsy value.
 */
export const engine = async (
	input: TEngineInput,
	toolIterationInfo?: TToolIterationInfo,
): Promise<TEngineOutput> => {
	// Default summary size, used as the SUMMARY cost of every section.
	const SUMMARY_COST = 1024;

	const {
		messages: history,
		inLoop: loopMessages,
		response: reply,
		contextLimit: budget,
	} = input;

	// Assistant response and its tool results, combined into one message.
	const latestTurn = AssistantMessage({
		content: [
			{
				type: "TEXT",
				toolCalls: reply.toolCalls,
				text: reply.content,
				tokens: reply.tokens.output + reply.tokens.reasoning,
			},
			{ type: "TOOL_RESULT", toolResults: reply.toolResults },
		],
		reasoning: reply.reasoning,
	});

	// Token counts per section. The `true` flag yields the count used for the
	// TOOL_PROVIDED_SUMMARY_IF_POSSIBLE column.
	const cost = {
		history: getTokens(history),
		loopFull: calculateMessagesTokens(loopMessages),
		loopToolSummary: calculateMessagesTokens(loopMessages, true),
		turnFull: calculateMessagesTokens([latestTurn]),
		turnToolSummary: calculateMessagesTokens([latestTurn], true),
	};

	// Cost table consumed by the layout chooser.
	const tokenTable: TTokenTable = {
		HISTORY: {
			FULL: cost.history,
			// Infinity rules this option out for history.
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: Infinity,
			SUMMARY: SUMMARY_COST,
		},
		IN_LOOP: {
			FULL: cost.loopFull,
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: cost.loopToolSummary,
			SUMMARY: SUMMARY_COST,
		},
		CURRENT_MESSAGE: {
			FULL: cost.turnFull,
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: cost.turnToolSummary,
			SUMMARY: SUMMARY_COST,
		},
	};

	// Pick the layout (cheapest that fits within the context limit).
	const layout = chooseOptimalLayout(budget, tokenTable);
	if (!layout) {
		throw new Error("LAYOUT_NOT_FOUND");
	}

	// Expose the decision to the caller when it asked for iteration info.
	if (toolIterationInfo) {
		toolIterationInfo.layout = layout;
	}

	// Handlers run one after another: history, then in-loop, then current message.
	const historyResult = await HANDLER_MAP[layout.HISTORY]({
		messages: history,
		config: input,
		section: "HISTORY",
	});
	const loopResult = await HANDLER_MAP[layout.IN_LOOP]({
		messages: loopMessages,
		config: input,
		section: "IN_LOOP",
	});
	const turnResult = await HANDLER_MAP[layout.CURRENT_MESSAGE]({
		messages: [latestTurn],
		config: input,
		section: "CURRENT_MESSAGE",
	});

	// Full history plus the tool-summary cost of the other two sections.
	const fullHistoryFootprint =
		cost.history + cost.loopToolSummary + cost.turnToolSummary;

	return {
		inLoop: [...loopResult.trimmedMessages, ...turnResult.trimmedMessages],
		summary: historyResult?.summary,
		shouldDumpSummaryInDB: fullHistoryFootprint > budget,
	};
};

/**
 * Trims the conversation so that it fits within `input.contextLimit`.
 *
 * Builds an assistant message from the latest response, measures the token cost
 * of the HISTORY, IN_LOOP and CURRENT_MESSAGE sections, selects a layout with
 * `chooseOptimalLayout`, and runs the handler mapped to each section's choice.
 *
 * @param input - Engine input. `messages`, `inLoop`, `response` and `contextLimit`
 *   are read here; the whole object is also passed to every handler as `config`.
 * @param toolIterationInfo - Optional. When provided, its `layout` property is
 *   set to the chosen layout.
 * @returns The trimmed in-loop messages followed by the trimmed current message,
 *   the history handler's summary (if any), and `shouldDumpSummaryInDB`.
 * @throws Error("LAYOUT_NOT_FOUND") when `chooseOptimalLayout` returns a falsy value.
 */
export const engine = async (
	input: TEngineInput,
	toolIterationInfo?: TToolIterationInfo,
): Promise<TEngineOutput> => {
	const { messages, response, inLoop, contextLimit } = input;

	// Build current message (assistant response + tool results)
	const currentMessage = AssistantMessage({
		content: [
			{
				type: "TEXT",
				toolCalls: response.toolCalls,
				text: response.content,
				tokens: response.tokens.output + response.tokens.reasoning,
			},
			{ type: "TOOL_RESULT", toolResults: response.toolResults },
		],
		reasoning: response.reasoning,
	});

	// Calculate token counts for each section
	const chatHistorySoFarTokens = getTokens(messages);
	const inLoopTokens = calculateMessagesTokens(inLoop);
	const inLoopSummaryIfPossibleTokens = calculateMessagesTokens(inLoop, true);
	const currentMessageTokens = calculateMessagesTokens([currentMessage]);
	const currentMessageSummaryIfPossibleTokens = calculateMessagesTokens(
		[currentMessage],
		true,
	);

	// Default summary size
	const DEFAULT_SUMMARY_SIZE = 1024;

	// Build token cost table for layout selection
	const tokenTable: TTokenTable = {
		HISTORY: {
			FULL: chatHistorySoFarTokens,
			// Never use for history
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: Infinity,
			SUMMARY: DEFAULT_SUMMARY_SIZE,
		},
		IN_LOOP: {
			FULL: inLoopTokens,
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: inLoopSummaryIfPossibleTokens,
			SUMMARY: DEFAULT_SUMMARY_SIZE,
		},
		CURRENT_MESSAGE: {
			FULL: currentMessageTokens,
			TOOL_PROVIDED_SUMMARY_IF_POSSIBLE: currentMessageSummaryIfPossibleTokens,
			SUMMARY: DEFAULT_SUMMARY_SIZE,
		},
	};

	// Choose optimal layout (cheapest that fits within contextLimit)
	const bestLayout = chooseOptimalLayout(contextLimit, tokenTable);
	if (!bestLayout) throw new Error("LAYOUT_NOT_FOUND");

	// Record the chosen layout on the caller-supplied iteration info, if any.
	if (toolIterationInfo) {
		toolIterationInfo.layout = bestLayout;
	}

	// Execute handlers for each section (sequentially: history, in-loop, current)
	const history_ = await HANDLER_MAP[bestLayout.HISTORY]({
		messages,
		config: input,
		section: "HISTORY",
	});

	const inLoop_ = await HANDLER_MAP[bestLayout.IN_LOOP]({
		messages: inLoop,
		config: input,
		section: "IN_LOOP",
	});

	const currentMessage_ = await HANDLER_MAP[bestLayout.CURRENT_MESSAGE]({
		messages: [currentMessage],
		config: input,
		section: "CURRENT_MESSAGE",
	});

	return {
		inLoop: [...inLoop_.trimmedMessages, ...currentMessage_.trimmedMessages],
		summary: history_?.summary,
		// True when full history plus the tool-summary costs of the other two
		// sections exceeds the context limit.
		shouldDumpSummaryInDB:
			chatHistorySoFarTokens +
				inLoopSummaryIfPossibleTokens +
				currentMessageSummaryIfPossibleTokens >
			contextLimit,
	};
};

Memory and Context Management Architecture

Overview

The Engine: Context Trimming