
RAG (Retrieval-Augmented Generation) Demo

Overview

This example shows how to build a RAG system with Agno-Go. RAG combines information retrieval from a knowledge base with LLM text generation to produce accurate, grounded answers. The system uses ChromaDB for vector storage, OpenAI embeddings, and a custom RAG toolkit that enables semantic search.

What You'll Learn

  • How to create and use OpenAI embeddings
  • How to set up ChromaDB as a vector database
  • How to chunk documents for optimal retrieval
  • How to build a custom RAG toolkit for an Agent
  • How to create an Agent with knowledge-retrieval capabilities
  • Best practices for RAG implementations

Prerequisites

  • Go 1.21 or later
  • An OpenAI API key
  • ChromaDB running locally (via Docker)

Setup

  1. Set your OpenAI API key:
bash
export OPENAI_API_KEY=sk-your-api-key-here
  2. Start ChromaDB with Docker:
bash
docker pull chromadb/chroma
docker run -p 8000:8000 chromadb/chroma
  3. Navigate to the example directory:
bash
cd cmd/examples/rag_demo
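
You can verify ChromaDB is reachable before running the demo (this is the same heartbeat endpoint referenced in Troubleshooting below):

bash
curl http://localhost:8000/api/v1/heartbeat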

Complete Code

go
package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/rexleimo/agno-go/pkg/agno/agent"
	openaiembed "github.com/rexleimo/agno-go/pkg/agno/embeddings/openai"
	"github.com/rexleimo/agno-go/pkg/agno/knowledge"
	openaimodel "github.com/rexleimo/agno-go/pkg/agno/models/openai"
	"github.com/rexleimo/agno-go/pkg/agno/tools/toolkit"
	"github.com/rexleimo/agno-go/pkg/agno/vectordb"
	"github.com/rexleimo/agno-go/pkg/agno/vectordb/chromadb"
)

// RAGToolkit provides knowledge retrieval tools for the agent
type RAGToolkit struct {
	*toolkit.BaseToolkit
	vectorDB vectordb.VectorDB
}

// NewRAGToolkit creates a new RAG toolkit
func NewRAGToolkit(db vectordb.VectorDB) *RAGToolkit {
	t := &RAGToolkit{
		BaseToolkit: toolkit.NewBaseToolkit("knowledge_retrieval"),
		vectorDB:    db,
	}

	// Register search function
	t.RegisterFunction(&toolkit.Function{
		Name:        "search_knowledge",
		Description: "Search the knowledge base for relevant information. Use this to find answers to user questions.",
		Parameters: map[string]toolkit.Parameter{
			"query": {
				Type:        "string",
				Description: "The search query or question",
				Required:    true,
			},
			"limit": {
				Type:        "integer",
				Description: "Maximum number of results to return (default: 3)",
				Required:    false,
			},
		},
		Handler: t.searchKnowledge,
	})

	return t
}

func (t *RAGToolkit) searchKnowledge(ctx context.Context, args map[string]interface{}) (interface{}, error) {
	query, ok := args["query"].(string)
	if !ok {
		return nil, fmt.Errorf("query must be a string")
	}

	limit := 3
	if l, ok := args["limit"].(float64); ok {
		limit = int(l)
	}

	results, err := t.vectorDB.Query(ctx, query, limit, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to search knowledge base: %w", err)
	}

	// Format results for the agent
	var formattedResults []map[string]interface{}
	for i, result := range results {
		formattedResults = append(formattedResults, map[string]interface{}{
			"rank":     i + 1,
			"content":  result.Content,
			"score":    result.Score,
			"metadata": result.Metadata,
		})
	}

	return formattedResults, nil
}

func main() {
	fmt.Println("🚀 RAG (Retrieval-Augmented Generation) Demo")
	fmt.Println("This example demonstrates:")
	fmt.Println("1. Loading documents from files")
	fmt.Println("2. Chunking text into smaller pieces")
	fmt.Println("3. Generating embeddings with OpenAI")
	fmt.Println("4. Storing in ChromaDB vector database")
	fmt.Println("5. Using RAG with an Agent to answer questions")
	fmt.Println()

	// Check environment variables
	openaiKey := os.Getenv("OPENAI_API_KEY")
	if openaiKey == "" {
		log.Fatal("OPENAI_API_KEY environment variable is required")
	}

	ctx := context.Background()

	// Step 1: Create embedding function
	fmt.Println("📊 Step 1: Creating OpenAI embedding function...")
	embedFunc, err := openaiembed.New(openaiembed.Config{
		APIKey: openaiKey,
		Model:  "text-embedding-3-small",
	})
	if err != nil {
		log.Fatalf("Failed to create embedding function: %v", err)
	}
	fmt.Printf("   ✅ Created embedding function (model: %s, dimensions: %d)\n\n",
		embedFunc.GetModel(), embedFunc.GetDimensions())

	// Step 2: Create ChromaDB vector database
	fmt.Println("💾 Step 2: Connecting to ChromaDB...")
	db, err := chromadb.New(chromadb.Config{
		BaseURL:           "http://localhost:8000",
		CollectionName:    "rag_demo",
		EmbeddingFunction: embedFunc,
	})
	if err != nil {
		log.Fatalf("Failed to create ChromaDB: %v", err)
	}
	defer db.Close()

	// Create collection
	err = db.CreateCollection(ctx, "", map[string]interface{}{
		"description": "RAG demo knowledge base",
	})
	if err != nil {
		log.Fatalf("Failed to create collection: %v", err)
	}
	fmt.Println("   ✅ Connected to ChromaDB and created collection")

	// Step 3: Load and process documents
	fmt.Println("📚 Step 3: Loading and processing documents...")

	// Sample documents about AI and ML
	sampleDocs := []knowledge.Document{
		{
			ID:      "doc1",
			Content: "Artificial Intelligence (AI) is the simulation of human intelligence by machines. AI systems can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. Modern AI is based on machine learning algorithms that can learn from data.",
			Metadata: map[string]interface{}{
				"topic": "AI Overview",
				"date":  "2025-01-01",
			},
		},
		{
			ID:      "doc2",
			Content: "Machine Learning (ML) is a subset of AI that focuses on creating systems that learn from data. Instead of being explicitly programmed, ML models improve their performance through experience. Common ML algorithms include neural networks, decision trees, and support vector machines.",
			Metadata: map[string]interface{}{
				"topic": "Machine Learning",
				"date":  "2025-01-01",
			},
		},
		{
			ID:      "doc3",
			Content: "Vector databases are specialized databases designed to store and query high-dimensional vector embeddings. They enable semantic search by finding similar vectors using distance metrics like cosine similarity or Euclidean distance. Vector databases are essential for RAG (Retrieval-Augmented Generation) systems.",
			Metadata: map[string]interface{}{
				"topic": "Vector Databases",
				"date":  "2025-01-01",
			},
		},
		{
			ID:      "doc4",
			Content: "Retrieval-Augmented Generation (RAG) combines information retrieval with text generation. It first retrieves relevant documents from a knowledge base, then uses a language model to generate responses based on the retrieved context. RAG improves accuracy and reduces hallucinations in AI systems.",
			Metadata: map[string]interface{}{
				"topic": "RAG",
				"date":  "2025-01-01",
			},
		},
		{
			ID:      "doc5",
			Content: "Large Language Models (LLMs) like GPT-4 are neural networks trained on vast amounts of text data. They can understand and generate human-like text, perform reasoning, answer questions, and even write code. LLMs are the foundation of modern AI assistants and chatbots.",
			Metadata: map[string]interface{}{
				"topic": "Large Language Models",
				"date":  "2025-01-01",
			},
		},
	}

	// Chunk documents (optional, useful for large documents)
	chunker := knowledge.NewCharacterChunker(500, 50)
	var allChunks []knowledge.Chunk
	for _, doc := range sampleDocs {
		chunks, err := chunker.Chunk(doc)
		if err != nil {
			log.Printf("Warning: Failed to chunk document %s: %v", doc.ID, err)
			continue
		}
		allChunks = append(allChunks, chunks...)
	}
	fmt.Printf("   ✅ Loaded %d documents, created %d chunks\n", len(sampleDocs), len(allChunks))

	// Step 4: Generate embeddings and store in vector DB
	fmt.Println("\n🔢 Step 4: Generating embeddings and storing in ChromaDB...")

	var vdbDocs []vectordb.Document
	for _, chunk := range allChunks {
		vdbDocs = append(vdbDocs, vectordb.Document{
			ID:       chunk.ID,
			Content:  chunk.Content,
			Metadata: chunk.Metadata,
			// Embedding will be generated automatically by ChromaDB
		})
	}

	err = db.Add(ctx, vdbDocs)
	if err != nil {
		log.Fatalf("Failed to add documents to vector DB: %v", err)
	}

	count, _ := db.Count(ctx)
	fmt.Printf("   ✅ Stored %d documents in vector database\n\n", count)

	// Step 5: Test retrieval
	fmt.Println("🔍 Step 5: Testing knowledge retrieval...")
	testQuery := "What is machine learning?"
	results, err := db.Query(ctx, testQuery, 2, nil)
	if err != nil {
		log.Fatalf("Failed to query: %v", err)
	}

	fmt.Printf("   Query: \"%s\"\n", testQuery)
	fmt.Printf("   Found %d relevant documents:\n", len(results))
	for i, result := range results {
		fmt.Printf("   %d. [Score: %.4f] %s\n", i+1, result.Score,
			truncate(result.Content, 80))
	}
	fmt.Println()

	// Step 6: Create RAG-powered Agent
	fmt.Println("🤖 Step 6: Creating RAG-powered Agent...")

	// Create OpenAI model
	model, err := openaimodel.New("gpt-4o-mini", openaimodel.Config{
		APIKey:      openaiKey,
		Temperature: 0.7,
		MaxTokens:   500,
	})
	if err != nil {
		log.Fatalf("Failed to create model: %v", err)
	}

	// Create RAG toolkit
	ragToolkit := NewRAGToolkit(db)

	// Create agent with RAG capabilities
	ag, err := agent.New(agent.Config{
		Name:     "RAG Assistant",
		Model:    model,
		Toolkits: []toolkit.Toolkit{ragToolkit},
		Instructions: `You are a helpful AI assistant with access to a knowledge base.
When users ask questions:
1. Use the search_knowledge tool to find relevant information
2. Base your answer on the retrieved information
3. Cite the sources when possible
4. If you can't find relevant information, say so

Always be helpful, accurate, and concise.`,
		MaxLoops: 5,
	})
	if err != nil {
		log.Fatalf("Failed to create agent: %v", err)
	}
	fmt.Println("   ✅ Agent created with RAG capabilities")

	// Step 7: Interactive Q&A
	fmt.Println("💬 Step 7: Interactive Q&A (RAG in action)")
	fmt.Println("=" + string(make([]byte, 60)) + "=")

	questions := []string{
		"What is artificial intelligence?",
		"Explain the difference between AI and machine learning",
		"What are vector databases used for?",
		"How does RAG improve AI systems?",
	}

	for i, question := range questions {
		fmt.Printf("\n[Question %d] User: %s\n", i+1, question)

		output, err := ag.Run(ctx, question)
		if err != nil {
			log.Printf("Error: %v", err)
			continue
		}

		fmt.Printf("Assistant: %s\n", output.Content)
	}

	fmt.Println("\n" + string(make([]byte, 60)) + "=")
	fmt.Println("\n✅ RAG Demo completed successfully!")
	fmt.Println("\nKey Takeaways:")
	fmt.Println("• Documents are chunked and embedded automatically")
	fmt.Println("• Vector database enables semantic search")
	fmt.Println("• Agent uses RAG to provide accurate, grounded answers")
	fmt.Println("• Citations and sources improve trustworthiness")

	// Cleanup
	fmt.Println("\n🧹 Cleaning up...")
	err = db.DeleteCollection(ctx, "rag_demo")
	if err != nil {
		log.Printf("Warning: Failed to delete collection: %v", err)
	} else {
		fmt.Println("   ✅ Deleted demo collection")
	}
}

func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	return s[:maxLen] + "..."
}

Code Explanation

1. Custom RAG Toolkit

go
type RAGToolkit struct {
	*toolkit.BaseToolkit
	vectorDB vectordb.VectorDB
}

The RAG toolkit wraps the vector database and exposes a search_knowledge function that the Agent can call:

  • Accepts a query string and an optional result limit
  • Searches the vector database using semantic similarity
  • Returns formatted results with relevance scores

2. OpenAI Embeddings

go
embedFunc, err := openaiembed.New(openaiembed.Config{
	APIKey: openaiKey,
	Model:  "text-embedding-3-small",
})
  • Uses OpenAI's text-embedding-3-small model (1536 dimensions)
  • Converts text into dense vector representations
  • Enables semantic similarity search

3. ChromaDB Vector Database

go
db, err := chromadb.New(chromadb.Config{
	BaseURL:           "http://localhost:8000",
	CollectionName:    "rag_demo",
	EmbeddingFunction: embedFunc,
})
  • Connects to a local ChromaDB instance
  • Creates a collection for storing document embeddings
  • Generates embeddings automatically as documents are added

4. Document Chunking

go
chunker := knowledge.NewCharacterChunker(500, 50)
  • Splits documents into 500-character chunks
  • Keeps a 50-character overlap between chunks
  • Preserves context across chunk boundaries
  • Improves retrieval accuracy for long documents

5. Agent with RAG Capabilities

go
ag, err := agent.New(agent.Config{
	Name:     "RAG Assistant",
	Model:    model,
	Toolkits: []toolkit.Toolkit{ragToolkit},
	Instructions: `You are a helpful AI assistant with access to a knowledge base.
When users ask questions:
1. Use the search_knowledge tool to find relevant information
2. Base your answer on the retrieved information
3. Cite the sources when possible
4. If you can't find relevant information, say so`,
	MaxLoops: 5,
})

The instructions tell the Agent:

  • When to use the knowledge-retrieval tool
  • How to incorporate retrieved information
  • To cite sources for transparency
  • To be honest when information is unavailable

Running the Example

bash
# Make sure ChromaDB is running
docker run -p 8000:8000 chromadb/chroma

# Run the demo
go run main.go

Expected Output

🚀 RAG (Retrieval-Augmented Generation) Demo
This example demonstrates:
1. Loading documents from files
2. Chunking text into smaller pieces
3. Generating embeddings with OpenAI
4. Storing in ChromaDB vector database
5. Using RAG with an Agent to answer questions

📊 Step 1: Creating OpenAI embedding function...
   ✅ Created embedding function (model: text-embedding-3-small, dimensions: 1536)

💾 Step 2: Connecting to ChromaDB...
   ✅ Connected to ChromaDB and created collection

📚 Step 3: Loading and processing documents...
   ✅ Loaded 5 documents, created 5 chunks

🔢 Step 4: Generating embeddings and storing in ChromaDB...
   ✅ Stored 5 documents in vector database

🔍 Step 5: Testing knowledge retrieval...
   Query: "What is machine learning?"
   Found 2 relevant documents:
   1. [Score: 0.8234] Machine Learning (ML) is a subset of AI that focuses on creating syste...
   2. [Score: 0.7156] Artificial Intelligence (AI) is the simulation of human intelligence b...

🤖 Step 6: Creating RAG-powered Agent...
   ✅ Agent created with RAG capabilities

💬 Step 7: Interactive Q&A (RAG in action)
============================================================

[Question 1] User: What is artificial intelligence?
Assistant: Based on the knowledge base, Artificial Intelligence (AI) is the simulation of human intelligence by machines. AI systems can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. Modern AI is based on machine learning algorithms that can learn from data.

[Question 2] User: Explain the difference between AI and machine learning
Assistant: AI (Artificial Intelligence) is the broader concept of machines simulating human intelligence to perform various tasks. Machine Learning (ML) is a subset of AI that specifically focuses on creating systems that learn from data rather than being explicitly programmed. So while all ML is AI, not all AI is ML. ML improves performance through experience, using algorithms like neural networks and decision trees.

[Question 3] User: What are vector databases used for?
Assistant: According to the knowledge base, vector databases are specialized databases designed to store and query high-dimensional vector embeddings. They enable semantic search by finding similar vectors using distance metrics like cosine similarity or Euclidean distance. Vector databases are essential for RAG (Retrieval-Augmented Generation) systems.

[Question 4] User: How does RAG improve AI systems?
Assistant: RAG (Retrieval-Augmented Generation) improves AI systems by combining information retrieval with text generation. The process first retrieves relevant documents from a knowledge base, then uses a language model to generate responses based on the retrieved context. This approach improves accuracy and reduces hallucinations in AI systems by grounding responses in actual knowledge.

============================================================

✅ RAG Demo completed successfully!

Key Takeaways:
• Documents are chunked and embedded automatically
• Vector database enables semantic search
• Agent uses RAG to provide accurate, grounded answers
• Citations and sources improve trustworthiness

🧹 Cleaning up...
   ✅ Deleted demo collection

Key Concepts

RAG Pipeline

  1. Ingestion: documents are loaded and split into chunks
  2. Embedding: chunks are converted into vector embeddings
  3. Storage: embeddings are stored in the vector database
  4. Retrieval: the user query is embedded and similar documents are found
  5. Generation: the LLM generates an answer from the retrieved context (the sketch below maps each stage onto the demo's calls)
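
The complete code above walks through this pipeline step by step. As a minimal sketch condensing it into one function (assuming the demo's imports, and that agent.New returns a *agent.Agent, as the demo's usage suggests):

go
// runPipeline condenses the demo's five RAG stages; error handling is terse.
func runPipeline(ctx context.Context, db vectordb.VectorDB, ag *agent.Agent, docs []knowledge.Document) error {
	// 1. Ingestion: split documents into chunks.
	chunker := knowledge.NewCharacterChunker(500, 50)
	var vdbDocs []vectordb.Document
	for _, doc := range docs {
		chunks, err := chunker.Chunk(doc)
		if err != nil {
			return err
		}
		for _, c := range chunks {
			vdbDocs = append(vdbDocs, vectordb.Document{ID: c.ID, Content: c.Content, Metadata: c.Metadata})
		}
	}

	// 2 + 3. Embedding and storage: ChromaDB embeds the documents as they are added.
	if err := db.Add(ctx, vdbDocs); err != nil {
		return err
	}

	// 4. Retrieval: the query is embedded and the most similar chunks are returned.
	if _, err := db.Query(ctx, "What is machine learning?", 3, nil); err != nil {
		return err
	}

	// 5. Generation: the agent calls search_knowledge, then answers from the retrieved context.
	output, err := ag.Run(ctx, "What is machine learning?")
	if err != nil {
		return err
	}
	fmt.Println(output.Content)
	return nil
}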

Semantic Search

Unlike keyword search, semantic search finds documents by meaning:

  • Query: "What is ML?"
  • Matches: documents about "Machine Learning", "neural networks", "training models"
  • More robust than exact keyword matching (see the sketch below)
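
A short sketch, reusing the db handle, ctx, and truncate helper from the complete code above: "What is ML?" shares no keywords with doc2's text, yet doc2 should rank first because its embedding is closest to the query's.

go
// Semantic match: ranking comes from embedding proximity, not keyword overlap.
results, err := db.Query(ctx, "What is ML?", 2, nil)
if err != nil {
	log.Fatalf("query failed: %v", err)
}
for i, r := range results {
	fmt.Printf("%d. [%.4f] %s\n", i+1, r.Score, truncate(r.Content, 60))
}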

Chunking Strategy

Document chunking parameters affect retrieval quality (a tuning sketch follows the list):

  • Chunk size (500): balances context against precision

    • Too small: context is lost
    • Too large: irrelevant content gets retrieved
  • Overlap (50): prevents important information from being split

    • Ensures continuity between chunks
    • Matters for sentences that cross chunk boundaries
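
To see how these parameters interact, here is a small tuning sketch. It reuses sampleDocs and the NewCharacterChunker constructor from the demo; the size/overlap pairs are illustrative, not recommendations.

go
// Compare chunk counts across a few size/overlap settings.
for _, cfg := range []struct{ size, overlap int }{
	{300, 30},   // smaller chunks: higher precision, less context
	{500, 50},   // the demo's default
	{1000, 100}, // larger chunks: more context, noisier retrieval
} {
	chunker := knowledge.NewCharacterChunker(cfg.size, cfg.overlap)
	total := 0
	for _, doc := range sampleDocs {
		chunks, err := chunker.Chunk(doc)
		if err != nil {
			continue // skip documents that fail to chunk
		}
		total += len(chunks)
	}
	fmt.Printf("size=%d overlap=%d -> %d chunks\n", cfg.size, cfg.overlap, total)
}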

Custom Toolkits

The RAG toolkit demonstrates how to build custom tools:

go
t.RegisterFunction(&toolkit.Function{
	Name:        "search_knowledge",
	Description: "Search the knowledge base...",
	Parameters: map[string]toolkit.Parameter{
		"query": {
			Type:        "string",
			Description: "The search query",
			Required:    true,
		},
	},
	Handler: t.searchKnowledge,
})

Key elements:

  • A clear name and description the LLM can understand
  • Well-typed parameters
  • A handler function that implements the logic (the sketch below extends the pattern with a second tool)
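
A hypothetical example of a second tool on the same toolkit: the list_topics name, description, and hardcoded topic list are illustrative only; RegisterFunction and the Function/Parameter types are the same ones used above.

go
// Hypothetical second tool: let the agent discover which topics exist
// before it searches.
t.RegisterFunction(&toolkit.Function{
	Name:        "list_topics",
	Description: "List the topics available in the knowledge base.",
	Parameters:  map[string]toolkit.Parameter{},
	Handler: func(ctx context.Context, args map[string]interface{}) (interface{}, error) {
		// A real handler would aggregate topic metadata from the vector DB;
		// these values are hardcoded for illustration.
		return []string{"AI Overview", "Machine Learning", "Vector Databases",
			"RAG", "Large Language Models"}, nil
	},
})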

Advanced Features

Metadata Filtering

You can filter results by metadata:

go
results, err := db.Query(ctx, query, limit, map[string]interface{}{
	"topic": "Machine Learning",
	"date": map[string]interface{}{
		"$gte": "2025-01-01",
	},
})
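
One way to surface filtering to the Agent is an optional topic argument on search_knowledge. Here is a sketch of the relevant fragment inside the searchKnowledge handler; the topic argument is an assumed extension, not part of the demo:

go
// Inside searchKnowledge: build a metadata filter from an optional argument.
var filter map[string]interface{}
if topic, ok := args["topic"].(string); ok && topic != "" {
	filter = map[string]interface{}{"topic": topic}
}
results, err := t.vectorDB.Query(ctx, query, limit, filter)
if err != nil {
	return nil, fmt.Errorf("failed to search knowledge base: %w", err)
}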

Hybrid Search

Combine semantic and keyword search:

go
// Future feature - not yet implemented
results, err := db.HybridQuery(ctx, query, limit, HybridConfig{
	SemanticWeight: 0.7,
	KeywordWeight:  0.3,
})

Reranking

Improve results by reranking them with a stronger model:

go
// Future feature - not yet implemented
reranked := reranker.Rerank(results, query, limit)

Best Practices

  1. Chunk size: start with 500-1000 characters and adjust for your content
  2. Overlap: use 10-20% of the chunk size
  3. Embeddings: use domain-specific embeddings when available
  4. Metadata: include metadata for filtering and citations
  5. Error handling: always handle retrieval failures gracefully (see the sketch below)
  6. Caching: consider caching frequently accessed embeddings
  7. Monitoring: track retrieval quality and tune parameters
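
As an illustration of practice 5, the search handler can degrade gracefully instead of propagating the error. Here is a sketch of an alternative ending for searchKnowledge; the payload shape is an assumption, chosen so the LLM can report honestly rather than hallucinate:

go
// Graceful degradation: return an honest "unavailable" signal to the LLM
// instead of failing the whole agent run.
results, err := t.vectorDB.Query(ctx, query, limit, nil)
if err != nil {
	return map[string]interface{}{
		"error":   "knowledge base temporarily unavailable",
		"results": []interface{}{},
	}, nil
}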

Troubleshooting

오류: "Failed to connect to ChromaDB"

  • Check that ChromaDB is running: docker ps | grep chroma
  • Check the port (default: 8000): curl http://localhost:8000/api/v1/heartbeat

오류: "OPENAI_API_KEY environment variable is required"

  • Set your API key: export OPENAI_API_KEY=sk-...

Poor Retrieval Quality

  • Adjust chunk size and overlap
  • Try a different embedding model
  • Add more relevant documents
  • Use metadata filtering

High Latency

  • Use a smaller embedding model
  • Reduce the number of results (the limit parameter)
  • Consider caching embeddings
  • Use a local embedding model (future feature)

Next Steps

Additional Resources
