# config.yaml
# Database Configuration
# Connection settings for the database. The user and password below are the
# defaults noted for the docker setup.
# NOTE(review): plaintext credentials in a tracked file; presumably intended
# for local/docker use only. Confirm they are overridden for other deployments.
database:
  db_host: "localhost"  # Database host address
  db_port: 1972  # Database port number
  db_user: "SuperUser"  # Database username (default for docker)
  db_password: "SYS"  # Database password (default for docker ISC_DEFAULT_PASSWORD)
  db_namespace: "USER"  # Database namespace
# Default Embedding Model Configuration
# NOTE(review): the same model name and dimension also appear under
# `embeddings`, and 384 appears again as `vector_dimension` under `storage`.
# Presumably these must stay in agreement; confirm against the consumers.
embedding_model:
  name: "sentence-transformers/all-MiniLM-L6-v2"  # Name of the sentence transformer model
  dimension: 384  # Embedding dimension
# Default Chunking Parameters
# NOTE(review): the unit (tokens vs. characters) is not fixed by this file;
# confirm which one the consuming chunker uses.
chunking:
  chunk_size: 1000  # Target size of text chunks (e.g., in tokens or characters)
  chunk_overlap: 200  # Number of tokens/characters to overlap between chunks
# Data Directories
# Both entries are relative paths.
# NOTE(review): presumably resolved against the project root or the working
# directory; confirm with the code that reads them.
paths:
  data_dir: "data/"  # Root directory for all data
  pmc_sample_dir: "data/pmc_sample/"  # Directory for PMC sample documents
# Logging Configuration
logging:
  log_level: "INFO"  # Logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
  # Logging format string: timestamp, logger name, level name, then message.
  # Kept quoted because the value starts with `%`, a YAML indicator character.
  log_format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# Test Configuration
testing:
  min_docs_e2e: 1000  # Minimum documents required for E2E tests
# Storage Backend Configuration for iris_rag
# Backends are keyed by name under `backends`; only `iris` is defined here.
storage:
  backends:
    iris:
      type: "iris"
      connection_type: "dbapi"
      schema: "RAG"
      table_prefix: ""  # Explicit empty string rather than null
      # Same value (384) as the embedding dimensions declared elsewhere in this
      # file. NOTE(review): presumably these must match; confirm.
      vector_dimension: 384
# Pipeline Configuration for iris_rag
# One mapping per pipeline name. Both pipelines currently use the same
# chunk_size / chunk_overlap values as the top-level `chunking` section.
pipelines:
  basic:
    chunk_size: 1000
    chunk_overlap: 200
    default_top_k: 5
    embedding_batch_size: 32
  crag:
    chunk_size: 1000
    chunk_overlap: 200
    default_top_k: 5
# Embedding Configuration for iris_rag
# Uses the same model name and dimension as the `embedding_model` section.
# NOTE(review): presumably the two sections must be kept in sync; confirm
# which one each consumer reads.
embeddings:
  backend: "sentence_transformers"
  model: "sentence-transformers/all-MiniLM-L6-v2"
  dimension: 384