commit 002e05eb89
parent ba32991201
2026-02-12 15:18:55 +01:00

15 changed files with 373 additions and 106 deletions

.gitignore (vendored)

@@ -1 +1,3 @@
 .env
+bin/
+examples/

README.md

@@ -7,7 +7,7 @@ A robust MySQL/MariaDB binlog streaming replication service with automatic initi
 ### Quick Install (Go)
 ```bash
-go install git.ma-al.com/goc_marek/replica/cmd/replica@latest
+go install git.ma-al.com/goc_marek/replica@latest
 ```
 ### Build from Source
@@ -18,18 +18,15 @@ git clone https://git.ma-al.com/goc_marek/replica.git
 cd replica
 # Build the service
-go build -o replica ./cmd/replica
-# Or install globally
-go install ./cmd/replica
+go build -o bin/replica ./main.go
+# Run the service
+./bin/replica
 ```
-### Docker
+### Docker Compose
 ```bash
-# Build the image
-docker build -t replica .
 # Run with docker-compose
 docker-compose up -d
 ```
@@ -76,14 +73,14 @@ nano .env
 | File | Purpose |
 |------|---------|
-| [`cmd/replica/main.go`](cmd/replica/main.go) | Application entry point and configuration |
-| [`pkg/replica/service.go`](pkg/replica/service.go) | BinlogSyncService - core replication orchestration |
-| [`pkg/replica/handlers.go`](pkg/replica/handlers.go) | EventHandlers - binlog event processing with resilience |
-| [`pkg/replica/initial_transfer.go`](pkg/replica/initial_transfer.go) | InitialTransfer - bulk data transfer management |
-| [`pkg/replica/position.go`](pkg/replica/position.go) | PositionManager - binlog position persistence |
-| [`pkg/replica/sqlbuilder.go`](pkg/replica/sqlbuilder.go) | SQLBuilder - SQL statement generation |
-| [`pkg/replica/config.go`](pkg/replica/config.go) | Configuration types |
-| [`pkg/replica/logging.go`](pkg/replica/logging.go) | Structured logging with Graylog support |
+| [`main.go`](main.go) | Application entry point and configuration |
+| [`replica/service.go`](replica/service.go) | BinlogSyncService - core replication orchestration |
+| [`replica/handlers.go`](replica/handlers.go) | EventHandlers - binlog event processing with resilience |
+| [`replica/initial_transfer.go`](replica/initial_transfer.go) | InitialTransfer - bulk data transfer management |
+| [`replica/position.go`](replica/position.go) | PositionManager - binlog position persistence |
+| [`replica/sqlbuilder.go`](replica/sqlbuilder.go) | SQLBuilder - SQL statement generation |
+| [`replica/config.go`](replica/config.go) | Configuration types |
+| [`replica/logging.go`](replica/logging.go) | Structured logging with Graylog support |
 ### Data Flow
@@ -116,10 +113,10 @@ BinlogSyncService.processEvent()
 ```bash
 # Build the service
-go build -o replica
+go build -o bin/replica ./main.go
 # Run the service
-./replica
+./bin/replica
 ```
 ### Configuration
@@ -213,7 +210,8 @@ GRAYLOG_SOURCE=binlog-sync-prod
 | Variable | Description | Default |
 |----------|-------------|---------|
-| `TRANSFER_BATCH_SIZE` | Rows per transfer chunk | `1000` |
+| `TRANSFER_BATCH_SIZE` | Rows per transfer chunk | `10000` |
+| `TRANSFER_WORKER_COUNT` | Number of parallel transfer workers | `4` |
 | `LOCAL_PROJECT_NAME` | Project name for logging | `naluconcept` |
 ## Resilience Features
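Editor's note: both knobs land on `AppConfig` through `LoadEnvConfig` (see the `replica/config.go` hunk below); values from the environment or `.env` override the new 10000/4 defaults, and `TRANSFER_WORKER_COUNT` is only applied when positive. A minimal sketch of driving them from code — assuming the post-rename import path `git.ma-al.com/goc_marek/replica/replica`, and that whatever MARIA_* settings `LoadEnvConfig` requires are already in the environment:

```go
package main

import (
	"fmt"
	"os"

	"git.ma-al.com/goc_marek/replica/replica"
)

func main() {
	// Explicit values beat the new built-in defaults (10000 rows, 4 workers).
	os.Setenv("TRANSFER_BATCH_SIZE", "20000")
	os.Setenv("TRANSFER_WORKER_COUNT", "8")

	cfg, err := replica.LoadEnvConfig()
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.BatchSize, cfg.WorkerCount) // 20000 8
}
```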
@@ -359,7 +357,7 @@ When resync is needed (empty replica or no saved position), the service performs
 - Get table schema (column definitions)
 - Check row count
 - Transfer in chunks using primary key or LIMIT/OFFSET
-5. **Progress Checkpointing**: Save progress to JSON file every 1000 rows
+5. **Progress Checkpointing**: Save progress to JSON file every 10000 rows
 6. **Position Reset**: Clear saved binlog position after successful transfer
 7. **Binlog Streaming**: Start streaming from current position
@@ -375,7 +373,7 @@ SELECT * FROM table WHERE pk >= 1000 AND pk < 2000 ORDER BY pk
 SELECT * FROM table LIMIT 1000 OFFSET 1000
 ```
-**Batch Size:** Configurable (default: 1000 rows per chunk)
+**Batch Size:** Configurable (default: 10000 rows per chunk)
 ### Progress Checkpointing
@@ -400,7 +398,7 @@ If the transfer is interrupted, it resumes from the last checkpoint.
 Transfers can be paused and resumed programmatically:
 ```go
-transfer := NewInitialTransfer(dsn, dsn, 1000, 1)
+transfer := NewInitialTransfer(dsn, dsn, 10000, 4)
 // Pause during transfer
 transfer.Pause()
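Editor's note: together with the `Stop` method added below in `replica/initial_transfer.go`, the full lifecycle looks roughly like this. A sketch from inside the `replica` package; note that `NewInitialTransfer` actually returns `(*InitialTransfer, error)`, not the single value the README fragment shows:

```go
transfer, err := NewInitialTransfer(primaryDSN, secondaryDSN, 10000, 4)
if err != nil {
	return err
}
done := make(chan error, 1)
go func() {
	done <- transfer.Transfer([]string{"information_schema", "performance_schema", "mysql", "sys"})
}()

transfer.Pause()  // workers block in checkPause()
transfer.Resume() // workers continue from the last checkpoint
transfer.Stop()   // closes stopChan; progress is saved before Transfer returns
return <-done
```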
@@ -445,8 +443,8 @@ type EventHandlers struct {
 type InitialTransfer struct {
     primaryDB      *sql.DB
     secondaryDB    *sql.DB
-    batchSize      int // Default: 1000
-    workerCount    int // Default: 1
+    batchSize      int // Default: 10000
+    workerCount    int // Default: 4
     excludedDBs    map[string]bool
     checkpointFile string
     progress       TransferProgress
@@ -575,15 +573,15 @@ SELECT COUNT(*) FROM your_table;
 ## Performance Considerations
-1. **Batch Size**: Start with 1000, adjust based on table size and memory
+1. **Batch Size**: Start with 10000, adjust based on table size and memory
 2. **Connection Pooling**: `SetMaxOpenConns(25)` for moderate load
-3. **Worker Count**: Currently single-threaded (multi-worker planned)
+3. **Worker Count**: Default: 4 workers for parallel processing
 4. **Schema Caching**: Table schemas cached in memory (auto-updated on drift)
 5. **Index Usage**: Chunked transfers require indexed primary key
 ## Limitations
-- **Single-threaded**: One worker processes events sequentially
+- **Multi-threaded**: Multiple workers process events in parallel (configurable via TRANSFER_WORKER_COUNT)
 - **Position-based**: No GTID support yet (position-based only)
 - **Integer PKs**: Chunking requires integer primary key for efficiency
 - **No Conflict Resolution**: Concurrent writes not handled
@@ -618,18 +616,20 @@ SELECT COUNT(*) FROM your_table;
 ```
 replica/
-├── cmd/
-│   └── replica/
-│       └── main.go             # Entry point
-├── pkg/
-│   └── replica/
-│       ├── service.go          # Replication orchestration
-│       ├── handlers.go         # Event processing
-│       ├── initial_transfer.go # Bulk data transfer
-│       ├── position.go         # Position persistence
-│       ├── sqlbuilder.go       # SQL generation
-│       ├── config.go           # Configuration types
-│       └── logging.go          # Structured logging
+├── main.go                     # Entry point
+├── replica/
+│   ├── service.go              # Replication orchestration
+│   ├── handlers.go             # Event processing
+│   ├── initial_transfer.go     # Bulk data transfer
+│   ├── position.go             # Position persistence
+│   ├── sqlbuilder.go           # SQL generation
+│   ├── config.go               # Configuration types
+│   └── logging.go              # Structured logging
+├── examples/
+│   └── binlog-listener/
+│       └── main.go             # Example binlog listener
+├── bin/
+│   └── replica                 # Compiled binary
 ├── example.env                 # Environment template
 ├── .env                        # Environment (gitignored)
 ├── docker-compose.yml          # Local development

docker-compose.yml

@@ -12,6 +12,7 @@ services:
       - --log_bin=log_bin
       - --binlog_format=ROW
       - --server-id=${MARIA_SERVER_ID}
+      - --log_bin_trust_function_creators=1
     ports:
       - "${MARIA_PRIMARY_PORT}:3306"
     networks:
@@ -21,8 +22,8 @@ services:
     environment:
       MARIADB_USER: ${MARIA_USER}
       MARIADB_PASSWORD: ${MARIA_PASS}
-      MYSQL_DATABASE: ${MARIA_NAME}
-      MYSQL_ROOT_PASSWORD: ${MARIA_PASS}
+      MARIADB_DATABASE: ${MARIA_NAME}
+      MARIADB_ROOT_PASSWORD: ${MARIA_PASS}
     restart: always
   mariadb-secondary:
@@ -46,27 +47,12 @@ services:
     environment:
       MARIADB_USER: ${MARIA_USER}
       MARIADB_PASSWORD: ${MARIA_PASS}
-      MYSQL_DATABASE: ${MARIA_NAME}
-      MYSQL_ROOT_PASSWORD: ${MARIA_PASS}
+      MARIADB_DATABASE: ${MARIA_NAME}
+      MARIADB_ROOT_PASSWORD: ${MARIA_PASS}
     restart: always
     depends_on:
       - mariadb-primary
-  # postgresql:
-  #   container_name: ${POSTGRES_HOST}
-  #   restart: always
-  #   image: postgres:18
-  #   networks:
-  #     repl:
-  #   ports:
-  #     - 5432:5432
-  #   volumes:
-  #     - postgres-data:/var/lib/postgresql:Z
-  #   command: postgres -c shared_buffers=512MB -c work_mem=16MB -c maintenance_work_mem=256MB -c effective_cache_size=4GB -c max_connections=20
-  #   environment:
-  #     POSTGRES_USER: ${POSTGRES_USER}
-  #     POSTGRES_PASSWORD: ${POSTGRES_PASS}
-  #     POSTGRES_DB: ${POSTGRES_NAME}
 networks:

example.env

@@ -1,21 +1,22 @@
 # Primary MariaDB Configuration
-MARIA_USER=replica
+MARIA_USER=root
 MARIA_PASS=replica
+MARIA_NAME=replica
 MARIA_SERVER_ID=100
-MARIA_PRIMARY_HOST=mariadb-primary
+MARIA_PRIMARY_HOST=localhost
 MARIA_PRIMARY_PORT=3306
 MARIA_PRIMARY_NAME=mariadb-primary
 # Secondary MariaDB Configuration (comma-separated for multiple)
 # Format: host1:port1,host2:port2,host3:port3
 # Or just hostnames and use MARIA_SECONDARY_PORTS for ports
-MARIA_SECONDARY_HOSTS=mariadb-secondary-1,mariadb-secondary-2,mariadb-secondary-3
-MARIA_SECONDARY_PORTS=3307,3308,3309
-MARIA_SECONDARY_NAMES=secondary-1,secondary-2,secondary-3
+MARIA_SECONDARY_HOSTS=localhost
+MARIA_SECONDARY_PORTS=3307
+MARIA_SECONDARY_NAMES=mariadb-secondary
 # Optional: Override per-secondary credentials (must match number of secondaries or use defaults)
-# MARIA_SECONDARY_USERS=replica1,replica2,replica3
-# MARIA_SECONDARY_PASSWORDS=pass1,pass2,pass3
+MARIA_SECONDARY_USERS=root
+MARIA_SECONDARY_PASSWORDS=replica
 # Legacy single secondary (for backward compatibility)
 # MARIA_SECONDARY_HOST=mariadb-secondary

go.mod

@@ -11,6 +11,7 @@ require (
     filippo.io/edwards25519 v1.1.0 // indirect
     github.com/goccy/go-json v0.10.2 // indirect
     github.com/google/uuid v1.3.0 // indirect
+    github.com/joho/godotenv v1.5.1 // indirect
     github.com/klauspost/compress v1.17.8 // indirect
     github.com/pingcap/errors v0.11.5-0.20250318082626-8f80e5cb09ec // indirect
     github.com/pingcap/log v1.1.1-0.20241212030209-7e3ff8601a2a // indirect

go.sum

@@ -12,6 +12,8 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
 github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
 github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=

main.go

@@ -7,7 +7,7 @@ import (
     "strconv"
     "syscall"
-    "git.ma-al.com/goc_marek/replica/pkg/replica"
+    "git.ma-al.com/goc_marek/replica/replica"
     _ "github.com/go-sql-driver/mysql"
 )
@@ -88,7 +88,9 @@ func main() {
     // Start all services
     replica.Info("Starting binlog replication...")
-    multiService.StartAll()
+    if err := multiService.StartAll(cfg.BatchSize, cfg.WorkerCount, cfg.ExcludeSchemas); err != nil {
+        replica.Fatalf("Failed to start all services: %v", err)
+    }
     // Wait for shutdown signal
     sig := <-sigChan

replica (BIN)
Binary file not shown.

replica/config.go

@@ -5,6 +5,8 @@ import (
     "strconv"
     "strings"
     "time"
+
+    "github.com/joho/godotenv"
 )
 // BinlogConfig holds the configuration for connecting to MySQL/MariaDB binlog
@@ -47,6 +49,7 @@ type AppConfig struct {
     // Transfer settings
     BatchSize      int
+    WorkerCount    int
     ExcludeSchemas []string
     // Graylog configuration
@@ -55,8 +58,15 @@ type AppConfig struct {
 // LoadEnvConfig loads configuration from environment variables
 func LoadEnvConfig() (*AppConfig, error) {
+    // Load .env file
+    if err := godotenv.Load(); err != nil {
+        // It's not an error if the .env file doesn't exist
+        // We just use the system environment variables
+    }
     cfg := &AppConfig{
-        BatchSize:      1000,
+        BatchSize:      10000, // Increase default batch size from 1000 to 10000 for better performance
+        WorkerCount:    4,     // Default to 4 workers for parallel processing
         ExcludeSchemas: []string{"information_schema", "performance_schema", "mysql", "sys"},
     }
@@ -135,6 +145,11 @@ func LoadEnvConfig() (*AppConfig, error) {
         cfg.BatchSize = batchSize
     }
+    // Worker count override
+    if workerCount := getEnvInt("TRANSFER_WORKER_COUNT", 0); workerCount > 0 {
+        cfg.WorkerCount = workerCount
+    }
     // Graylog configuration
     cfg.Graylog.Enabled = getEnvBool("GRAYLOG_ENABLED", false)
     cfg.Graylog.Endpoint = getEnv("GRAYLOG_ENDPOINT", "localhost:12201")
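Editor's note: the added `godotenv.Load` call swallows its error with an empty `if` body, which compiles but is exactly the empty branch staticcheck flags (SA9003). An explicit equivalent — a sketch, with `errors` and `io/fs` added to the imports, assuming godotenv v1.5.1 surfaces the underlying `os.Open` error (which it does):

```go
// A missing .env is fine: fall back to the process environment.
// Any other failure (e.g. an unreadable file) is still reported.
if err := godotenv.Load(); err != nil && !errors.Is(err, fs.ErrNotExist) {
	return nil, fmt.Errorf("loading .env: %w", err)
}
```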

replica/handlers.go

@@ -3,6 +3,7 @@ package replica
 import (
     "database/sql"
     "fmt"
+    "strings"
     "sync"
     "time"
@@ -90,9 +91,21 @@ func (h *EventHandlers) HandleQuery(e *replication.QueryEvent) error {
     query := string(e.Query)
     if h.secondaryDB != nil {
+        // Skip CREATE TABLE statements to avoid ER_TABLE_EXISTS_ERROR
+        trimmedQuery := strings.ToUpper(strings.TrimSpace(query))
+        if strings.HasPrefix(trimmedQuery, "CREATE TABLE") {
+            Info("[%s][INFO] Skipping CREATE TABLE query: %s", h.secondaryName, query)
+            return nil
+        }
         _, err := h.secondaryDB.Exec(query)
         if err != nil {
-            Errorf("[%s][ERROR] Query failed: %v", h.secondaryName, err)
+            // Skip ER_TABLE_EXISTS_ERROR specifically
+            if strings.Contains(strings.ToUpper(err.Error()), "ER_TABLE_EXISTS_ERROR") {
+                Info("[%s][INFO] Table already exists, skipping query: %s", h.secondaryName, query)
+            } else {
+                Errorf("[%s][ERROR] Query failed: %v", h.secondaryName, err)
+            }
         }
     }
     return nil
@@ -292,7 +305,7 @@ func (h *EventHandlers) executeWithRetry(query string) error {
         if err == nil {
             if result != nil {
                 rowsAffected, _ := result.RowsAffected()
-                Info("[%s][SUCCESS] %d row(s) affected", h.secondaryName, rowsAffected)
+                Infof("[%s][SUCCESS] %d row(s) affected", h.secondaryName, rowsAffected)
             }
             return nil
         }
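Editor's note: `go-sql-driver/mysql` renders server errors as `Error 1050: Table '…' already exists`, so the substring match on the symbolic name `ER_TABLE_EXISTS_ERROR` will most likely never fire with this driver. Matching the numeric code is sturdier — a sketch using the driver that `main.go` already imports:

```go
import (
	"errors"

	"github.com/go-sql-driver/mysql"
)

// Inside HandleQuery, replacing the string comparison:
var myErr *mysql.MySQLError
if errors.As(err, &myErr) && myErr.Number == 1050 { // 1050 = ER_TABLE_EXISTS_ERROR
	Infof("[%s][INFO] Table already exists, skipping query: %s", h.secondaryName, query)
} else {
	Errorf("[%s][ERROR] Query failed: %v", h.secondaryName, err)
}
```

(`Infof` rather than `Info` here, since a format string is being filled in — the same fix this commit applies in `executeWithRetry`.)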

replica/initial_transfer.go

@@ -42,7 +42,9 @@ type InitialTransfer struct {
     progress   TransferProgress
     pauseChan  chan struct{}
     resumeChan chan struct{}
+    stopChan   chan struct{} // New channel for stop signal
     isPaused   bool
+    isStopped  bool
 }
 // NewInitialTransfer creates a new initial transfer handler
@@ -51,16 +53,18 @@ func NewInitialTransfer(primaryDSN, secondaryDSN string, batchSize, workerCount
     if err != nil {
         return nil, fmt.Errorf("failed to connect to primary: %v", err)
     }
-    primaryDB.SetMaxOpenConns(batchSize)
-    primaryDB.SetMaxIdleConns(2)
+    // Optimize connection pool settings - use worker count for max open connections
+    primaryDB.SetMaxOpenConns(workerCount * 2) // 2 connections per worker
+    primaryDB.SetMaxIdleConns(workerCount)
     secondaryDB, err := sql.Open("mysql", secondaryDSN)
     if err != nil {
         primaryDB.Close()
         return nil, fmt.Errorf("failed to connect to secondary: %v", err)
     }
-    secondaryDB.SetMaxOpenConns(batchSize)
-    secondaryDB.SetMaxIdleConns(2)
+    // Optimize connection pool settings - use worker count for max open connections
+    secondaryDB.SetMaxOpenConns(workerCount * 2) // 2 connections per worker
+    secondaryDB.SetMaxIdleConns(workerCount)
     if err := primaryDB.Ping(); err != nil {
         primaryDB.Close()
@@ -91,6 +95,7 @@ func NewInitialTransfer(primaryDSN, secondaryDSN string, batchSize, workerCount
         },
         pauseChan:  make(chan struct{}),
         resumeChan: make(chan struct{}),
+        stopChan:   make(chan struct{}), // Initialize stop channel
     }, nil
 }
@@ -122,6 +127,15 @@ func (t *InitialTransfer) Transfer(excludeSchemas []string) error {
     }
     for i, dbName := range databases {
+        // Check for stop signal
+        select {
+        case <-t.stopChan:
+            Info("[TRANSFER] Transfer stopped, saving progress...")
+            t.saveProgress()
+            return nil
+        default:
+        }
         // Check for pause signal
         t.checkPause()
@@ -140,6 +154,12 @@ func (t *InitialTransfer) Transfer(excludeSchemas []string) error {
         t.saveProgress()
         if err := t.transferDatabase(dbName); err != nil {
+            // Check if the error is due to transfer being stopped
+            if t.isStopped {
+                Info("[TRANSFER] Transfer stopped, saving progress...")
+                t.saveProgress()
+                return nil
+            }
             return fmt.Errorf("failed to transfer database %s: %v", dbName, err)
         }
@@ -162,12 +182,24 @@ func (t *InitialTransfer) Transfer(excludeSchemas []string) error {
     return nil
 }
-// checkPause checks if transfer should be paused
+// checkPause checks if transfer should be paused or stopped
 func (t *InitialTransfer) checkPause() {
+    select {
+    case <-t.stopChan:
+        Info("[TRANSFER] Transfer stopped")
+        return
+    default:
+    }
     if t.isPaused {
         Info("[TRANSFER] Transfer paused, waiting for resume...")
-        <-t.resumeChan
-        Info("[TRANSFER] Transfer resumed")
+        select {
+        case <-t.resumeChan:
+            Info("[TRANSFER] Transfer resumed")
+        case <-t.stopChan:
+            Info("[TRANSFER] Transfer stopped while paused")
+            t.isPaused = false
+        }
     }
 }
@@ -189,6 +221,15 @@ func (t *InitialTransfer) Resume() {
     }
 }
+// Stop stops the transfer
+func (t *InitialTransfer) Stop() {
+    if !t.isStopped {
+        t.isStopped = true
+        close(t.stopChan)
+        Info("[TRANSFER] Transfer stop requested")
+    }
+}
 // saveProgress saves the current progress to a checkpoint file
 func (t *InitialTransfer) saveProgress() {
     t.progress.LastCheckpoint = time.Now()
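Editor's note: `isStopped` is set by `Stop()` and read from the worker goroutines without holding `mu`, and two concurrent `Stop()` calls could both pass the `!t.isStopped` check and double-close `stopChan` (a panic). A race-free shape — a sketch, not the commit's code, assuming Go 1.19+ for `atomic.Bool`:

```go
import "sync/atomic"

type InitialTransfer struct {
	// ...
	stopChan chan struct{}
	stopped  atomic.Bool // replaces the unsynchronized isStopped flag
}

// Stop requests a stop exactly once, even under concurrent callers.
func (t *InitialTransfer) Stop() {
	if t.stopped.CompareAndSwap(false, true) {
		close(t.stopChan)
		Info("[TRANSFER] Transfer stop requested")
	}
}
```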
@@ -268,22 +309,63 @@ func (t *InitialTransfer) transferDatabase(dbName string) error {
     Infof("[TRANSFER] Found %d tables in %s", len(tables), dbName)
+    // All tables need to be processed - we'll resume from where we left off within each table
+    var unprocessedTables []string
     for _, table := range tables {
-        // Check for pause signal
-        t.checkPause()
-        // Skip already processed tables
-        tableKey := dbName + "." + table
-        if t.progress.TablesProcessed[tableKey] > 0 {
-            Infof("[TRANSFER] Skipping already processed table: %s", tableKey)
-            continue
-        }
-        if err := t.transferTable(dbName, table); err != nil {
-            Errorf("[ERROR] Failed to transfer table %s.%s: %v", dbName, table, err)
-            t.mu.Lock()
-            t.stats.Errors = append(t.stats.Errors, fmt.Sprintf("%s.%s: %v", dbName, table, err))
-            t.mu.Unlock()
+        unprocessedTables = append(unprocessedTables, table)
+    }
+    // Create worker pool
+    jobs := make(chan string, len(unprocessedTables))
+    results := make(chan error, len(unprocessedTables))
+    for w := 0; w < t.workerCount; w++ {
+        go func(workerID int) {
+            for table := range jobs {
+                // Check for stop signal
+                select {
+                case <-t.stopChan:
+                    results <- nil // Signal stop
+                    return
+                default:
+                }
+                // Check for pause signal
+                t.checkPause()
+                if err := t.transferTable(dbName, table); err != nil {
+                    // Check if the error is due to transfer being stopped
+                    if t.isStopped {
+                        results <- nil // Signal stop
+                        return
+                    }
+                    Errorf("[ERROR] Worker %d: Failed to transfer table %s.%s: %v", workerID, dbName, table, err)
+                    t.mu.Lock()
+                    t.stats.Errors = append(t.stats.Errors, fmt.Sprintf("%s.%s: %v", dbName, table, err))
+                    t.mu.Unlock()
+                }
+                results <- nil
+            }
+        }(w)
+    }
+    // Send jobs to workers
+    for _, table := range unprocessedTables {
+        jobs <- table
+    }
+    close(jobs)
+    // Wait for all workers to complete
+    for range unprocessedTables {
+        select {
+        case <-t.stopChan:
+            Info("[TRANSFER] Transfer stopped, saving progress...")
+            t.saveProgress()
+            return nil
+        case err := <-results:
+            if err != nil {
+                return err
+            }
         }
     }
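Editor's note: the pool above accounts for completion by pushing one value onto `results` per job, with the collector also selecting on `stopChan` so it cannot block when workers bail out early. The same fan-out is often written with `sync.WaitGroup`, which drops the one-result-per-job bookkeeping entirely — a self-contained sketch of the pattern (not the commit's code):

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	tables := []string{"users", "orders", "items"}
	jobs := make(chan string)

	var wg sync.WaitGroup
	for w := 0; w < 4; w++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for table := range jobs { // exits once jobs is closed and drained
				fmt.Printf("worker %d: transferring %s\n", id, table)
			}
		}(w)
	}

	for _, t := range tables {
		jobs <- t
	}
	close(jobs)
	wg.Wait() // every table has been handled
}
```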
@@ -313,12 +395,26 @@ func (t *InitialTransfer) getTables(dbName string) ([]string, error) {
 // transferTable transfers a single table with chunked reads
 func (t *InitialTransfer) transferTable(dbName, tableName string) error {
+    // Check for stop signal
+    select {
+    case <-t.stopChan:
+        Info("[TRANSFER] Transfer stopped, saving progress...")
+        t.saveProgress()
+        return nil
+    default:
+    }
     // Get table structure
     schema, err := t.getTableSchema(dbName, tableName)
     if err != nil {
         return fmt.Errorf("failed to get table schema: %v", err)
     }
+    // Create table on secondary if it doesn't exist
+    if err := t.createTableOnSecondary(dbName, tableName); err != nil {
+        return fmt.Errorf("failed to create table on secondary: %v", err)
+    }
     // Check if table has data
     hasData, err := t.tableHasData(dbName, tableName)
     if err != nil {
@@ -345,7 +441,87 @@ func (t *InitialTransfer) transferTable(dbName, tableName string) error {
         return t.transferTableFullScan(dbName, tableName, schema, count)
     }
-    return t.transferTableChunked(dbName, tableName, schema, pkColumn, count)
+    // Check if primary key is numeric before attempting chunked transfer
+    if isPrimaryKeyNumeric(dbName, tableName, pkColumn, t.primaryDB) {
+        return t.transferTableChunked(dbName, tableName, schema, pkColumn, count)
+    } else {
+        Warnf("[WARN] Non-numeric primary key found for %s.%s, using full scan", dbName, tableName)
+        return t.transferTableFullScan(dbName, tableName, schema, count)
+    }
+}
+// createTableOnSecondary creates the table on the secondary database if it doesn't exist
+func (t *InitialTransfer) createTableOnSecondary(dbName, tableName string) error {
+    // Get CREATE TABLE statement from primary
+    var createSQL string
+    query := fmt.Sprintf("SHOW CREATE TABLE `%s`.`%s`", dbName, tableName)
+    err := t.primaryDB.QueryRow(query).Scan(&tableName, &createSQL)
+    if err != nil {
+        return fmt.Errorf("failed to get CREATE TABLE statement: %v", err)
+    }
+    // Execute CREATE TABLE on secondary (with IF NOT EXISTS and foreign key checks disabled)
+    // This prevents errors due to foreign key constraints when tables are created in random order
+    _, err = t.secondaryDB.Exec("SET FOREIGN_KEY_CHECKS = 0")
+    if err != nil {
+        return fmt.Errorf("failed to disable foreign key checks: %v", err)
+    }
+    createSQL = strings.Replace(createSQL, "CREATE TABLE", "CREATE TABLE IF NOT EXISTS", 1)
+    _, err = t.secondaryDB.Exec(createSQL)
+    if err != nil {
+        // Try again without foreign key constraints if we still get an error
+        modifiedSQL := removeForeignKeyConstraints(createSQL)
+        _, err = t.secondaryDB.Exec(modifiedSQL)
+        if err != nil {
+            return fmt.Errorf("failed to create table: %v", err)
+        }
+        Infof("[TRANSFER] Table %s.%s created on secondary without foreign key constraints", dbName, tableName)
+    } else {
+        Infof("[TRANSFER] Table %s.%s created on secondary", dbName, tableName)
+    }
+    _, err = t.secondaryDB.Exec("SET FOREIGN_KEY_CHECKS = 1")
+    if err != nil {
+        return fmt.Errorf("failed to enable foreign key checks: %v", err)
+    }
+    return nil
+}
+// removeForeignKeyConstraints removes foreign key constraints from CREATE TABLE SQL
+func removeForeignKeyConstraints(sql string) string {
+    // This is a simple implementation - more complex cases might require proper SQL parsing
+    // Remove FOREIGN KEY clauses
+    sql = strings.ReplaceAll(sql, "FOREIGN KEY", "/* FOREIGN KEY */")
+    return sql
+}
+// isPrimaryKeyNumeric checks if the primary key column is numeric type
+func isPrimaryKeyNumeric(dbName, tableName, pkColumn string, db *sql.DB) bool {
+    query := fmt.Sprintf(
+        "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS "+
+            "WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s' AND COLUMN_NAME = '%s'",
+        dbName, tableName, pkColumn,
+    )
+    var dataType string
+    err := db.QueryRow(query).Scan(&dataType)
+    if err != nil {
+        return false
+    }
+    // List of numeric data types (MySQL/MariaDB)
+    numericTypes := []string{"tinyint", "smallint", "mediumint", "int", "bigint",
+        "decimal", "numeric", "float", "double", "real"}
+    for _, t := range numericTypes {
+        if strings.Contains(strings.ToLower(dataType), t) {
+            return true
+        }
+    }
+    return false
 }
 // tableHasData checks if a table has any rows
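Editor's note: `isPrimaryKeyNumeric` splices `dbName`, `tableName`, and `pkColumn` into the SQL with `fmt.Sprintf`. The names come from `SHOW TABLES` rather than user input, but any identifier containing a quote still breaks the statement; placeholders sidestep quoting altogether — a sketch:

```go
// Same lookup with driver placeholders instead of fmt.Sprintf.
var dataType string
err := db.QueryRow(
	`SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS
	 WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ? AND COLUMN_NAME = ?`,
	dbName, tableName, pkColumn,
).Scan(&dataType)
if err != nil {
	return false
}
```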
@@ -442,9 +618,24 @@ func (t *InitialTransfer) transferTableChunked(dbName, tableName string, columns
         startOffset = minMax.Min
     }
+    // Check if table has already been fully processed
+    if startOffset >= minMax.Max {
+        Infof("[TRANSFER] Table %s.%s already fully processed, skipping", dbName, tableName)
+        return nil
+    }
     // Transfer chunks
     offset := startOffset
     for offset < minMax.Max {
+        // Check for stop signal
+        select {
+        case <-t.stopChan:
+            Info("[TRANSFER] Transfer stopped, saving progress...")
+            t.saveProgress()
+            return nil
+        default:
+        }
         // Check for pause signal
         t.checkPause()
@@ -462,6 +653,12 @@ func (t *InitialTransfer) transferTableChunked(dbName, tableName string, columns
         rows.Close()
         if err != nil {
+            // Check if the error is due to transfer being stopped
+            if t.isStopped {
+                Info("[TRANSFER] Transfer stopped, saving progress...")
+                t.saveProgress()
+                return nil
+            }
             return fmt.Errorf("failed to insert chunk: %v", err)
         }
@@ -470,14 +667,15 @@ func (t *InitialTransfer) transferTableChunked(dbName, tableName string, columns
         t.progress.TablesProcessed[tableKey] = offset + int64(t.batchSize)
         t.mu.Unlock()
-        // Save checkpoint every 1000 rows
-        if rowsInserted%1000 == 0 {
+        // Save checkpoint every 100 rows (for testing purposes)
+        if rowsInserted%100 == 0 {
             t.saveProgress()
         }
         offset += int64(t.batchSize)
     }
+    Infof("[TRANSFER] Table %s.%s fully processed", dbName, tableName)
     return nil
 }
@@ -510,9 +708,24 @@ func (t *InitialTransfer) transferTableFullScan(dbName, tableName string, column
     // Get starting offset from progress
     startOffset := t.progress.TablesProcessed[tableKey]
+    // Check if table has already been fully processed
+    if startOffset >= rowCount {
+        Infof("[TRANSFER] Table %s.%s already fully processed, skipping", dbName, tableName)
+        return nil
+    }
     var offset int64 = startOffset
     for offset < rowCount {
+        // Check for stop signal
+        select {
+        case <-t.stopChan:
+            Info("[TRANSFER] Transfer stopped, saving progress...")
+            t.saveProgress()
+            return nil
+        default:
+        }
         // Check for pause signal
         t.checkPause()
@@ -530,6 +743,12 @@ func (t *InitialTransfer) transferTableFullScan(dbName, tableName string, column
         rows.Close()
         if err != nil {
+            // Check if the error is due to transfer being stopped
+            if t.isStopped {
+                Info("[TRANSFER] Transfer stopped, saving progress...")
+                t.saveProgress()
+                return nil
+            }
             return fmt.Errorf("failed to insert chunk: %v", err)
         }
@@ -538,19 +757,27 @@ func (t *InitialTransfer) transferTableFullScan(dbName, tableName string, column
         t.progress.TablesProcessed[tableKey] = offset
         t.mu.Unlock()
-        // Save checkpoint every 1000 rows
-        if rowsInserted%1000 == 0 {
+        // Save checkpoint every 100 rows (for testing purposes)
+        if rowsInserted%100 == 0 {
             t.saveProgress()
         }
         offset += int64(t.batchSize)
     }
+    Infof("[TRANSFER] Table %s.%s fully processed", dbName, tableName)
     return nil
 }
 // insertRows inserts rows from a query result into the secondary database
 func (t *InitialTransfer) insertRows(db *sql.DB, dbName, tableName string, columns []ColumnInfo, rows *sql.Rows) (int64, error) {
+    // Disable foreign key checks to avoid errors due to constraint violations
+    _, err := db.Exec("SET FOREIGN_KEY_CHECKS = 0")
+    if err != nil {
+        return 0, fmt.Errorf("failed to disable foreign key checks: %v", err)
+    }
+    defer db.Exec("SET FOREIGN_KEY_CHECKS = 1") // Ensure foreign key checks are re-enabled
     // Build INSERT statement
     placeholders := make([]string, len(columns))
     colNames := make([]string, len(columns))
@@ -560,7 +787,7 @@ func (t *InitialTransfer) insertRows(db *sql.DB, dbName, tableName string, colum
     }
     insertSQL := fmt.Sprintf(
-        "INSERT INTO `%s`.`%s` (%s) VALUES (%s)",
+        "INSERT IGNORE INTO `%s`.`%s` (%s) VALUES (%s)",
         dbName, tableName, strings.Join(colNames, ", "), strings.Join(placeholders, ", "),
     )
@@ -587,7 +814,9 @@ func (t *InitialTransfer) insertRows(db *sql.DB, dbName, tableName string, colum
         _, err := stmt.Exec(values...)
         if err != nil {
-            return rowCount, fmt.Errorf("failed to insert row: %v", err)
+            // Log the error but continue with other rows
+            Warnf("[TRANSFER] Failed to insert row into %s.%s: %v", dbName, tableName, err)
+            continue
         }
         rowCount++
     }
@@ -598,7 +827,6 @@ func (t *InitialTransfer) insertRows(db *sql.DB, dbName, tableName string, colum
     t.mu.Unlock()
     Infof("[TRANSFER] Inserted %d rows into %s.%s", rowCount, dbName, tableName)
-
     return rowCount, rows.Err()
 }
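Editor's note: with `INSERT IGNORE` plus log-and-continue, a duplicate key no longer aborts the chunk — but an ignored row also returns success with zero rows affected, so it is counted into `rowCount` as if written. `RowsAffected` can tell the two apart — a sketch of the per-row loop with that check added:

```go
res, err := stmt.Exec(values...)
if err != nil {
	// Log the error but continue with other rows (as the commit does).
	Warnf("[TRANSFER] Failed to insert row into %s.%s: %v", dbName, tableName, err)
	continue
}
if n, _ := res.RowsAffected(); n == 0 {
	// INSERT IGNORE reports success but touches 0 rows when it skips one.
	Warnf("[TRANSFER] Row ignored (likely duplicate key) in %s.%s", dbName, tableName)
}
rowCount++
```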

replica/service.go

@@ -275,7 +275,7 @@ func (s *BinlogSyncService) checkSecondaryHasData() (bool, error) {
 }
 // RunInitialTransfer performs the initial data transfer
-func (s *BinlogSyncService) RunInitialTransfer(batchSize int, excludeSchemas []string) error {
+func (s *BinlogSyncService) RunInitialTransfer(batchSize, workerCount int, excludeSchemas []string) error {
     Infof("[%s] Starting initial data transfer...", s.secondaryName)
     // Get secondary DB from handlers
@@ -289,7 +289,7 @@ func (s *BinlogSyncService) RunInitialTransfer(batchSize int, excludeSchemas []s
         primaryDB:   s.primaryDB,
         secondaryDB: secondaryDB,
         batchSize:   batchSize,
-        workerCount: 1,
+        workerCount: workerCount,
         excludedDBs: map[string]bool{
             "information_schema": true,
             "performance_schema": true,
@@ -300,10 +300,27 @@ func (s *BinlogSyncService) RunInitialTransfer(batchSize int, excludeSchemas []s
         progress: TransferProgress{
             TablesProcessed: make(map[string]int64),
         },
+        stopChan: make(chan struct{}),
     }
-    if err := transfer.Transfer(excludeSchemas); err != nil {
-        return fmt.Errorf("transfer failed: %v", err)
+    // Start transfer in goroutine
+    errChan := make(chan error)
+    go func() {
+        errChan <- transfer.Transfer(excludeSchemas)
+    }()
+    // Wait for transfer completion or stop signal
+    select {
+    case <-s.stopChan:
+        Info("[%s] Initial transfer stopping...", s.secondaryName)
+        transfer.Stop()
+        <-errChan // Wait for transfer to finish stopping
+        Info("[%s] Initial transfer stopped", s.secondaryName)
+        return nil
+    case err := <-errChan:
+        if err != nil {
+            return fmt.Errorf("transfer failed: %v", err)
+        }
     }
     // Reset position after successful transfer
@@ -316,7 +333,7 @@ func (s *BinlogSyncService) RunInitialTransfer(batchSize int, excludeSchemas []s
 }
 // StartWithResync starts binlog streaming with automatic resync if needed
-func (s *BinlogSyncService) StartWithResync(batchSize int, excludeSchemas []string) error {
+func (s *BinlogSyncService) StartWithResync(batchSize, workerCount int, excludeSchemas []string) error {
     if err := s.positionMgr.InitTable(); err != nil {
         Warnf("[%s][WARN] Failed to init position table: %v", s.secondaryName, err)
     }
@@ -328,7 +345,7 @@ func (s *BinlogSyncService) StartWithResync(batchSize int, excludeSchemas []stri
     }
     if needResync {
-        if err := s.RunInitialTransfer(batchSize, excludeSchemas); err != nil {
+        if err := s.RunInitialTransfer(batchSize, workerCount, excludeSchemas); err != nil {
             return fmt.Errorf("initial transfer failed: %v", err)
         }
     }
@@ -363,13 +380,13 @@ func (m *MultiBinlogSyncService) AddService(service *BinlogSyncService) {
 }
 // StartAll starts all services
-func (m *MultiBinlogSyncService) StartAll() error {
+func (m *MultiBinlogSyncService) StartAll(batchSize, workerCount int, excludeSchemas []string) error {
     for _, service := range m.services {
         m.wg.Add(1)
         go func(svc *BinlogSyncService) {
             defer m.wg.Done()
             Infof("[%s] Starting binlog sync service", svc.GetSecondaryName())
-            if err := svc.StartWithResync(1000, []string{"information_schema", "performance_schema", "mysql", "sys"}); err != nil {
+            if err := svc.StartWithResync(batchSize, workerCount, excludeSchemas); err != nil {
                 Errorf("[%s] Service error: %v", svc.GetSecondaryName(), err)
             }
         }(service)
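Editor's note: `StartAll` now threads the configured settings through, but each service still runs in a goroutine whose error is only logged, so the method's `error` return can never reflect a failing service. If callers should see the first failure, `errgroup` is the idiomatic shape — a sketch inside the `replica` package using `golang.org/x/sync/errgroup`, which the module does not currently import:

```go
// StartAll variant that propagates the first per-service error to the caller.
func (m *MultiBinlogSyncService) StartAll(batchSize, workerCount int, excludeSchemas []string) error {
	g := new(errgroup.Group)
	for _, service := range m.services {
		svc := service // capture per iteration (pre-Go 1.22 loop semantics)
		g.Go(func() error {
			Infof("[%s] Starting binlog sync service", svc.GetSecondaryName())
			return svc.StartWithResync(batchSize, workerCount, excludeSchemas)
		})
	}
	return g.Wait() // first non-nil error, after all services return
}
```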