带缓冲区和超时的 Go writer

问题描述 投票:0回答:2

我想使用 Go 向 AWS SQS 发送请求。已经有使用 v2 SDK 中的 `sqs.SendMessageInput` 以单条消息模式执行此操作的示例,但我想改为批量发送。我创建了一个接口,把实现细节从其余代码中抽象出来,如下所示:

// UserRepository abstracts how a User is stored so that the rest of the code
// does not depend on a specific backend.
type UserRepository interface {
   // Save stores a single User and returns an error on failure.
   Save(context.Context, User) error
}

如您所见,该接口并不特定于 SQS,可以很容易地替换为其他实现,例如 Postgres 实现。我非常希望让这个接口保持尽可能简洁。

使用 SQS 时有一些我能想到的注意事项,可能还有一些我没有想到的。需要在以下时机发送一批:

  1. 每调用 n 次 `Save` 之后
  2. 在可配置的超时之后
  3. 上下文取消之后
  4. 在调用方保存完所有用户对象之后

请注意,除了发送到 SQS 之外,同样的做法也应该适用于写入控制台;我在下面的示例中实现的就是写入控制台的版本。

这种设计可行吗?还是说我的接口中必须始终有一个 `Close` 函数?

代码的问题是最后4个(nr_of_items % batch_size)不会被“保存”。

package main

import (
    "context"
    "fmt"
    "os"
    "os/signal"
    "sync"
    "time"
)

// LoggingBufferedUserRepository collects messages in memory and writes them
// out in batches. A batch is written when the buffer reaches bufferSize
// entries, every bufferTimeout, and once more when the context passed to
// NewLoggingBufferedUserRepository is cancelled. The "write" is a console
// log line that stands in for a real batch call.
type LoggingBufferedUserRepository struct {
    buffer        []string      // pending messages; guarded by mutex
    bufferSize    int           // number of pending messages that triggers a flush
    bufferTimeout time.Duration // interval between time-based flushes
    mutex         sync.Mutex    // protects buffer
    closeChan     chan struct{} // allocated by the constructor; no method uses it yet
}

// NewLoggingBufferedUserRepository returns a repository that flushes after
// bufferSize messages or after bufferTimeout, whichever comes first.
//
// It starts one background goroutine for the time-based flush. That goroutine
// flushes whatever is still buffered and exits when ctx is cancelled.
func NewLoggingBufferedUserRepository(
    ctx context.Context, bufferSize int, bufferTimeout time.Duration,
) *LoggingBufferedUserRepository {
    client := &LoggingBufferedUserRepository{
        bufferSize:    bufferSize,
        bufferTimeout: bufferTimeout,
        closeChan:     make(chan struct{}),
    }

    go client.bufferMonitor(ctx)
    return client
}

// SendMessage appends input to the buffer. When the buffer has reached
// bufferSize entries, the whole batch is flushed before SendMessage returns.
//
// The flush runs synchronously (not in a detached goroutine) so that a batch
// cannot be lost when the program exits right after the call, and it runs
// after the mutex is released so that other senders are not blocked by it.
func (c *LoggingBufferedUserRepository) SendMessage(ctx context.Context, input string) {
    c.mutex.Lock()
    c.buffer = append(c.buffer, input)
    var batch []string
    if len(c.buffer) >= c.bufferSize {
        batch = c.buffer
        c.buffer = nil
    }
    c.mutex.Unlock()

    // flush is a no-op for an empty batch.
    c.flush(ctx, batch)
}

// takeBuffer detaches and returns the pending messages, leaving the buffer
// empty. It is safe to call concurrently with SendMessage.
func (c *LoggingBufferedUserRepository) takeBuffer() []string {
    c.mutex.Lock()
    defer c.mutex.Unlock()
    batch := c.buffer
    c.buffer = nil
    return batch
}

// flush writes one batch. It does nothing when buffer is empty. The caller
// must own buffer exclusively; flush itself takes no lock.
func (c *LoggingBufferedUserRepository) flush(ctx context.Context, buffer []string) {
    if len(buffer) == 0 {
        return
    }

    // This is the actual batch 'save'. %v is used for cid because
    // ctx.Value returns nil when no "cid" is set, which %s would render as
    // "%!s(<nil>)".
    fmt.Printf("flushing buffer, size=%d, cid=%v, buffer=$%v\n", len(buffer), ctx.Value("cid"), buffer)
}

// bufferMonitor flushes the buffer every bufferTimeout until ctx is
// cancelled. On cancellation it flushes the remaining messages once and
// returns, so the goroutine started by the constructor does not leak.
func (c *LoggingBufferedUserRepository) bufferMonitor(ctx context.Context) {
    timeout := time.NewTimer(c.bufferTimeout)
    defer timeout.Stop()

    for {
        select {
        case <-ctx.Done():
            // Final flush. ctx is already cancelled here; a backend that
            // performs real I/O would need a context that is still live.
            c.flush(ctx, c.takeBuffer())
            return
        case <-timeout.C:
            c.flush(ctx, c.takeBuffer())
            timeout.Reset(c.bufferTimeout)
        }
    }
}

// main sends 15 messages, 100 ms apart and each from its own goroutine, to a
// repository configured with a batch size of 10 and a 1 s timeout, then waits
// for every SendMessage call to return.
//
// main does not wait for the repository's background goroutine, so messages
// that are still buffered when main returns may never be flushed.
func main() {
    // Only os.Interrupt is registered: os.Kill (SIGKILL) cannot be caught by
    // a process, so listing it has no effect.
    ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
    defer stop()
    repo := NewLoggingBufferedUserRepository(ctx, 10, 1*time.Second)

    var wg sync.WaitGroup
    for i := 0; i < 15; i++ {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            fmt.Printf("sending message %d\n", i)
            repo.SendMessage(ctx, fmt.Sprintf("a%d", i))
        }(i)
        time.Sleep(100 * time.Millisecond)
    }
    wg.Wait()
    fmt.Println("done")
}
go amazon-sqs
2个回答
0
投票

有多种方法可以做到这一点。一般来说,基于超时刷新此类缓冲实现是不可取的,因为您无法控制如果事情失败会发生什么。

一种方法是进行显式批处理操作:

// UserRepository offers a single-user save and an explicit batch API.
type UserRepository interface {
   // Save saves a single user.
   Save(context.Context, User) error
   // SaveBatch returns a UserBatch for saving several users as one batch.
   SaveBatch(context.Context) UserBatch
}

其中:

// UserBatch is the explicit batch handle: the caller decides when the batch
// is flushed and when it is closed.
type UserBatch interface {
   // Save takes one User for this batch.
   // NOTE(review): presumably this only queues the user until Flush — confirm.
   Save(context.Context,User) error
   // Flush reports an error if flushing fails.
   // NOTE(review): presumably writes out everything queued so far — confirm.
   Flush(context.Context) error
   // Close ends the batch.
   // NOTE(review): whether Close also flushes is not specified here — confirm.
   Close(context.Context) error
}

0
投票

支持批量操作的 `UserRepository` 接口是否需要 `Close` 函数?

是的。一种解决方案是在您的接口中增加一个单独的 `Flush` 函数,调用者可以用它显式刷新所有剩余的缓冲项。您的接口如下:

// UserRepository is the repository interface extended with explicit flush
// operations, so the caller controls when buffered items are written.
type UserRepository interface {
    // Save takes a single User.
    Save(context.Context, User) error
    // Flush lets the caller explicitly flush any remaining buffered items.
    Flush(context.Context) error
    // SaveAll reports an error on failure.
    // NOTE(review): intended semantics are not defined by this listing — confirm.
    SaveAll(context.Context) error
}

按照您的需求实现该 `UserRepository` 的一个示例可能类似于下面这样(playground):

// for SO testing, by JS

package main

import (
    "context"
    "fmt"
    "sync"
    "time"
)

// User is the record handed to UserRepository.Save.
type User struct {
    ID   int    // numeric identifier
    Name string // display name
}
// UserRepository is the storage abstraction used by this example; it exposes
// a per-user save plus two explicit "write out what is pending" operations.
type UserRepository interface {
    // Save takes a single User and returns an error on failure.
    Save(context.Context, User) error
    // Flush is the caller-driven flush of buffered items.
    Flush(context.Context) error
    // SaveAll returns an error on failure.
    // NOTE(review): how SaveAll differs from Flush is left to the implementation — confirm.
    SaveAll(context.Context) error
}

// LoggingBufferedUserRepository is a skeleton UserRepository. Its exported
// methods are placeholders that only log that they were called; the buffering
// fields are used solely by the unexported flush helper.
type LoggingBufferedUserRepository struct {
    buffer        []string      // pending items printed by flush
    bufferSize    int           // configured batch size; not read by any method yet
    bufferTimeout time.Duration // configured flush interval; not read by any method yet
    mutex         sync.Mutex    // not locked by any method yet
    // no close channel is needed in this variant
}

// Save is a placeholder: it logs the user's name and ID and returns nil.
// The real implementation would add the user to the buffer and flush once
// the configured size is reached.
func (r *LoggingBufferedUserRepository) Save(ctx context.Context, user User) error {
    name, id := user.Name, user.ID
    fmt.Printf("Save called %v: %v\n", name, id)
    return nil
}

// Flush is a placeholder: it logs the call and returns nil. The real
// implementation would immediately write out any buffered items.
func (r *LoggingBufferedUserRepository) Flush(ctx context.Context) error {
    fmt.Printf("Flush called\n")
    return nil
}

// SaveAll is a placeholder: it logs the call and returns nil. The real
// implementation would wait for pending saves and write out whatever is
// still buffered.
func (r *LoggingBufferedUserRepository) SaveAll(ctx context.Context) error {
    fmt.Printf("SaveAll called\n")
    return nil
}

// flush prints the buffered items as one batch and then empties the buffer.
// An empty buffer produces no output. It always returns nil.
func (r *LoggingBufferedUserRepository) flush(ctx context.Context) error {
    if pending := len(r.buffer); pending > 0 {
        // This stands in for the actual batch 'save'.
        fmt.Printf("flushing buffer, size=%d, cid=%s, buffer=$%v\n", pending, ctx.Value("cid"), r.buffer)
        r.buffer = []string{}
    }
    return nil
}

// main exercises the repository skeleton: it saves five sample users, pauses
// for five seconds, and then calls SaveAll followed by Flush, reporting any
// error on standard output.
func main() {
    ctx := context.Background()

    repo := &LoggingBufferedUserRepository{
        bufferSize:    5,
        bufferTimeout: 1 * time.Second,
    }

    users := []User{
        {ID: 1, Name: "Alice"},
        {ID: 2, Name: "Bob"},
        {ID: 3, Name: "Charlie"},
        {ID: 4, Name: "Dave"},
        {ID: 5, Name: "Eve"},
    }

    for i := range users {
        user := users[i]
        err := repo.Save(ctx, user)
        if err != nil {
            fmt.Printf("error saving user %v: %v\n", user, err)
        }
    }

    fmt.Printf("Waiting for flushing... (5 seconds)\n")
    time.Sleep(5 * time.Second)

    // Save whatever is still pending.
    saveErr := repo.SaveAll(ctx)
    if saveErr != nil {
        fmt.Printf("error saving remaining items: %v\n", saveErr)
    }

    // Then flush whatever is still buffered.
    flushErr := repo.Flush(ctx)
    if flushErr != nil {
        fmt.Printf("error flushing remaining items: %v\n", flushErr)
    }
}

这个输出应该是:

Save called Alice: 1
Save called Bob: 2
Save called Charlie: 3
Save called Dave: 4
Save called Eve: 5
Waiting for flushing... (5 seconds)
SaveAll called
Flush called

Program exited.
© www.soinside.com 2019 - 2024. All rights reserved.