History: github.com/Shopify/sarama, a Kafka library developed by Shopify that provides producers, consumers, partition consumers, and more; it is now maintained as IBM/sarama.
Two kinds of producers: the AsyncProducer (asynchronous; recommended in most cases) and the SyncProducer (synchronous; blocks until the ack arrives).
- The AsyncProducer accepts messages on a channel and produces them asynchronously in the background as efficiently as possible; it is preferred in most cases.
- The SyncProducer provides a method which will block until Kafka acknowledges the message as produced.
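For orientation, a minimal AsyncProducer sketch; the broker address and topic name are placeholders, and error handling is trimmed:

package main

import (
	"log"

	"github.com/IBM/sarama"
)

func main() {
	config := sarama.NewConfig()
	config.Producer.Return.Successes = true // deliver acks on the Successes() channel

	producer, err := sarama.NewAsyncProducer([]string{"localhost:9092"}, config)
	if err != nil {
		log.Fatal(err)
	}
	defer producer.AsyncClose()

	// Sends are channel writes; results come back asynchronously.
	producer.Input() <- &sarama.ProducerMessage{
		Topic: "my-topic",
		Value: sarama.StringEncoder("hello"),
	}

	select {
	case msg := <-producer.Successes():
		log.Printf("produced to partition %d at offset %d", msg.Partition, msg.Offset)
	case perr := <-producer.Errors():
		log.Printf("produce failed: %v", perr)
	}
}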
Synchronous blocking producer: less efficient, since each send blocks until the broker acknowledges it.
// github.com/IBM/[email protected]/sync_producer.go
type SyncProducer interface {
	// SendMessage produces the given message and blocks until it either
	// succeeds or fails, returning the partition and offset on success.
	SendMessage(msg *ProducerMessage) (partition int32, offset int64, err error)
// SendMessages produces a given set of messages, and returns only when all
// messages in the set have either succeeded or failed. Note that messages
// can succeed and fail individually; if some succeed and some fail,
// SendMessages will return an error.
SendMessages(msgs []*ProducerMessage) error
	// Close shuts down the producer; it must be called before the producer
	// passes out of scope, or connections and goroutines will leak.
	Close() error
	// TxnStatus returns the current producer transaction status.
	TxnStatus() ProducerTxnStatusFlag
	// IsTransactional returns true when the current producer is transactional.
	IsTransactional() bool
	// BeginTxn marks the current transaction as ready.
	BeginTxn() error
	// CommitTxn commits the current transaction.
	CommitTxn() error
	// AbortTxn aborts the current transaction.
	AbortTxn() error
	// AddOffsetsToTxn adds the associated offsets to the current transaction.
	AddOffsetsToTxn(offsets map[string][]*PartitionOffsetMetadata, groupId string) error
	// AddMessageToTxn adds a message's offsets to the current transaction.
	AddMessageToTxn(msg *ConsumerMessage, groupId string, metadata *string) error
}
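A matching SyncProducer sketch (same placeholder broker and topic; NewSyncProducer requires Producer.Return.Successes to be enabled):

package main

import (
	"log"

	"github.com/IBM/sarama"
)

func main() {
	config := sarama.NewConfig()
	config.Producer.Return.Successes = true // required by NewSyncProducer

	producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, config)
	if err != nil {
		log.Fatal(err)
	}
	defer producer.Close() // must be closed manually, as noted above

	// SendMessage blocks until the broker acknowledges the message.
	partition, offset, err := producer.SendMessage(&sarama.ProducerMessage{
		Topic: "my-topic",
		Value: sarama.StringEncoder("hello"),
	})
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("stored in partition %d at offset %d", partition, offset)
}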
Partition handling
func (tp *topicProducer) partitionMessage(msg *ProducerMessage) error {
	var partitions []int32
	// The metadata lookup is wrapped in a circuit breaker so that repeated
	// failures back off instead of hammering the cluster.
	err := tp.breaker.Run(func() (err error) {
		requiresConsistency := false
		if ep, ok := tp.partitioner.(DynamicConsistencyPartitioner); ok {
			requiresConsistency = ep.MessageRequiresConsistency(msg)
		} else {
			requiresConsistency = tp.partitioner.RequiresConsistency()
		}

		// A consistent partitioner (e.g. hash-by-key) must see the full
		// partition list so a given key always maps to the same partition;
		// otherwise only currently writable partitions are candidates.
		if requiresConsistency {
			partitions, err = tp.parent.client.Partitions(msg.Topic)
		} else {
			partitions, err = tp.parent.client.WritablePartitions(msg.Topic)
		}
		return
	})
	if err != nil {
		return err
	}

	numPartitions := int32(len(partitions))
	if numPartitions == 0 {
		return ErrLeaderNotAvailable
	}

	// Ask the partitioner for an index into the partition list and validate it.
	choice, err := tp.partitioner.Partition(msg, numPartitions)
	if err != nil {
		return err
	} else if choice < 0 || choice >= numPartitions {
		return ErrInvalidPartition
	}

	msg.Partition = partitions[choice]
	return nil
}
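Anything implementing sarama's Partitioner interface can be plugged in here. Below is a hypothetical sketch (the '#'-prefix routing rule is invented for illustration) that falls back to the built-in hash partitioner:

package partitioning

import "github.com/IBM/sarama"

// prefixPartitioner routes messages whose key starts with '#' to partition 0
// and hashes everything else. Purely illustrative.
type prefixPartitioner struct {
	fallback sarama.Partitioner
}

// NewPrefixPartitioner matches sarama's PartitionerConstructor signature.
func NewPrefixPartitioner(topic string) sarama.Partitioner {
	return &prefixPartitioner{fallback: sarama.NewHashPartitioner(topic)}
}

func (p *prefixPartitioner) Partition(msg *sarama.ProducerMessage, numPartitions int32) (int32, error) {
	if msg.Key != nil {
		key, err := msg.Key.Encode()
		if err != nil {
			return -1, err
		}
		if len(key) > 0 && key[0] == '#' {
			return 0, nil
		}
	}
	return p.fallback.Partition(msg, numPartitions)
}

// RequiresConsistency is true so that partitionMessage above fetches the full
// partition list and a given key always maps to the same partition.
func (p *prefixPartitioner) RequiresConsistency() bool { return true }

Wire it in with config.Producer.Partitioner = NewPrefixPartitioner.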
Compression: before Kafka 2.1.0, Kafka supported three compression algorithms: GZIP, Snappy, and LZ4. Starting with 2.1.0, Kafka officially supports the Zstandard algorithm (zstd for short), an open-source algorithm from Facebook that achieves a very high compression ratio.
// compress encodes data with the requested codec; gzip and lz4 writers are
// pooled so each batch does not reallocate compressor state.
func compress(cc CompressionCodec, level int, data []byte) ([]byte, error) {
switch cc {
case CompressionNone:
return data, nil
case CompressionGZIP:
var (
err error
buf bytes.Buffer
writer *gzip.Writer
)
		// Well-known gzip levels reuse a dedicated writer pool; any other
		// level falls through to allocating a fresh writer.
		switch level {
case CompressionLevelDefault:
writer = gzipWriterPool.Get().(*gzip.Writer)
defer gzipWriterPool.Put(writer)
writer.Reset(&buf)
case 1:
writer = gzipWriterPoolForCompressionLevel1.Get().(*gzip.Writer)
defer gzipWriterPoolForCompressionLevel1.Put(writer)
writer.Reset(&buf)
//...
case 9:
writer = gzipWriterPoolForCompressionLevel9.Get().(*gzip.Writer)
defer gzipWriterPoolForCompressionLevel9.Put(writer)
writer.Reset(&buf)
default:
writer, err = gzip.NewWriterLevel(&buf, level)
if err != nil {
return nil, err
}
}
if _, err := writer.Write(data); err != nil {
return nil, err
}
if err := writer.Close(); err != nil {
return nil, err
}
return buf.Bytes(), nil
case CompressionSnappy:
return snappy.Encode(data), nil
case CompressionLZ4:
writer := lz4WriterPool.Get().(*lz4.Writer)
defer lz4WriterPool.Put(writer)
var buf bytes.Buffer
writer.Reset(&buf)
if _, err := writer.Write(data); err != nil {
return nil, err
}
if err := writer.Close(); err != nil {
return nil, err
}
return buf.Bytes(), nil
case CompressionZSTD:
return zstdCompress(ZstdEncoderParams{level}, nil, data)
default:
return nil, PacketEncodingError{fmt.Sprintf("unsupported compression codec (%d)", cc)}
}
}
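Which codec and level get used is a config concern; a minimal sketch using sarama's real config fields (the broker address is a placeholder):

package main

import (
	"log"

	"github.com/IBM/sarama"
)

func main() {
	config := sarama.NewConfig()
	config.Version = sarama.V2_1_0_0 // zstd requires Kafka >= 2.1.0
	config.Producer.Compression = sarama.CompressionZSTD
	config.Producer.Return.Successes = true // required by NewSyncProducer
	// For gzip, a specific level can be requested instead; levels 1 and 9
	// hit the dedicated writer pools shown above:
	//   config.Producer.Compression = sarama.CompressionGZIP
	//   config.Producer.CompressionLevel = 1

	producer, err := sarama.NewSyncProducer([]string{"localhost:9092"}, config)
	if err != nil {
		log.Fatal(err)
	}
	defer producer.Close()
}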
Partition consumer
type Consumer interface {
// Topics returns the set of available topics as retrieved from the cluster
// metadata. This method is the same as Client.Topics(), and is provided for
// convenience.
Topics() ([]string, error)
// Partitions returns the sorted list of all partition IDs for the given topic.
// This method is the same as Client.Partitions(), and is provided for convenience.
Partitions(topic string) ([]int32, error)
// ConsumePartition creates a PartitionConsumer on the given topic/partition with
// the given offset. It will return an error if this Consumer is already consuming
// on the given topic/partition. Offset can be a literal offset, or OffsetNewest
// or OffsetOldest
ConsumePartition(topic string, partition int32, offset int64) (PartitionConsumer, error)
// HighWaterMarks returns the current high water marks for each topic and partition.
// Consistency between partitions is not guaranteed since high water marks are updated separately.
HighWaterMarks() map[string]map[int32]int64
// Close shuts down the consumer. It must be called after all child
// PartitionConsumers have already been closed.
Close() error
// Pause suspends fetching from the requested partitions. Future calls to the broker will not return any
// records from these partitions until they have been resumed using Resume()/ResumeAll().
// Note that this method does not affect partition subscription.
// In particular, it does not cause a group rebalance when automatic assignment is used.
Pause(topicPartitions map[string][]int32)
// Resume resumes specified partitions which have been paused with Pause()/PauseAll().
// New calls to the broker will return records from these partitions if there are any to be fetched.
Resume(topicPartitions map[string][]int32)
// PauseAll suspends fetching from all partitions. Future calls to the broker will not return any
// records from these partitions until they have been resumed using Resume()/ResumeAll().
// Note that this method does not affect partition subscription.
// In particular, it does not cause a group rebalance when automatic assignment is used.
PauseAll()
// ResumeAll resumes all partitions which have been paused with Pause()/PauseAll().
// New calls to the broker will return records from these partitions if there are any to be fetched.
ResumeAll()
}
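A minimal sketch of consuming a single topic/partition (broker, topic, and partition number are placeholders):

package main

import (
	"log"

	"github.com/IBM/sarama"
)

func main() {
	consumer, err := sarama.NewConsumer([]string{"localhost:9092"}, sarama.NewConfig())
	if err != nil {
		log.Fatal(err)
	}
	defer consumer.Close() // runs last: children must be closed first

	// Attach to partition 0, starting from the oldest retained offset.
	pc, err := consumer.ConsumePartition("my-topic", 0, sarama.OffsetOldest)
	if err != nil {
		log.Fatal(err)
	}
	defer pc.Close()

	for msg := range pc.Messages() {
		log.Printf("partition=%d offset=%d value=%s", msg.Partition, msg.Offset, msg.Value)
	}
}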
Consumer group
type ConsumerGroup interface {
// Consume joins a cluster of consumers for a given list of topics and
// starts a blocking ConsumerGroupSession through the ConsumerGroupHandler.
//
// The life-cycle of a session is represented by the following steps:
//
// 1. The consumers join the group (as explained in https://kafka.apache.org/documentation/#intro_consumers)
// and are assigned their "fair share" of partitions, aka 'claims'.
// 2. Before processing starts, the handler's Setup() hook is called to notify the user
// of the claims and allow any necessary preparation or alteration of state.
// 3. For each of the assigned claims the handler's ConsumeClaim() function is then called
// in a separate goroutine which requires it to be thread-safe. Any state must be carefully protected
// from concurrent reads/writes.
// 4. The session will persist until one of the ConsumeClaim() functions exits. This can be either when the
// parent context is canceled or when a server-side rebalance cycle is initiated.
// 5. Once all the ConsumeClaim() loops have exited, the handler's Cleanup() hook is called
// to allow the user to perform any final tasks before a rebalance.
// 6. Finally, marked offsets are committed one last time before claims are released.
//
// Please note, that once a rebalance is triggered, sessions must be completed within
// Config.Consumer.Group.Rebalance.Timeout. This means that ConsumeClaim() functions must exit
// as quickly as possible to allow time for Cleanup() and the final offset commit. If the timeout
// is exceeded, the consumer will be removed from the group by Kafka, which will cause offset
// commit failures.
// This method should be called inside an infinite loop: when a
// server-side rebalance happens, the consumer session will need to be
// recreated to get the new claims.
Consume(ctx context.Context, topics []string, handler ConsumerGroupHandler) error
// Errors returns a read channel of errors that occurred during the consumer life-cycle.
// By default, errors are logged and not returned over this channel.
// If you want to implement any custom error handling, set your config's
// Consumer.Return.Errors setting to true, and read from this channel.
Errors() <-chan error
// Close stops the ConsumerGroup and detaches any running sessions. It is required to call
// this function before the object passes out of scope, as it will otherwise leak memory.
Close() error
// Pause suspends fetching from the requested partitions. Future calls to the broker will not return any
// records from these partitions until they have been resumed using Resume()/ResumeAll().
// Note that this method does not affect partition subscription.
// In particular, it does not cause a group rebalance when automatic assignment is used.
Pause(partitions map[string][]int32)
// Resume resumes specified partitions which have been paused with Pause()/PauseAll().
// New calls to the broker will return records from these partitions if there are any to be fetched.
Resume(partitions map[string][]int32)
// PauseAll suspends fetching from all partitions. Future calls to the broker will not return any
// records from these partitions until they have been resumed using Resume()/ResumeAll().
// Note that this method does not affect partition subscription.
// In particular, it does not cause a group rebalance when automatic assignment is used.
PauseAll()
// ResumeAll resumes all partitions which have been paused with Pause()/PauseAll().
// New calls to the broker will return records from these partitions if there are any to be fetched.
ResumeAll()
}
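A minimal consumer-group sketch showing the infinite Consume loop and a handler; group ID, topic, and broker are placeholders:

package main

import (
	"context"
	"log"

	"github.com/IBM/sarama"
)

// handler implements sarama.ConsumerGroupHandler.
type handler struct{}

func (handler) Setup(sarama.ConsumerGroupSession) error   { return nil }
func (handler) Cleanup(sarama.ConsumerGroupSession) error { return nil }

// ConsumeClaim is run in its own goroutine for each claimed partition,
// so any shared state it touches must be protected.
func (handler) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
	for msg := range claim.Messages() {
		log.Printf("topic=%s partition=%d offset=%d", msg.Topic, msg.Partition, msg.Offset)
		sess.MarkMessage(msg, "") // marked offsets are committed per the session life-cycle above
	}
	return nil
}

func main() {
	config := sarama.NewConfig()
	config.Version = sarama.V2_1_0_0 // consumer groups need protocol >= 0.10.2

	group, err := sarama.NewConsumerGroup([]string{"localhost:9092"}, "my-group", config)
	if err != nil {
		log.Fatal(err)
	}
	defer group.Close()

	ctx := context.Background()
	// Consume returns on every server-side rebalance; loop to rejoin and
	// pick up the new claims.
	for {
		if err := group.Consume(ctx, []string{"my-topic"}, handler{}); err != nil {
			log.Fatal(err)
		}
		if ctx.Err() != nil {
			return
		}
	}
}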