This document discusses scaling a feed processing microservice that retrieves an HTTP stream, decompresses it with GZip, parses the XML, extracts valid products, and uploads them to S3. It introduces using Enumerators to build a processing pipeline where each step returns an Enumerator. Classes are defined for each step, like GZipDecoder and Product. The pipeline is constructed by chaining the classes together using Pipeline#pipe.
# Slide 7: callback-style wiring, before the Enumerator refactor.
# Data flows bottom-up: the HTTP request feeds the GZip decoder,
# the decoder feeds chunked_io, and the XML reader pulls from chunked_io.
7. chunked_io = ChunkedIO.new
# Reader consumes decompressed bytes from chunked_io; the per-node block is
# intentionally empty on this slide (filled in on slide 8).
xml_reader = Nokogiri::XML::Reader.new(chunked_io) do |node|
end
# Each decompressed chunk is pushed into the IO the XML reader drains.
decoder = EventMachine::HttpDecoder::GZip.new do |decoded_chunk|
  chunked_io << decoded_chunk
end
# Streaming HTTP fetch: raw (still-compressed) chunks go to the decoder.
request = TyphoeusRequest.new(url)
request.run do |chunk|
  decoder << chunk
end
# Flush any bytes buffered inside the GZip decoder.
decoder.finalize
# Slide 8: same wiring as slide 7, now with the XML node handler filled in —
# each parsed node becomes a Product, and valid products are streamed to S3.
8. chunked_io = ChunkedIO.new
xml_reader = Nokogiri::XML::Reader.new(chunked_io) do |node|
  product = Product.parse(node)
  # NOTE(review): s3_stream is defined outside this snippet — presumably an
  # S3 multipart-upload sink; verify against the surrounding code.
  s3_stream << product if product.valid?
end
# Decompressed chunks feed the IO object the XML reader pulls from.
decoder = EventMachine::HttpDecoder::GZip.new do |decoded_chunk|
  chunked_io << decoded_chunk
end
# Streaming HTTP fetch: raw compressed chunks go straight to the decoder.
request = TyphoeusRequest.new(url)
request.run do |chunk|
  decoder << chunk
end
# Flush any remaining buffered bytes out of the decoder.
decoder.finalize
# Slide 11: the key refactoring idea — wrap a callback-based decoder in an
# Enumerator so each pipeline step exposes a uniform pull interface.
11. Enumerator
Enumerator.new do |yielder|
  # The decoder's callback pushes decompressed chunks out through the yielder,
  # turning push-style callbacks into a pull-style enumeration.
  decoder = EventMachine::HttpDecoder::GZip.new do |decoded_chunk|
    yielder << decoded_chunk
  end
  # Upstream step (defined elsewhere) supplies the compressed chunks.
  some_other_enumerator.each { |chunk| decoder << chunk }
  decoder.finalize
end
# 12.
# Pipeline wraps an Enumerator (via SimpleDelegator, so it still quacks like
# one) and adds #pipe, letting processing steps be chained left-to-right:
#   Pipeline.build(source).pipe(GZipDecoder).pipe(XMLParser)...
class Pipeline < SimpleDelegator
  # Applies a callable step to this pipeline's enumerator and wraps the
  # result so further #pipe calls can be chained.
  #
  # callable - any object responding to #call(enumerable) and returning
  #            an Enumerator (e.g. the step classes like GZipDecoder).
  #
  # Returns a new Pipeline wrapping the step's output.
  def pipe(callable)
    Pipeline.new(callable.call(self))
  end

  # Entry point: wraps a plain enumerator in a Pipeline.
  #
  # Fix: the original `new Pipeline(enumerator)` parsed as
  # `new(Pipeline(enumerator))` — a call to a nonexistent `Pipeline` method —
  # raising NoMethodError. The intent is simply `new(enumerator)`.
  def self.build(enumerator)
    new(enumerator)
  end
end
# 13.
# A pipeline step: lazily decompresses a stream of GZip-compressed chunks.
# Conforms to the step interface expected by Pipeline#pipe — a callable
# taking an enumerable of input chunks and returning an Enumerator of
# decompressed chunks.
class GZipDecoder
  # enumerable - yields compressed byte chunks (e.g. the HTTP body stream).
  #
  # Returns an Enumerator yielding decompressed chunks as they are produced.
  def self.call(enumerable)
    Enumerator.new do |output|
      # The decoder pushes each decompressed piece straight out through
      # the enumerator's yielder.
      gzip = EventMachine::HttpDecoder::GZip.new { |piece| output << piece }
      enumerable.each do |compressed|
        gzip << compressed
      end
      # Flush whatever the decoder still holds buffered.
      gzip.finalize
    end
  end
end