InterCon 2016 - Performance, anti-patterns and stacks for agile development
Fabio Akita, co-founder of Codeminer 42, talks about performance, anti-patterns and stacks for agile development at InterCon 2016.
Learn more: http://intercon2016.imasters.com.br/
4.
#!/usr/bin/env ruby
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')

require 'optparse'

options = { test: false }
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: manga-downloadr [options]"
  opts.on("-t", "--test", "Test routine") do |t|
    options[:url]       = "http://www.mangareader.net/onepunch-man"
    options[:name]      = "one-punch-man"
    options[:directory] = "/tmp/manga-downloadr/one-punch-man"
    options[:test]      = true
  end
  opts.on("-u URL", "--url URL",
    "Full MangaReader.net manga homepage URL - required") do |v|
    options[:url] = v
  end
  opts.on("-n NAME", "--name NAME",
    "slug to be used for the sub-folder to store all manga files - required") do |n|
    options[:name] = n
  end
  opts.on("-d DIRECTORY", "--directory DIRECTORY",
    "main folder where all mangas will be stored - required") do |d|
    options[:directory] = d
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
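The slide stops before the parser is actually invoked; a minimal continuation, assuming the required options are validated by hand (the real CLI may differ):

option_parser.parse!(ARGV)

# hypothetical guard, not shown on the slides: fail early when a
# required option is missing
unless options[:test] || (options[:url] && options[:name] && options[:directory])
  puts option_parser
  exit 1
end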
6.
require 'manga-downloadr'

generator = MangaDownloadr::Workflow.create(options[:url], options[:name],
  options[:directory])

puts "Massive parallel scanning of all chapters "
generator.fetch_chapter_urls!
puts "\nMassive parallel scanning of all pages "
generator.fetch_page_urls!
puts "\nMassive parallel scanning of all images "
generator.fetch_image_urls!
puts "\nTotal page links found: #{generator.chapter_pages_count}"
puts "\nMassive parallel download of all page images "
generator.fetch_images!
puts "\nCompiling all images into PDF volumes "
generator.compile_ebooks!
puts "\nProcess finished."
7.
require 'manga-downloadr'

generator = MangaDownloadr::Workflow.create(options[:url], options[:name],
  options[:directory])

unless generator.state?(:chapter_urls)
  puts "Massive parallel scanning of all chapters "
  generator.fetch_chapter_urls!
end
unless generator.state?(:page_urls)
  puts "\nMassive parallel scanning of all pages "
  generator.fetch_page_urls!
end
unless generator.state?(:image_urls)
  puts "\nMassive parallel scanning of all images "
  generator.fetch_image_urls!
  puts "\nTotal page links found: #{generator.chapter_pages_count}"
end
unless generator.state?(:images)
  puts "\nMassive parallel download of all page images "
  generator.fetch_images!
end
unless options[:test]
  puts "\nCompiling all images into PDF volumes "
  generator.compile_ebooks!
end
puts "\nProcess finished."
8.
module MangaDownloadr
  ImageData = Struct.new(:folder, :filename, :url)

  class Workflow
    def initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {})
    end

    def fetch_chapter_urls!
    end

    def fetch_page_urls!
    end

    def fetch_image_urls!
    end

    def fetch_images!
    end

    def compile_ebooks!
    end

    def state?(state)
    end

    private

    def current_state(state)
    end
  end
end
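The slides leave state? and current_state empty; a minimal sketch of one way to persist progress between runs, assuming simple marker files inside the manga folder (the real implementation may differ):

require 'fileutils'

def state?(state)
  # a phase counts as "done" when its marker file exists
  File.exist?(File.join(manga_root_folder, ".#{state}"))
end

def current_state(state)
  # drop a marker file so an interrupted run can resume later
  FileUtils.touch(File.join(manga_root_folder, ".#{state}"))
end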
12.
def fetch_page_urls!
  chapter_list.each do |chapter_link|
    response = Typhoeus.get "http://www.mangareader.net#{chapter_link}"
    chapter_doc = Nokogiri::HTML(response.body)
    pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
    chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
    print '.'
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
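As an aside, on Ruby 2.4+ the inject above collapses to a one-liner with identical behavior:

self.chapter_pages_count = chapter_pages.values.sum(&:size)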
13.
def fetch_page_urls!
  chapter_list.each do |chapter_link|
    begin
      response = Typhoeus.get "http://www.mangareader.net#{chapter_link}"
      begin
        chapter_doc = Nokogiri::HTML(response.body)
        pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
        chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
        print '.'
      rescue => e
        self.fetch_page_urls_errors << { url: chapter_link, error: e, body: response.body }
        print 'x'
      end
    rescue => e
      puts e
    end
  end
  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
14.
def fetch_page_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_link|
    begin
      request = Typhoeus::Request.new "http://www.mangareader.net#{chapter_link}"
      request.on_complete do |response|
        begin
          chapter_doc = Nokogiri::HTML(response.body)
          pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
          chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
          print '.'
        rescue => e
          self.fetch_page_urls_errors << { url: chapter_link, error: e, body: response.body }
          print 'x'
        end
      end
      hydra.queue request
    rescue => e
      puts e
    end
  end
  hydra.run
  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
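hydra_concurrency is never shown on the slides; a plausible definition, assuming it is simply a tunable read from the environment:

def hydra_concurrency
  (ENV['HYDRA_CONCURRENCY'] || 50).to_i # hypothetical default
end

Typhoeus::Hydra multiplexes every queued request over libcurl's multi interface, so hydra.run blocks until all on_complete callbacks have fired while keeping at most max_concurrency requests in flight.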
23.
def fetch_image_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_key|
    chapter_pages[chapter_key].each do |page_link|
      begin
        request = Typhoeus::Request.new "http://www.mangareader.net#{page_link}"
        request.on_complete do |response|
          begin
            chapter_doc = Nokogiri::HTML(response.body)
            image = chapter_doc.css('#img').first
            tokens = image['alt'].match("^(.*?)\s-\s(.*?)$")
            extension = File.extname(URI.parse(image['src']).path)
            chapter_images.merge!(chapter_key => []) if chapter_images[chapter_key].nil?
            chapter_images[chapter_key] << ImageData.new( tokens[1], "#{tokens[2]}#{extension}", image['src'] )
            print '.'
          rescue => e
            self.fetch_image_urls_errors << { url: page_link, error: e }
            print 'x'
          end
        end
        hydra.queue request
      rescue => e
        puts e
      end
    end
  end
  hydra.run
  unless fetch_image_urls_errors.empty?
    puts "\nErrors fetching image urls:"
    puts fetch_image_urls_errors
  end
  current_state :image_urls
end
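The alt attribute on MangaReader image tags apparently follows a "folder - page" pattern, which the match above splits in two; with an illustrative alt text:

tokens = "Onepunch-Man 1 - Page 1".match("^(.*?)\s-\s(.*?)$")
tokens[1] # => "Onepunch-Man 1" (the folder)
tokens[2] # => "Page 1"         (the filename, before the extension is appended)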
24.
def fetch_images!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each_with_index do |chapter_key, chapter_index|
    chapter_images[chapter_key].each do |file|
      downloaded_filename = File.join(manga_root_folder, file.folder, file.filename)
      next if File.exist?(downloaded_filename) # effectively resumes the download list without re-downloading everything
      request = Typhoeus::Request.new file.url
      request.on_complete do |response|
        begin
          # download
          FileUtils.mkdir_p(File.join(manga_root_folder, file.folder))
          File.open(downloaded_filename, "wb+") { |f| f.write response.body }
          unless is_test
            # resize
            image = Magick::Image.read( downloaded_filename ).first
            resized = image.resize_to_fit(600, 800)
            resized.write( downloaded_filename ) { self.quality = 50 }
            GC.start # to avoid a leak too big (ImageMagick is notorious for that, especially on resizes)
          end
          print '.'
        rescue => e
          self.fetch_images_errors << { url: file.url, error: e }
          print '#'
        end
      end
      hydra.queue request
    end
  end
  hydra.run
  unless fetch_images_errors.empty?
    puts "\nErrors downloading images:"
    puts fetch_images_errors
  end
  current_state :images
end
25.
def compile_ebooks!
  folders = Dir[manga_root_folder + "/*/"].sort_by { |element| element.split(" ").last.to_i }
  self.download_links = folders.inject([]) do |list, folder|
    list += Dir[folder + "*.*"].sort_by { |element| element.split(" ").last.to_i }
  end

  # concatenating PDF files (250 pages per volume)
  chapter_number = 0
  while !download_links.empty?
    chapter_number += 1
    pdf_file = File.join(manga_root_folder, "#{manga_title} #{chapter_number}.pdf")
    list = download_links.slice!(0..pages_per_volume)
    Prawn::Document.generate(pdf_file, page_size: page_size) do |pdf|
      list.each do |image_file|
        begin
          pdf.image image_file, position: :center, vposition: :center
        rescue => e
          puts "Error in #{image_file} - #{e}"
        end
      end
    end
    print '.'
  end
  current_state :ebooks
end
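One detail worth noting: slice!(0..pages_per_volume) removes pages_per_volume + 1 entries per pass, since Ruby ranges written with .. are inclusive; the exclusive form takes exactly the configured count:

list = download_links.slice!(0...pages_per_volume) # exactly pages_per_volume images per volume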
26.
manga-downloadr -t
199.69s user 10.30s system 124% cpu 2:48.14 total
41.
worker.ex

defmodule PoolManagement.Worker do
  use GenServer

  # Public APIs
  def index_page(url, source) do
  end

  def chapter_page([chapter_link, source]) do
  end

  def page_image([page_link, source]) do
  end

  def page_download_image(image_data, directory) do
  end

  # internal GenServer implementation
  def handle_call({:chapter_page, chapter_link, source}, _from, state) do
  end

  def handle_call({:page_image, page_link, source}, _from, state) do
  end

  def handle_call({:page_download_image, image_data, directory}, _from, state) do
  end

  ## Helper functions
  defp manga_source(source, module) do
    case source do
      "mangafox"    -> :"Elixir.ExMangaDownloadr.Mangafox.#{module}"
      "mangareader" -> :"Elixir.ExMangaDownloadr.MangaReader.#{module}"
    end
  end

  defp download_image({image_src, image_filename}, directory) do
  end
end
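The bodies are elided on the slides, but the shape is telling: each public function presumably dispatches through GenServer.call to a pooled worker, so the pool size (not the caller) bounds how much work runs at once, while manga_source resolves the site-specific parser module (MangaReader or Mangafox) at runtime by building its atom.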
47.
.
├── _build
│   └── ...
├── config
│   └── config.exs
├── deps
│   ├── ...
├── ex_manga_downloadr
├── lib
│   ├── ex_manga_downloadr
│   │   ├── cli.ex
│   │   ├── mangafox
│   │   │   ├── chapter_page.ex
│   │   │   ├── index_page.ex
│   │   │   └── page.ex
│   │   ├── mangareader
│   │   │   ├── chapter_page.ex
│   │   │   ├── index_page.ex
│   │   │   └── page.ex
│   │   ├── pool_management
│   │   │   ├── supervisor.ex
│   │   │   └── worker.ex
│   │   └── workflow.ex
│   ├── ex_manga_downloadr.ex
│   └── pool_management.ex
├── mix.exs
├── mix.lock
├── README.md
└── test
    ├── ex_manga_downloadr
    │   ├── mangafox_test.exs
    │   └── mangareader_test.exs
    ├── ex_manga_downloadr_test.exs
    └── test_helper.exs

61 directories, 281 files
59.
cli.ex

defmodule ExMangaDownloadr.CLI do
  alias ExMangaDownloadr.Workflow
  require ExMangaDownloadr

  def main(args) do
    args
    |> parse_args
    |> process
  end

  ...

  defp parse_args(args) do
  end

  defp process(:help) do
  end

  defp process(directory, url) do
    File.mkdir_p!(directory)
    File.mkdir_p!("/tmp/ex_manga_downloadr_cache")
    manga_name = directory |> String.split("/") |> Enum.reverse |> Enum.at(0)
    url
    |> Workflow.determine_source
    |> Workflow.chapters
    |> Workflow.pages
    |> Workflow.images_sources
    |> Workflow.process_downloads(directory)
    |> Workflow.optimize_images
    |> Workflow.compile_pdfs(manga_name)
    |> finish_process
  end

  defp process_test(directory, url) do
  end

  defp finish_process(directory) do
  end
end
61.
workflow.ex

defmodule ExMangaDownloadr.Workflow do
  alias PoolManagement.Worker
  require Logger

  def chapters({url, source}) do
  end

  def pages({chapter_list, source}) do
    pages_list = chapter_list
      |> Enum.map(&Worker.chapter_page([&1, source]))
      |> Enum.map(&Task.await(&1, @await_timeout_ms))
      |> Enum.reduce([], fn {:ok, list}, acc -> acc ++ list end)
    {pages_list, source}
  end

  def images_sources({pages_list, source}) do
  end

  def process_downloads(images_list, directory) do
  end

  def optimize_images(directory) do
    Porcelain.shell("mogrify -resize #{@image_dimensions} #{directory}/*.jpg")
    directory
  end

  def compile_pdfs(directory, manga_name) do
  end
end
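(@await_timeout_ms and @image_dimensions are module attributes, presumably defined at the top of the real file; the slides omit their values.)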
63.
ex_manga_downloadr --test
28.36s user 15.57s system 33% cpu 2:10.28 total
69.
defmodule ExMangaDownloadr.MangaReader.IndexPage do
  require Logger
  require ExMangaDownloadr

  def chapters(manga_root_url) do
    ExMangaDownloadr.fetch manga_root_url, do: collect
  end

  defp collect(html) do
    {fetch_manga_title(html), fetch_chapters(html)}
  end

  defp fetch_manga_title(html) do
    html
    |> Floki.find("#mangaproperties h1")
    |> Floki.text
  end

  defp fetch_chapters(html) do
    html
    |> Floki.find("#listing a")
    |> Floki.attribute("href")
  end
end
72.
require "./downloadr_client"
require "xml"
module CrMangaDownloadr
class Chapters < DownloadrClient
def initialize(@domain, @root_uri : String, @cache_http = false)
super(@domain, @cache_http)
end
def fetch
html = get(@root_uri)
nodes = html.xpath_nodes(
"//table[contains(@id, 'listing')]//td//a/@href")
nodes.map { |node| node.text.as(String) }
end
end
end
DownloadrClient
74.
module CrMangaDownloadr
  class DownloadrClient
    @http_client : HTTP::Client

    def initialize(@domain : String, @cache_http = false)
    end

    def get(uri : String)
      cache_path = "/tmp/cr_manga_downloadr_cache/#{cache_filename(uri)}"
      while true
        begin
          response = if @cache_http && File.exists?(cache_path)
            body = File.read(cache_path)
            HTTP::Client::Response.new(200, body)
          else
            @http_client.get(uri, headers: HTTP::Headers{
              "User-Agent" => CrMangaDownloadr::USER_AGENT })
          end
          case response.status_code
          when 301
            uri = response.headers["Location"]
          when 200
            if @cache_http && !File.exists?(cache_path)
              File.open(cache_path, "w") do |f|
                f.print response.body
              end
            end
            return XML.parse_html(response.body)
          end
        rescue IO::Timeout
          puts "Sleeping over #{uri}"
          sleep 1
        end
      end
    end
  end
end
80.
require "fiberpool"
module CrMangaDownloadr
struct Concurrency
def initialize(@config : Config, @turn_on_engine = true); end
def fetch(collection : Array(A)?, engine_class : E.class,
&block : A, E? -> Array(B)?) : Array(B)
results = [] of B
if collection
pool = Fiberpool.new(collection,
@config.download_batch_size)
pool.run do |item|
engine = if @turn_on_engine
engine_class.new(@config.domain,
@config.cache_http)
end
if reply = block.call(item, engine)
results.concat(reply)
end
end
end
results
end
end
end
fetch
Concurrency
86.
module CrMangaDownloadr
  class Workflow
  end

  module Steps
    def self.fetch_chapters(config : Config)
    end

    def self.fetch_pages(chapters : Array(String)?, config : Config)
      puts "Fetching pages from all chapters ..."
      reactor = Concurrency.new(config)
      reactor.fetch(chapters, Pages) do |link, engine|
        engine.try(&.fetch(link))
      end
    end

    def self.fetch_images(pages : Array(String)?, config : Config)
    end

    def self.download_images(images : Array(Image)?, config : Config)
    end

    def self.optimize_images(downloads : Array(String), config : Config)
    end

    def self.prepare_volumes(downloads : Array(String), config : Config)
    end
  end
end
88.
cr_manga_downloadr -t
0.28s user 0.53s system 0% cpu 1:52.45 total
97.
# concurrency.cr
pool = Fiberpool.new(collection, @config.download_batch_size)
pool.run do |item|
  engine = if @turn_on_engine
    engine_class.new(@config.domain, @config.cache_http)
  end
  if reply = block.call(item, engine)
    results.concat(reply)
  end
end
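# for comparison, the same pool logic in the Ruby version (thread pool + mutex):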
pool = Thread.pool(@config.download_batch_size)
mutex = Mutex.new
results = []
collection.each do |item|
  pool.process {
    engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
    reply = block.call(item, engine)&.flatten
    mutex.synchronize do
      results += ( reply || [] )
    end
  }
end
pool.shutdown
99.
module CrMangaDownloadr
  class Pages < DownloadrClient
    def fetch(chapter_link : String)
      html = get(chapter_link)
      nodes = html.xpath_nodes("//div[@id='selectpage']//select[@id='pageMenu']//option")
      nodes.map { |node| "#{chapter_link}/#{node.text}" }
    end
  end
end
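# and the Ruby counterpart, where get takes a block: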
module MangaDownloadr
  class Pages < DownloadrClient
    def fetch(chapter_link)
      get chapter_link do |html|
        nodes = html.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
        nodes.map { |node| [chapter_link, node.children.to_s].join("/") }
      end
    end
  end
end
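The Ruby Pages above calls get with a block, implying a Ruby DownloadrClient analogous to the Crystal one; a minimal sketch of such a client, assuming Typhoeus and a /tmp file cache like the Crystal version uses (the real class may differ):

require 'typhoeus'
require 'nokogiri'
require 'digest'
require 'fileutils'

module MangaDownloadr
  class DownloadrClient
    CACHE_DIR = "/tmp/manga_downloadr_cache" # hypothetical location

    def initialize(domain, cache_http = false)
      @domain     = domain
      @cache_http = cache_http
      FileUtils.mkdir_p(CACHE_DIR) if @cache_http
    end

    # fetch the page (or read it from cache), parse the HTML and yield it
    def get(uri)
      cache_path = File.join(CACHE_DIR, Digest::MD5.hexdigest(uri))
      body = if @cache_http && File.exist?(cache_path)
        File.read(cache_path)
      else
        Typhoeus.get("http://#{@domain}#{uri}").body
      end
      File.write(cache_path, body) if @cache_http && !File.exist?(cache_path)
      yield Nokogiri::HTML(body)
    end
  end
end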
100.
manga-downloadr -t
16.55s user 6.65s system 17% cpu 2:13.86 total