InterCon 2016 - Performance, anti-patterns and stacks for agile development
Fabio Akita, co-founder of Codeminer 42, talks about performance, anti-patterns and stacks for agile development at InterCon 2016.
Learn more: http://intercon2016.imasters.com.br/
4.
#!/usr/bin/env ruby
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')

require 'optparse'

options = { test: false }
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: manga-downloadr [options]"
  opts.on("-t", "--test", "Test routine") do |t|
    options[:url]       = "http://www.mangareader.net/onepunch-man"
    options[:name]      = "one-punch-man"
    options[:directory] = "/tmp/manga-downloadr/one-punch-man"
    options[:test]      = true
  end
  opts.on("-u URL", "--url URL",
    "Full MangaReader.net manga homepage URL - required") do |v|
    options[:url] = v
  end
  opts.on("-n NAME", "--name NAME",
    "slug to be used for the sub-folder to store all manga files - required") do |n|
    options[:name] = n
  end
  opts.on("-d DIRECTORY", "--directory DIRECTORY",
    "main folder where all mangas will be stored - required") do |d|
    options[:directory] = d
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
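The slide stops before the parser is actually invoked; a minimal continuation, assuming the required options are validated by hand (the real CLI may differ):

option_parser.parse!(ARGV)

# hypothetical guard, not shown on the slides: fail early when a
# required option is missing
unless options[:test] || (options[:url] && options[:name] && options[:directory])
  puts option_parser
  exit 1
end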
6.
require 'manga-downloadr'

generator = MangaDownloadr::Workflow.create(options[:url], options[:name],
  options[:directory])

puts "Massive parallel scanning of all chapters "
generator.fetch_chapter_urls!
puts "\nMassive parallel scanning of all pages "
generator.fetch_page_urls!
puts "\nMassive parallel scanning of all images "
generator.fetch_image_urls!
puts "\nTotal page links found: #{generator.chapter_pages_count}"
puts "\nMassive parallel download of all page images "
generator.fetch_images!
puts "\nCompiling all images into PDF volumes "
generator.compile_ebooks!
puts "\nProcess finished."
7.
require 'manga-downloadr'

generator = MangaDownloadr::Workflow.create(options[:url], options[:name],
  options[:directory])

unless generator.state?(:chapter_urls)
  puts "Massive parallel scanning of all chapters "
  generator.fetch_chapter_urls!
end
unless generator.state?(:page_urls)
  puts "\nMassive parallel scanning of all pages "
  generator.fetch_page_urls!
end
unless generator.state?(:image_urls)
  puts "\nMassive parallel scanning of all images "
  generator.fetch_image_urls!
  puts "\nTotal page links found: #{generator.chapter_pages_count}"
end
unless generator.state?(:images)
  puts "\nMassive parallel download of all page images "
  generator.fetch_images!
end
unless options[:test]
  puts "\nCompiling all images into PDF volumes "
  generator.compile_ebooks!
end
puts "\nProcess finished."
8.
module MangaDownloadr
  ImageData = Struct.new(:folder, :filename, :url)

  class Workflow
    def initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {})
    end

    def fetch_chapter_urls!
    end

    def fetch_page_urls!
    end

    def fetch_image_urls!
    end

    def fetch_images!
    end

    def compile_ebooks!
    end

    def state?(state)
    end

    private

    def current_state(state)
    end
  end
end
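The slides leave state? and current_state empty; a minimal sketch of one way to persist progress between runs, assuming simple marker files inside the manga folder (the real implementation may differ):

require 'fileutils'

def state?(state)
  # a phase counts as "done" when its marker file exists
  File.exist?(File.join(manga_root_folder, ".#{state}"))
end

def current_state(state)
  # drop a marker file so an interrupted run can resume later
  FileUtils.touch(File.join(manga_root_folder, ".#{state}"))
end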
12.
def fetch_page_urls!
  chapter_list.each do |chapter_link|
    response = Typhoeus.get "http://www.mangareader.net#{chapter_link}"
    chapter_doc = Nokogiri::HTML(response.body)
    pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
    chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
    print '.'
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
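As an aside, on Ruby 2.4+ the inject above collapses to a one-liner with identical behavior:

self.chapter_pages_count = chapter_pages.values.sum(&:size)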
13.
def fetch_page_urls!
  chapter_list.each do |chapter_link|
    begin
      response = Typhoeus.get "http://www.mangareader.net#{chapter_link}"
      begin
        chapter_doc = Nokogiri::HTML(response.body)
        pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
        chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
        print '.'
      rescue => e
        self.fetch_page_urls_errors << { url: chapter_link, error: e, body: response.body }
        print 'x'
      end
    rescue => e
      puts e
    end
  end
  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
14.
def fetch_page_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_link|
    begin
      request = Typhoeus::Request.new "http://www.mangareader.net#{chapter_link}"
      request.on_complete do |response|
        begin
          chapter_doc = Nokogiri::HTML(response.body)
          pages = chapter_doc.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
          chapter_pages.merge!(chapter_link => pages.map { |p| p['value'] })
          print '.'
        rescue => e
          self.fetch_page_urls_errors << { url: chapter_link, error: e, body: response.body }
          print 'x'
        end
      end
      hydra.queue request
    rescue => e
      puts e
    end
  end
  hydra.run
  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end
  self.chapter_pages_count = chapter_pages.values.inject(0) { |total, list| total + list.size }
  current_state :page_urls
end
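hydra_concurrency is never shown on the slides; a plausible definition, assuming it is simply a tunable read from the environment:

def hydra_concurrency
  (ENV['HYDRA_CONCURRENCY'] || 50).to_i # hypothetical default
end

Typhoeus::Hydra multiplexes every queued request over libcurl's multi interface, so hydra.run blocks until all on_complete callbacks have fired while keeping at most max_concurrency requests in flight.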
23.
def fetch_image_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_key|
    chapter_pages[chapter_key].each do |page_link|
      begin
        request = Typhoeus::Request.new "http://www.mangareader.net#{page_link}"
        request.on_complete do |response|
          begin
            chapter_doc = Nokogiri::HTML(response.body)
            image = chapter_doc.css('#img').first
            tokens = image['alt'].match("^(.*?)\s-\s(.*?)$")
            extension = File.extname(URI.parse(image['src']).path)
            chapter_images.merge!(chapter_key => []) if chapter_images[chapter_key].nil?
            chapter_images[chapter_key] << ImageData.new( tokens[1], "#{tokens[2]}#{extension}", image['src'] )
            print '.'
          rescue => e
            self.fetch_image_urls_errors << { url: page_link, error: e }
            print 'x'
          end
        end
        hydra.queue request
      rescue => e
        puts e
      end
    end
  end
  hydra.run
  unless fetch_image_urls_errors.empty?
    puts "\nErrors fetching image urls:"
    puts fetch_image_urls_errors
  end
  current_state :image_urls
end
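The alt attribute on MangaReader image tags apparently follows a "folder - page" pattern, which the match above splits in two; with an illustrative alt text:

tokens = "Onepunch-Man 1 - Page 1".match("^(.*?)\s-\s(.*?)$")
tokens[1] # => "Onepunch-Man 1" (the folder)
tokens[2] # => "Page 1"         (the filename, before the extension is appended)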
24.
def fetch_images!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each_with_index do |chapter_key, chapter_index|
    chapter_images[chapter_key].each do |file|
      downloaded_filename = File.join(manga_root_folder, file.folder, file.filename)
      next if File.exist?(downloaded_filename) # effectively resumes the download list without re-downloading everything
      request = Typhoeus::Request.new file.url
      request.on_complete do |response|
        begin
          # download
          FileUtils.mkdir_p(File.join(manga_root_folder, file.folder))
          File.open(downloaded_filename, "wb+") { |f| f.write response.body }
          unless is_test
            # resize
            image = Magick::Image.read( downloaded_filename ).first
            resized = image.resize_to_fit(600, 800)
            resized.write( downloaded_filename ) { self.quality = 50 }
            GC.start # to avoid a leak too big (ImageMagick is notorious for that, especially on resizes)
          end
          print '.'
        rescue => e
          self.fetch_images_errors << { url: file.url, error: e }
          print '#'
        end
      end
      hydra.queue request
    end
  end
  hydra.run
  unless fetch_images_errors.empty?
    puts "\nErrors downloading images:"
    puts fetch_images_errors
  end
  current_state :images
end
25.
def compile_ebooks!
  folders = Dir[manga_root_folder + "/*/"].sort_by { |element| element.split(" ").last.to_i }
  self.download_links = folders.inject([]) do |list, folder|
    list += Dir[folder + "*.*"].sort_by { |element| element.split(" ").last.to_i }
  end

  # concatenating PDF files (250 pages per volume)
  chapter_number = 0
  while !download_links.empty?
    chapter_number += 1
    pdf_file = File.join(manga_root_folder, "#{manga_title} #{chapter_number}.pdf")
    list = download_links.slice!(0..pages_per_volume)
    Prawn::Document.generate(pdf_file, page_size: page_size) do |pdf|
      list.each do |image_file|
        begin
          pdf.image image_file, position: :center, vposition: :center
        rescue => e
          puts "Error in #{image_file} - #{e}"
        end
      end
    end
    print '.'
  end
  current_state :ebooks
end
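One detail worth noting: slice!(0..pages_per_volume) removes pages_per_volume + 1 entries per pass, since Ruby ranges written with .. are inclusive; the exclusive form takes exactly the configured count:

list = download_links.slice!(0...pages_per_volume) # exactly pages_per_volume images per volume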
26.
manga-downloadr -t
199.69s user 10.30s system 124% cpu 2:48.14 total
41.
worker.ex

defmodule PoolManagement.Worker do
  use GenServer

  # Public APIs
  def index_page(url, source) do
  end

  def chapter_page([chapter_link, source]) do
  end

  def page_image([page_link, source]) do
  end

  def page_download_image(image_data, directory) do
  end

  # internal GenServer implementation
  def handle_call({:chapter_page, chapter_link, source}, _from, state) do
  end

  def handle_call({:page_image, page_link, source}, _from, state) do
  end

  def handle_call({:page_download_image, image_data, directory}, _from, state) do
  end

  ## Helper functions
  defp manga_source(source, module) do
    case source do
      "mangafox"    -> :"Elixir.ExMangaDownloadr.Mangafox.#{module}"
      "mangareader" -> :"Elixir.ExMangaDownloadr.MangaReader.#{module}"
    end
  end

  defp download_image({image_src, image_filename}, directory) do
  end
end
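The bodies are elided on the slides, but the shape is telling: each public function presumably dispatches through GenServer.call to a pooled worker, so the pool size (not the caller) bounds how much work runs at once, while manga_source resolves the site-specific parser module (MangaReader or Mangafox) at runtime by building its atom.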
47.
.
├── _build
│   └── ...
├── config
│   └── config.exs
├── deps
│   ├── ...
├── ex_manga_downloadr
├── lib
│   ├── ex_manga_downloadr
│   │   ├── cli.ex
│   │   ├── mangafox
│   │   │   ├── chapter_page.ex
│   │   │   ├── index_page.ex
│   │   │   └── page.ex
│   │   ├── mangareader
│   │   │   ├── chapter_page.ex
│   │   │   ├── index_page.ex
│   │   │   └── page.ex
│   │   ├── pool_management
│   │   │   ├── supervisor.ex
│   │   │   └── worker.ex
│   │   └── workflow.ex
│   ├── ex_manga_downloadr.ex
│   └── pool_management.ex
├── mix.exs
├── mix.lock
├── README.md
└── test
    ├── ex_manga_downloadr
    │   ├── mangafox_test.exs
    │   └── mangareader_test.exs
    ├── ex_manga_downloadr_test.exs
    └── test_helper.exs

61 directories, 281 files
59.
cli.ex

defmodule ExMangaDownloadr.CLI do
  alias ExMangaDownloadr.Workflow
  require ExMangaDownloadr

  def main(args) do
    args
    |> parse_args
    |> process
  end

  ...

  defp parse_args(args) do
  end

  defp process(:help) do
  end

  defp process(directory, url) do
    File.mkdir_p!(directory)
    File.mkdir_p!("/tmp/ex_manga_downloadr_cache")
    manga_name = directory |> String.split("/") |> Enum.reverse |> Enum.at(0)
    url
    |> Workflow.determine_source
    |> Workflow.chapters
    |> Workflow.pages
    |> Workflow.images_sources
    |> Workflow.process_downloads(directory)
    |> Workflow.optimize_images
    |> Workflow.compile_pdfs(manga_name)
    |> finish_process
  end

  defp process_test(directory, url) do
  end

  defp finish_process(directory) do
  end
end
61.
workflow.ex

defmodule ExMangaDownloadr.Workflow do
  alias PoolManagement.Worker
  require Logger

  def chapters({url, source}) do
  end

  def pages({chapter_list, source}) do
    pages_list = chapter_list
      |> Enum.map(&Worker.chapter_page([&1, source]))
      |> Enum.map(&Task.await(&1, @await_timeout_ms))
      |> Enum.reduce([], fn {:ok, list}, acc -> acc ++ list end)
    {pages_list, source}
  end

  def images_sources({pages_list, source}) do
  end

  def process_downloads(images_list, directory) do
  end

  def optimize_images(directory) do
    Porcelain.shell("mogrify -resize #{@image_dimensions} #{directory}/*.jpg")
    directory
  end

  def compile_pdfs(directory, manga_name) do
  end
end
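(@await_timeout_ms and @image_dimensions are module attributes, presumably defined at the top of the real file; the slides omit their values.)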
63.
ex_manga_downloadr --test
28.36s user 15.57s system 33% cpu 2:10.28 total
69.
defmodule ExMangaDownloadr.MangaReader.IndexPage do
  require Logger
  require ExMangaDownloadr

  def chapters(manga_root_url) do
    ExMangaDownloadr.fetch manga_root_url, do: collect
  end

  defp collect(html) do
    {fetch_manga_title(html), fetch_chapters(html)}
  end

  defp fetch_manga_title(html) do
    html
    |> Floki.find("#mangaproperties h1")
    |> Floki.text
  end

  defp fetch_chapters(html) do
    html
    |> Floki.find("#listing a")
    |> Floki.attribute("href")
  end
end
72.
require "./downloadr_client"
require "xml"
module CrMangaDownloadr
class Chapters < DownloadrClient
def initialize(@domain, @root_uri : String, @cache_http = false)
super(@domain, @cache_http)
end
def fetch
html = get(@root_uri)
nodes = html.xpath_nodes(
"//table[contains(@id, 'listing')]//td//a/@href")
nodes.map { |node| node.text.as(String) }
end
end
end
DownloadrClient
74.
module CrMangaDownloadr
  class DownloadrClient
    @http_client : HTTP::Client

    def initialize(@domain : String, @cache_http = false)
    end

    def get(uri : String)
      cache_path = "/tmp/cr_manga_downloadr_cache/#{cache_filename(uri)}"
      while true
        begin
          response = if @cache_http && File.exists?(cache_path)
            body = File.read(cache_path)
            HTTP::Client::Response.new(200, body)
          else
            @http_client.get(uri, headers: HTTP::Headers{
              "User-Agent" => CrMangaDownloadr::USER_AGENT })
          end
          case response.status_code
          when 301
            uri = response.headers["Location"]
          when 200
            if @cache_http && !File.exists?(cache_path)
              File.open(cache_path, "w") do |f|
                f.print response.body
              end
            end
            return XML.parse_html(response.body)
          end
        rescue IO::Timeout
          puts "Sleeping over #{uri}"
          sleep 1
        end
      end
    end
  end
end
80.
require "fiberpool"
module CrMangaDownloadr
struct Concurrency
def initialize(@config : Config, @turn_on_engine = true); end
def fetch(collection : Array(A)?, engine_class : E.class,
&block : A, E? -> Array(B)?) : Array(B)
results = [] of B
if collection
pool = Fiberpool.new(collection,
@config.download_batch_size)
pool.run do |item|
engine = if @turn_on_engine
engine_class.new(@config.domain,
@config.cache_http)
end
if reply = block.call(item, engine)
results.concat(reply)
end
end
end
results
end
end
end
fetch
Concurrency
86.
module CrMangaDownloadr
  class Workflow
  end

  module Steps
    def self.fetch_chapters(config : Config)
    end

    def self.fetch_pages(chapters : Array(String)?, config : Config)
      puts "Fetching pages from all chapters ..."
      reactor = Concurrency.new(config)
      reactor.fetch(chapters, Pages) do |link, engine|
        engine.try(&.fetch(link))
      end
    end

    def self.fetch_images(pages : Array(String)?, config : Config)
    end

    def self.download_images(images : Array(Image)?, config : Config)
    end

    def self.optimize_images(downloads : Array(String), config : Config)
    end

    def self.prepare_volumes(downloads : Array(String), config : Config)
    end
  end
end
88.
cr_manga_downloadr -t
0.28s user 0.53s system 0% cpu 1:52.45 total
97.
# concurrency.cr
pool = Fiberpool.new(collection, @config.download_batch_size)
pool.run do |item|
  engine = if @turn_on_engine
    engine_class.new(@config.domain, @config.cache_http)
  end
  if reply = block.call(item, engine)
    results.concat(reply)
  end
end
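# for comparison, the same pool logic in the Ruby version (thread pool + mutex):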
pool = Thread.pool(@config.download_batch_size)
mutex = Mutex.new
results = []
collection.each do |item|
  pool.process {
    engine = @turn_on_engine ? @engine_klass.new(@config.domain, @config.cache_http) : nil
    reply = block.call(item, engine)&.flatten
    mutex.synchronize do
      results += ( reply || [] )
    end
  }
end
pool.shutdown
99.
module CrMangaDownloadr
  class Pages < DownloadrClient
    def fetch(chapter_link : String)
      html = get(chapter_link)
      nodes = html.xpath_nodes("//div[@id='selectpage']//select[@id='pageMenu']//option")
      nodes.map { |node| "#{chapter_link}/#{node.text}" }
    end
  end
end
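# and the Ruby counterpart, where get takes a block: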
module MangaDownloadr
  class Pages < DownloadrClient
    def fetch(chapter_link)
      get chapter_link do |html|
        nodes = html.xpath("//div[@id='selectpage']//select[@id='pageMenu']//option")
        nodes.map { |node| [chapter_link, node.children.to_s].join("/") }
      end
    end
  end
end
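The Ruby Pages above calls get with a block, implying a Ruby DownloadrClient analogous to the Crystal one; a minimal sketch of such a client, assuming Typhoeus and a /tmp file cache like the Crystal version uses (the real class may differ):

require 'typhoeus'
require 'nokogiri'
require 'digest'
require 'fileutils'

module MangaDownloadr
  class DownloadrClient
    CACHE_DIR = "/tmp/manga_downloadr_cache" # hypothetical location

    def initialize(domain, cache_http = false)
      @domain     = domain
      @cache_http = cache_http
      FileUtils.mkdir_p(CACHE_DIR) if @cache_http
    end

    # fetch the page (or read it from cache), parse the HTML and yield it
    def get(uri)
      cache_path = File.join(CACHE_DIR, Digest::MD5.hexdigest(uri))
      body = if @cache_http && File.exist?(cache_path)
        File.read(cache_path)
      else
        Typhoeus.get("http://#{@domain}#{uri}").body
      end
      File.write(cache_path, body) if @cache_http && !File.exist?(cache_path)
      yield Nokogiri::HTML(body)
    end
  end
end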
100.
manga-downloadr -t
16.55s user 6.65s system 17% cpu 2:13.86 total