SlideShare a Scribd company logo
1 of 16
Metabuscadores

                     Fabricio Echeverría
                 pechever@espol.edu.ec



Joseph Brodsky
Agenda
   •   Índices de palabras
   •   Web Search Engine
   •   Information Retrieval Systems
   •   Metabuscadores
   •   Preguntas
En busca de la memoria
  dinámica extendida
Índice de Palabras: Onomástica de los
         nombres en Catalán
Web Search Engine
        • Lenguaje de programación:
          Python
        • Manejo de Alta RAM
        • Almacenamiento
          Compartido
        • Procesamiento en Paralelo
Web Search Engine




http://nlp.stanford.edu/IR-book/pdf/19web.pdf Pág. 434
Código Python – Web Search Engine
                                                     def union(a, b):                              cache = {
def crawl_web(seed): # returns index, graph of         for e in b:                                   'http://www.udacity.com/cs101x/final/multi.html': """<html>
inlinks                                                                                            <body>
                                                         if e not in a:
   tocrawl = [seed]
                                                            a.append(e)
   crawled = []                                                                                    <a href="http://www.udacity.com/cs101x/final/a.html">A</a><br>
   graph = {} # <url>, [list of pages it links to]                                                 <a href="http://www.udacity.com/cs101x/final/b.html">B</a><br>
                                                     def add_page_to_index(index, url, content):
   index = {}
                                                       words = content.split()                     </body>
   while tocrawl:
                                                       pos=0                                       """,
     page = tocrawl.pop()
                                                       for word in words:                            'http://www.udacity.com/cs101x/final/b.html': """<html>
     if page not in crawled:                                                                       <body>
                                                         pos=content.find(word, pos)
        content = get_page(page)
                                                         add_to_index(index, word, url,pos)
        add_page_to_index(index, page, content)                                                    Monty likes the Python programming language
        outlinks = get_all_links(content)                                                          Thomas Jefferson founded the University of Virginia
                                                     def add_to_index(index, keyword, url,pos):    When Mandela was in London, he visited Nelson's Column.
        graph[page] = outlinks
                                                       if keyword in index:
        union(tocrawl, outlinks)
                                                          index[keyword].append([url,pos])         </body>
        crawled.append(page)
                                                       else:                                       </html>
   return index, graph                                                                             """,
                                                          index[keyword] = [[url,pos]]
                                                                                                     'http://www.udacity.com/cs101x/final/a.html': """<html>
def get_next_target(page):                                                                         <body>
                                                     def lookup(index, keyword):
  start_link = page.find('<a href=')
                                                       if keyword in index:                        Monty Python is not about a programming language
  if start_link == -1:
                                                          return index[keyword]                    Udacity was not founded by Thomas Jefferson
     return None, 0
                                                       else:                                       Nelson Mandela said "Education is the most powerful weapon
  start_quote = page.find('"', start_link)
                                                          return None                              which you can
  end_quote = page.find('"', start_quote + 1)                                                      use to change the world."
  url = page[start_quote + 1:end_quote]                                                            </body>
  return url, end_quote                                                                            </html>
                                                                                                   """,
def get_all_links(page):                                                                           }
  links = []
                                                                                                   def get_page(url):
  while True:
                                                                                                     if url in cache:
     url, endpos = get_next_target(page)                                                                return cache[url]
     if url:                                                                                         else:
        links.append(url)                                                                               print "Page not in cache: " + url
        page = page[endpos:]                                                                            return None
     else:
        break
  return links



http://www.udacity.com/cs101
Information Retrieval Systems
Metabuscadores
• Es la unión de búsquedas (queries) en varios
  buscadores (search engines) – Índices de
  Búsquedas –
http://dg3rtljvitrle.cloudfront.net/slides/chap10.pdf
http://dg3rtljvitrle.cloudfront.net/slides/chap10.pdf
Retrieval Systems and Metasearch Engines
Retrieval Systems and Metasearch Engines
Retrieval Systems and Metasearch Engines
Retrieval Systems and Metasearch Engines
Retrieval Systems and Metasearch Engines

More Related Content

What's hot

Huong dan cai dat hadoop
Huong dan cai dat hadoopHuong dan cai dat hadoop
Huong dan cai dat hadoopQuỳnh Phan
 
MongoDB: How it Works
MongoDB: How it WorksMongoDB: How it Works
MongoDB: How it WorksMike Dirolf
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...Jitendra Kumar Gupta
 
CouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conferenceCouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conferenceleinweber
 
Not Really PHP by the book
Not Really PHP by the bookNot Really PHP by the book
Not Really PHP by the bookRyan Kilfedder
 
REST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practiceREST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practicehamnis
 
Dropping ACID with MongoDB
Dropping ACID with MongoDBDropping ACID with MongoDB
Dropping ACID with MongoDBkchodorow
 
Wordpress Manual Document
Wordpress Manual DocumentWordpress Manual Document
Wordpress Manual DocumentFarzad Wadia
 
01 ElasticSearch : Getting Started
01 ElasticSearch : Getting Started01 ElasticSearch : Getting Started
01 ElasticSearch : Getting StartedOpenThink Labs
 
Hadoop installation
Hadoop installationHadoop installation
Hadoop installationhabeebulla g
 
Up.Php
Up.PhpUp.Php
Up.Phpwsoom
 
Puppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future PastPuppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future PastPuppet
 
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good partsConrad Irwin
 
Build PHP Search Engine
Build PHP Search EngineBuild PHP Search Engine
Build PHP Search EngineKiril Iliev
 
一次Http请求过程分析
一次Http请求过程分析一次Http请求过程分析
一次Http请求过程分析Tony Deng
 

What's hot (20)

Huong dan cai dat hadoop
Huong dan cai dat hadoopHuong dan cai dat hadoop
Huong dan cai dat hadoop
 
Cookies
CookiesCookies
Cookies
 
MongoDB: How it Works
MongoDB: How it WorksMongoDB: How it Works
MongoDB: How it Works
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...
 
CouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conferenceCouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conference
 
Not Really PHP by the book
Not Really PHP by the bookNot Really PHP by the book
Not Really PHP by the book
 
REST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practiceREST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practice
 
Dropping ACID with MongoDB
Dropping ACID with MongoDBDropping ACID with MongoDB
Dropping ACID with MongoDB
 
Wordpress Manual Document
Wordpress Manual DocumentWordpress Manual Document
Wordpress Manual Document
 
01 ElasticSearch : Getting Started
01 ElasticSearch : Getting Started01 ElasticSearch : Getting Started
01 ElasticSearch : Getting Started
 
JSOP in 60 seconds
JSOP in 60 secondsJSOP in 60 seconds
JSOP in 60 seconds
 
Hadoop installation
Hadoop installationHadoop installation
Hadoop installation
 
Up.Php
Up.PhpUp.Php
Up.Php
 
Puppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future PastPuppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future Past
 
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good parts
 
Couchdb w Ruby'm
Couchdb w Ruby'mCouchdb w Ruby'm
Couchdb w Ruby'm
 
Build PHP Search Engine
Build PHP Search EngineBuild PHP Search Engine
Build PHP Search Engine
 
一次Http请求过程分析
一次Http请求过程分析一次Http请求过程分析
一次Http请求过程分析
 
Python and MongoDB
Python and MongoDBPython and MongoDB
Python and MongoDB
 
08 php-files
08 php-files08 php-files
08 php-files
 

Viewers also liked

Drupal 7 module development
Drupal 7 module developmentDrupal 7 module development
Drupal 7 module developmentAdam Kalsey
 
Iso Principles
Iso PrinciplesIso Principles
Iso PrinciplesYangWatson
 
Irmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hallIrmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hallMagali ..
 
Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...ElisabethRydberg
 
East Bay Ruby Tropo presentation
East Bay Ruby Tropo presentationEast Bay Ruby Tropo presentation
East Bay Ruby Tropo presentationAdam Kalsey
 
Introduction to jRuby
Introduction to jRubyIntroduction to jRuby
Introduction to jRubyAdam Kalsey
 
Barack Obama Family Tree
Barack Obama Family TreeBarack Obama Family Tree
Barack Obama Family TreeRaquib Khan
 

Viewers also liked (7)

Drupal 7 module development
Drupal 7 module developmentDrupal 7 module development
Drupal 7 module development
 
Iso Principles
Iso PrinciplesIso Principles
Iso Principles
 
Irmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hallIrmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hall
 
Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...
 
East Bay Ruby Tropo presentation
East Bay Ruby Tropo presentationEast Bay Ruby Tropo presentation
East Bay Ruby Tropo presentation
 
Introduction to jRuby
Introduction to jRubyIntroduction to jRuby
Introduction to jRuby
 
Barack Obama Family Tree
Barack Obama Family TreeBarack Obama Family Tree
Barack Obama Family Tree
 

Similar to Retrieval Systems and Metasearch Engines

Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)Leonardo Soto
 
Working Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in OpalWorking Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in OpalForrest Chang
 
Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011Atlassian
 
12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocratJonathan Linowes
 
Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)Leonardo Soto
 
Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4DEVCON
 
TurboGears2 Pluggable Applications
TurboGears2 Pluggable ApplicationsTurboGears2 Pluggable Applications
TurboGears2 Pluggable ApplicationsAlessandro Molina
 
Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4DEVCON
 
Building Apps with MongoDB
Building Apps with MongoDBBuilding Apps with MongoDB
Building Apps with MongoDBNate Abele
 
Creating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data MiningCreating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data MiningJonathan LeBlanc
 
Web Scraping is BS
Web Scraping is BSWeb Scraping is BS
Web Scraping is BSJohn D
 
SF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDBSF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDBPeter Hamilton
 
[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018Adam Tomat
 
Search as main navigation
Search as main navigationSearch as main navigation
Search as main navigationpunkt.de GmbH
 
Dev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDBDev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDBMongoDB
 
20190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React201920190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React2019Makoto Mori
 
Practical HTML5: Using It Today
Practical HTML5: Using It TodayPractical HTML5: Using It Today
Practical HTML5: Using It TodayDoris Chen
 
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"GeeksLab Odessa
 

Similar to Retrieval Systems and Metasearch Engines (20)

Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)
 
Working Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in OpalWorking Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in Opal
 
Finding Clojure
Finding ClojureFinding Clojure
Finding Clojure
 
Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011
 
12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat
 
Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)
 
Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4
 
TurboGears2 Pluggable Applications
TurboGears2 Pluggable ApplicationsTurboGears2 Pluggable Applications
TurboGears2 Pluggable Applications
 
SEO for Developers
SEO for DevelopersSEO for Developers
SEO for Developers
 
Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4
 
Building Apps with MongoDB
Building Apps with MongoDBBuilding Apps with MongoDB
Building Apps with MongoDB
 
Creating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data MiningCreating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data Mining
 
Web Scraping is BS
Web Scraping is BSWeb Scraping is BS
Web Scraping is BS
 
SF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDBSF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDB
 
[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018
 
Search as main navigation
Search as main navigationSearch as main navigation
Search as main navigation
 
Dev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDBDev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDB
 
20190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React201920190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React2019
 
Practical HTML5: Using It Today
Practical HTML5: Using It TodayPractical HTML5: Using It Today
Practical HTML5: Using It Today
 
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
 

More from pechever

La administración de variedad
La administración de variedadLa administración de variedad
La administración de variedadpechever
 
Red de repositorios del ecuador informe clara 3
Red de repositorios del ecuador   informe clara 3Red de repositorios del ecuador   informe clara 3
Red de repositorios del ecuador informe clara 3pechever
 
Webometrics
WebometricsWebometrics
Webometricspechever
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000pechever
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000pechever
 
Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4pechever
 
Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3pechever
 
Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2pechever
 
Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1pechever
 
Informe De Red Repositorios Diciembre 2009
Informe De Red Repositorios    Diciembre 2009Informe De Red Repositorios    Diciembre 2009
Informe De Red Repositorios Diciembre 2009pechever
 
Presentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOLPresentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOLpechever
 
PresentacióN Babaco4all
PresentacióN Babaco4allPresentacióN Babaco4all
PresentacióN Babaco4allpechever
 
Entregable Total
Entregable TotalEntregable Total
Entregable Totalpechever
 
Strategic Sourcing At P&G Julio 2009
Strategic Sourcing At P&G   Julio 2009Strategic Sourcing At P&G   Julio 2009
Strategic Sourcing At P&G Julio 2009pechever
 
Informe De Ecuabank Enero 2009
Informe De Ecuabank   Enero 2009Informe De Ecuabank   Enero 2009
Informe De Ecuabank Enero 2009pechever
 
Mba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGicaMba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGicapechever
 
H A R M O N Y S E A R C H
H A R M O N Y  S E A R C HH A R M O N Y  S E A R C H
H A R M O N Y S E A R C Hpechever
 
Presentacion Algortimos Geneticos
Presentacion Algortimos GeneticosPresentacion Algortimos Geneticos
Presentacion Algortimos Geneticospechever
 
On Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y ViceversaOn Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y Viceversapechever
 
Canadiense ven a Ecuador
Canadiense ven a EcuadorCanadiense ven a Ecuador
Canadiense ven a Ecuadorpechever
 

More from pechever (20)

La administración de variedad
La administración de variedadLa administración de variedad
La administración de variedad
 
Red de repositorios del ecuador informe clara 3
Red de repositorios del ecuador   informe clara 3Red de repositorios del ecuador   informe clara 3
Red de repositorios del ecuador informe clara 3
 
Webometrics
WebometricsWebometrics
Webometrics
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
 
Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4
 
Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3
 
Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2
 
Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1
 
Informe De Red Repositorios Diciembre 2009
Informe De Red Repositorios    Diciembre 2009Informe De Red Repositorios    Diciembre 2009
Informe De Red Repositorios Diciembre 2009
 
Presentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOLPresentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOL
 
PresentacióN Babaco4all
PresentacióN Babaco4allPresentacióN Babaco4all
PresentacióN Babaco4all
 
Entregable Total
Entregable TotalEntregable Total
Entregable Total
 
Strategic Sourcing At P&G Julio 2009
Strategic Sourcing At P&G   Julio 2009Strategic Sourcing At P&G   Julio 2009
Strategic Sourcing At P&G Julio 2009
 
Informe De Ecuabank Enero 2009
Informe De Ecuabank   Enero 2009Informe De Ecuabank   Enero 2009
Informe De Ecuabank Enero 2009
 
Mba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGicaMba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGica
 
H A R M O N Y S E A R C H
H A R M O N Y  S E A R C HH A R M O N Y  S E A R C H
H A R M O N Y S E A R C H
 
Presentacion Algortimos Geneticos
Presentacion Algortimos GeneticosPresentacion Algortimos Geneticos
Presentacion Algortimos Geneticos
 
On Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y ViceversaOn Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y Viceversa
 
Canadiense ven a Ecuador
Canadiense ven a EcuadorCanadiense ven a Ecuador
Canadiense ven a Ecuador
 

Recently uploaded

ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPT
ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPTECONOMIC CONTEXT - LONG FORM TV DRAMA - PPT
ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPTiammrhaywood
 
Atmosphere science 7 quarter 4 .........
Atmosphere science 7 quarter 4 .........Atmosphere science 7 quarter 4 .........
Atmosphere science 7 quarter 4 .........LeaCamillePacle
 
Difference Between Search & Browse Methods in Odoo 17
Difference Between Search & Browse Methods in Odoo 17Difference Between Search & Browse Methods in Odoo 17
Difference Between Search & Browse Methods in Odoo 17Celine George
 
Alper Gobel In Media Res Media Component
Alper Gobel In Media Res Media ComponentAlper Gobel In Media Res Media Component
Alper Gobel In Media Res Media ComponentInMediaRes1
 
Solving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxSolving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxOH TEIK BIN
 
How to Configure Email Server in Odoo 17
How to Configure Email Server in Odoo 17How to Configure Email Server in Odoo 17
How to Configure Email Server in Odoo 17Celine George
 
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdf
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdfAMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdf
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdfphamnguyenenglishnb
 
Full Stack Web Development Course for Beginners
Full Stack Web Development Course  for BeginnersFull Stack Web Development Course  for Beginners
Full Stack Web Development Course for BeginnersSabitha Banu
 
ROOT CAUSE ANALYSIS PowerPoint Presentation
ROOT CAUSE ANALYSIS PowerPoint PresentationROOT CAUSE ANALYSIS PowerPoint Presentation
ROOT CAUSE ANALYSIS PowerPoint PresentationAadityaSharma884161
 
Keynote by Prof. Wurzer at Nordex about IP-design
Keynote by Prof. Wurzer at Nordex about IP-designKeynote by Prof. Wurzer at Nordex about IP-design
Keynote by Prof. Wurzer at Nordex about IP-designMIPLM
 
ENGLISH6-Q4-W3.pptxqurter our high choom
ENGLISH6-Q4-W3.pptxqurter our high choomENGLISH6-Q4-W3.pptxqurter our high choom
ENGLISH6-Q4-W3.pptxqurter our high choomnelietumpap1
 
Quarter 4 Peace-education.pptx Catch Up Friday
Quarter 4 Peace-education.pptx Catch Up FridayQuarter 4 Peace-education.pptx Catch Up Friday
Quarter 4 Peace-education.pptx Catch Up FridayMakMakNepo
 
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfFraming an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfUjwalaBharambe
 
Grade 9 Q4-MELC1-Active and Passive Voice.pptx
Grade 9 Q4-MELC1-Active and Passive Voice.pptxGrade 9 Q4-MELC1-Active and Passive Voice.pptx
Grade 9 Q4-MELC1-Active and Passive Voice.pptxChelloAnnAsuncion2
 
Procuring digital preservation CAN be quick and painless with our new dynamic...
Procuring digital preservation CAN be quick and painless with our new dynamic...Procuring digital preservation CAN be quick and painless with our new dynamic...
Procuring digital preservation CAN be quick and painless with our new dynamic...Jisc
 
Employee wellbeing at the workplace.pptx
Employee wellbeing at the workplace.pptxEmployee wellbeing at the workplace.pptx
Employee wellbeing at the workplace.pptxNirmalaLoungPoorunde1
 

Recently uploaded (20)

ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPT
ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPTECONOMIC CONTEXT - LONG FORM TV DRAMA - PPT
ECONOMIC CONTEXT - LONG FORM TV DRAMA - PPT
 
Atmosphere science 7 quarter 4 .........
Atmosphere science 7 quarter 4 .........Atmosphere science 7 quarter 4 .........
Atmosphere science 7 quarter 4 .........
 
Difference Between Search & Browse Methods in Odoo 17
Difference Between Search & Browse Methods in Odoo 17Difference Between Search & Browse Methods in Odoo 17
Difference Between Search & Browse Methods in Odoo 17
 
Alper Gobel In Media Res Media Component
Alper Gobel In Media Res Media ComponentAlper Gobel In Media Res Media Component
Alper Gobel In Media Res Media Component
 
Solving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxSolving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptx
 
TataKelola dan KamSiber Kecerdasan Buatan v022.pdf
TataKelola dan KamSiber Kecerdasan Buatan v022.pdfTataKelola dan KamSiber Kecerdasan Buatan v022.pdf
TataKelola dan KamSiber Kecerdasan Buatan v022.pdf
 
How to Configure Email Server in Odoo 17
How to Configure Email Server in Odoo 17How to Configure Email Server in Odoo 17
How to Configure Email Server in Odoo 17
 
Rapple "Scholarly Communications and the Sustainable Development Goals"
Rapple "Scholarly Communications and the Sustainable Development Goals"Rapple "Scholarly Communications and the Sustainable Development Goals"
Rapple "Scholarly Communications and the Sustainable Development Goals"
 
Model Call Girl in Bikash Puri Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Bikash Puri  Delhi reach out to us at 🔝9953056974🔝Model Call Girl in Bikash Puri  Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Bikash Puri Delhi reach out to us at 🔝9953056974🔝
 
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdf
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdfAMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdf
AMERICAN LANGUAGE HUB_Level2_Student'sBook_Answerkey.pdf
 
Full Stack Web Development Course for Beginners
Full Stack Web Development Course  for BeginnersFull Stack Web Development Course  for Beginners
Full Stack Web Development Course for Beginners
 
ROOT CAUSE ANALYSIS PowerPoint Presentation
ROOT CAUSE ANALYSIS PowerPoint PresentationROOT CAUSE ANALYSIS PowerPoint Presentation
ROOT CAUSE ANALYSIS PowerPoint Presentation
 
Keynote by Prof. Wurzer at Nordex about IP-design
Keynote by Prof. Wurzer at Nordex about IP-designKeynote by Prof. Wurzer at Nordex about IP-design
Keynote by Prof. Wurzer at Nordex about IP-design
 
ENGLISH6-Q4-W3.pptxqurter our high choom
ENGLISH6-Q4-W3.pptxqurter our high choomENGLISH6-Q4-W3.pptxqurter our high choom
ENGLISH6-Q4-W3.pptxqurter our high choom
 
Quarter 4 Peace-education.pptx Catch Up Friday
Quarter 4 Peace-education.pptx Catch Up FridayQuarter 4 Peace-education.pptx Catch Up Friday
Quarter 4 Peace-education.pptx Catch Up Friday
 
OS-operating systems- ch04 (Threads) ...
OS-operating systems- ch04 (Threads) ...OS-operating systems- ch04 (Threads) ...
OS-operating systems- ch04 (Threads) ...
 
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfFraming an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
 
Grade 9 Q4-MELC1-Active and Passive Voice.pptx
Grade 9 Q4-MELC1-Active and Passive Voice.pptxGrade 9 Q4-MELC1-Active and Passive Voice.pptx
Grade 9 Q4-MELC1-Active and Passive Voice.pptx
 
Procuring digital preservation CAN be quick and painless with our new dynamic...
Procuring digital preservation CAN be quick and painless with our new dynamic...Procuring digital preservation CAN be quick and painless with our new dynamic...
Procuring digital preservation CAN be quick and painless with our new dynamic...
 
Employee wellbeing at the workplace.pptx
Employee wellbeing at the workplace.pptxEmployee wellbeing at the workplace.pptx
Employee wellbeing at the workplace.pptx
 

Retrieval Systems and Metasearch Engines

  • 1. Metabuscadores Fabricio Echeverría pechever@espol.edu.ec Joseph Brodsky
  • 2. Agenda • Índices de palabras • Web Search Engine • Retrieval Information Systems • Metabuscadores • Preguntas
  • 3. En busca de la memoria dinámica extendida
  • 4. Índice de Palabras: Onomástica de los nombres en Catalán
  • 5. Web Search Engine • Lenguaje de programación: Python • Manejo de Alta RAM • Almacenamiento Compartido • Procesamiento en Paralelo
  • 7. Código Python – Web Search Engine def union(a, b): cache = { def crawl_web(seed): # returns index, graph of for e in b: 'http://www.udacity.com/cs101x/final/multi.html': """<html> inlinks <body> if e not in a: tocrawl = [seed] a.append(e) crawled = [] <a href="http://www.udacity.com/cs101x/final/a.html">A</a><br> graph = {} # <url>, [list of pages it links to] <a href="http://www.udacity.com/cs101x/final/b.html">B</a><br> def add_page_to_index(index, url, content): index = {} words = content.split() </body> while tocrawl: pos=0 """, page = tocrawl.pop() for word in words: 'http://www.udacity.com/cs101x/final/b.html': """<html> if page not in crawled: <body> pos=content.find(word, pos) content = get_page(page) add_to_index(index, word, url,pos) add_page_to_index(index, page, content) Monty likes the Python programming language outlinks = get_all_links(content) Thomas Jefferson founded the University of Virginia def add_to_index(index, keyword, url,pos): When Mandela was in London, he visited Nelson's Column. graph[page] = outlinks if keyword in index: union(tocrawl, outlinks) index[keyword].append([url,pos]) </body> crawled.append(page) else: </html> return index, graph """, index[keyword] = [[url,pos]] 'http://www.udacity.com/cs101x/final/a.html': """<html> def get_next_target(page): <body> def lookup(index, keyword): start_link = page.find('<a href=') if keyword in index: Monty Python is not about a programming language if start_link == -1: return index[keyword] Udacity was not founded by Thomas Jefferson return None, 0 else: Nelson Mandela said "Education is the most powerful weapon start_quote = page.find('"', start_link) return None which you can end_quote = page.find('"', start_quote + 1) use to change the world." 
url = page[start_quote + 1:end_quote] </body> return url, end_quote </html> """, def get_all_links(page): } links = [] def get_page(url): while True: if url in cache: url, endpos = get_next_target(page) return cache[url] if url: else: links.append(url) print "Page not in cache: " + url page = page[endpos:] return None else: break return links http://www.udacity.com/cs101
  • 9. Metabuscadores • Es la unión de búsquedas(query) en varios buscadores(Search Engine) – Índices de Búsquedas -