SlideShare a Scribd company logo
1 of 16
Metabuscadores

                     Fabricio Echeverría
                 pechever@espol.edu.ec



Joseph Brodsky
Agenda
   •   Índices de palabras
   •   Web Search Engine
   •   Information Retrieval Systems
   •   Metabuscadores
   •   Preguntas
En busca de la memoria
  dinámica extendida
Índice de Palabras: Onomástica de los
         nombres en Catalán
Web Search Engine
        • Lenguaje de programación:
          Python
        • Manejo de Alta RAM
        • Almacenamiento
          Compartido
        • Procesamiento en Paralelo
Web Search Engine




http://nlp.stanford.edu/IR-book/pdf/19web.pdf Pag.434
Código Python – Web Search Engine
                                                     def union(a, b):                              cache = {
def crawl_web(seed): # returns index, graph of         for e in b:                                   'http://www.udacity.com/cs101x/final/multi.html': """<html>
inlinks                                                                                            <body>
                                                         if e not in a:
   tocrawl = [seed]
                                                            a.append(e)
   crawled = []                                                                                    <a href="http://www.udacity.com/cs101x/final/a.html">A</a><br>
   graph = {} # <url>, [list of pages it links to]                                                 <a href="http://www.udacity.com/cs101x/final/b.html">B</a><br>
                                                     def add_page_to_index(index, url, content):
   index = {}
                                                       words = content.split()                     </body>
   while tocrawl:
                                                       pos=0                                       """,
     page = tocrawl.pop()
                                                       for word in words:                            'http://www.udacity.com/cs101x/final/b.html': """<html>
     if page not in crawled:                                                                       <body>
                                                         pos=content.find(word, pos)
        content = get_page(page)
                                                         add_to_index(index, word, url,pos)
        add_page_to_index(index, page, content)                                                    Monty likes the Python programming language
        outlinks = get_all_links(content)                                                          Thomas Jefferson founded the University of Virginia
                                                     def add_to_index(index, keyword, url,pos):    When Mandela was in London, he visited Nelson's Column.
        graph[page] = outlinks
                                                       if keyword in index:
        union(tocrawl, outlinks)
                                                          index[keyword].append([url,pos])         </body>
        crawled.append(page)
                                                       else:                                       </html>
   return index, graph                                                                             """,
                                                          index[keyword] = [[url,pos]]
                                                                                                     'http://www.udacity.com/cs101x/final/a.html': """<html>
def get_next_target(page):                                                                         <body>
                                                     def lookup(index, keyword):
  start_link = page.find('<a href=')
                                                       if keyword in index:                        Monty Python is not about a programming language
  if start_link == -1:
                                                          return index[keyword]                    Udacity was not founded by Thomas Jefferson
     return None, 0
                                                       else:                                       Nelson Mandela said "Education is the most powerful weapon
  start_quote = page.find('"', start_link)
                                                          return None                              which you can
  end_quote = page.find('"', start_quote + 1)                                                      use to change the world."
  url = page[start_quote + 1:end_quote]                                                            </body>
  return url, end_quote                                                                            </html>
                                                                                                   """,
def get_all_links(page):                                                                           }
  links = []
                                                                                                   def get_page(url):
  while True:
                                                                                                     if url in cache:
     url, endpos = get_next_target(page)                                                                return cache[url]
     if url:                                                                                         else:
        links.append(url)                                                                               print "Page not in cache: " + url
        page = page[endpos:]                                                                            return None
     else:
        break
  return links



http://www.udacity.com/cs101
Information Retrieval Systems
Metabuscadores
• Es la unión de búsquedas (query) en varios
  buscadores (Search Engine) – Índices de
  Búsquedas –
http://dg3rtljvitrle.cloudfront.net/slides/chap10.pdf
http://dg3rtljvitrle.cloudfront.net/slides/chap10.pdf
Meta Buscadores
Meta Buscadores
Meta Buscadores
Meta Buscadores
Meta Buscadores

More Related Content

What's hot

Huong dan cai dat hadoop
Huong dan cai dat hadoopHuong dan cai dat hadoop
Huong dan cai dat hadoop
Quỳnh Phan
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Jitendra Kumar Gupta
 
Up.Php
Up.PhpUp.Php
Up.Php
wsoom
 
Puppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future PastPuppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future Past
Puppet
 
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good parts
Conrad Irwin
 

What's hot (20)

Huong dan cai dat hadoop
Huong dan cai dat hadoopHuong dan cai dat hadoop
Huong dan cai dat hadoop
 
Cookies
CookiesCookies
Cookies
 
MongoDB: How it Works
MongoDB: How it WorksMongoDB: How it Works
MongoDB: How it Works
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...
 
CouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conferenceCouchDB @ red dirt ruby conference
CouchDB @ red dirt ruby conference
 
Not Really PHP by the book
Not Really PHP by the bookNot Really PHP by the book
Not Really PHP by the book
 
REST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practiceREST teori og praksis; REST in theory and practice
REST teori og praksis; REST in theory and practice
 
Dropping ACID with MongoDB
Dropping ACID with MongoDBDropping ACID with MongoDB
Dropping ACID with MongoDB
 
Wordpress Manual Document
Wordpress Manual DocumentWordpress Manual Document
Wordpress Manual Document
 
01 ElasticSearch : Getting Started
01 ElasticSearch : Getting Started01 ElasticSearch : Getting Started
01 ElasticSearch : Getting Started
 
JSOP in 60 seconds
JSOP in 60 secondsJSOP in 60 seconds
JSOP in 60 seconds
 
Hadoop installation
Hadoop installationHadoop installation
Hadoop installation
 
Up.Php
Up.PhpUp.Php
Up.Php
 
Puppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future PastPuppet Camp Amsterdam 2015: Manifests of Future Past
Puppet Camp Amsterdam 2015: Manifests of Future Past
 
Pry, the good parts
Pry, the good partsPry, the good parts
Pry, the good parts
 
Couchdb w Ruby'm
Couchdb w Ruby'mCouchdb w Ruby'm
Couchdb w Ruby'm
 
Build PHP Search Engine
Build PHP Search EngineBuild PHP Search Engine
Build PHP Search Engine
 
一次Http请求过程分析
一次Http请求过程分析一次Http请求过程分析
一次Http请求过程分析
 
Python and MongoDB
Python and MongoDBPython and MongoDB
Python and MongoDB
 
08 php-files
08 php-files08 php-files
08 php-files
 

Viewers also liked (7)

Drupal 7 module development
Drupal 7 module developmentDrupal 7 module development
Drupal 7 module development
 
Iso Principles
Iso PrinciplesIso Principles
Iso Principles
 
Irmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hallIrmãs Fox in Corynthian hall
Irmãs Fox in Corynthian hall
 
Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...Short description of most useful article databases in Biology at Uppsala univ...
Short description of most useful article databases in Biology at Uppsala univ...
 
East Bay Ruby Tropo presentation
East Bay Ruby Tropo presentationEast Bay Ruby Tropo presentation
East Bay Ruby Tropo presentation
 
Introduction to jRuby
Introduction to jRubyIntroduction to jRuby
Introduction to jRuby
 
Barack Obama Family Tree
Barack Obama Family TreeBarack Obama Family Tree
Barack Obama Family Tree
 

Similar to Meta Buscadores

Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)
Leonardo Soto
 
12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat
Jonathan Linowes
 
Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)
Leonardo Soto
 
TurboGears2 Pluggable Applications
TurboGears2 Pluggable ApplicationsTurboGears2 Pluggable Applications
TurboGears2 Pluggable Applications
Alessandro Molina
 
Dev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDBDev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDB
MongoDB
 

Similar to Meta Buscadores (20)

Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)Jython: Python para la plataforma Java (EL2009)
Jython: Python para la plataforma Java (EL2009)
 
Working Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in OpalWorking Effectively with Legacy Javascript code in Opal
Working Effectively with Legacy Javascript code in Opal
 
Finding Clojure
Finding ClojureFinding Clojure
Finding Clojure
 
Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011Remixing Confluence with Speakeasy - AtlasCamp 2011
Remixing Confluence with Speakeasy - AtlasCamp 2011
 
12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat12 core technologies you should learn, love, and hate to be a 'real' technocrat
12 core technologies you should learn, love, and hate to be a 'real' technocrat
 
Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)Jython: Python para la plataforma Java (JRSL 09)
Jython: Python para la plataforma Java (JRSL 09)
 
Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4Python Code Camp for Professionals 3/4
Python Code Camp for Professionals 3/4
 
TurboGears2 Pluggable Applications
TurboGears2 Pluggable ApplicationsTurboGears2 Pluggable Applications
TurboGears2 Pluggable Applications
 
SEO for Developers
SEO for DevelopersSEO for Developers
SEO for Developers
 
Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4Python Code Camp for Professionals 1/4
Python Code Camp for Professionals 1/4
 
Building Apps with MongoDB
Building Apps with MongoDBBuilding Apps with MongoDB
Building Apps with MongoDB
 
Creating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data MiningCreating Operational Redundancy for Effective Web Data Mining
Creating Operational Redundancy for Effective Web Data Mining
 
Web Scraping is BS
Web Scraping is BSWeb Scraping is BS
Web Scraping is BS
 
SF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDBSF Elixir Meetup - RethinkDB
SF Elixir Meetup - RethinkDB
 
[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018[WLDN] Supercharging word press development in 2018
[WLDN] Supercharging word press development in 2018
 
Search as main navigation
Search as main navigationSearch as main navigation
Search as main navigation
 
Dev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDBDev Jumpstart: Build Your First App with MongoDB
Dev Jumpstart: Build Your First App with MongoDB
 
20190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React201920190118_NetadashiMeetup#8_React2019
20190118_NetadashiMeetup#8_React2019
 
Practical HTML5: Using It Today
Practical HTML5: Using It TodayPractical HTML5: Using It Today
Practical HTML5: Using It Today
 
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
QA Lab: тестирование ПО. Яков Крамаренко: "KISS Automation"
 

More from pechever

La administración de variedad
La administración de variedadLa administración de variedad
La administración de variedad
pechever
 
Red de repositorios del ecuador informe clara 3
Red de repositorios del ecuador   informe clara 3Red de repositorios del ecuador   informe clara 3
Red de repositorios del ecuador informe clara 3
pechever
 
Webometrics
WebometricsWebometrics
Webometrics
pechever
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
pechever
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
pechever
 
Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4
pechever
 
Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3
pechever
 
Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2
pechever
 
Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1
pechever
 
Presentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOLPresentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOL
pechever
 
PresentacióN Babaco4all
PresentacióN Babaco4allPresentacióN Babaco4all
PresentacióN Babaco4all
pechever
 
Entregable Total
Entregable TotalEntregable Total
Entregable Total
pechever
 
Strategic Sourcing At P&G Julio 2009
Strategic Sourcing At P&G   Julio 2009Strategic Sourcing At P&G   Julio 2009
Strategic Sourcing At P&G Julio 2009
pechever
 
Informe De Ecuabank Enero 2009
Informe De Ecuabank   Enero 2009Informe De Ecuabank   Enero 2009
Informe De Ecuabank Enero 2009
pechever
 
Mba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGicaMba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGica
pechever
 
H A R M O N Y S E A R C H
H A R M O N Y  S E A R C HH A R M O N Y  S E A R C H
H A R M O N Y S E A R C H
pechever
 
Presentacion Algortimos Geneticos
Presentacion Algortimos GeneticosPresentacion Algortimos Geneticos
Presentacion Algortimos Geneticos
pechever
 

More from pechever (20)

La administración de variedad
La administración de variedadLa administración de variedad
La administración de variedad
 
Red de repositorios del ecuador informe clara 3
Red de repositorios del ecuador   informe clara 3Red de repositorios del ecuador   informe clara 3
Red de repositorios del ecuador informe clara 3
 
Webometrics
WebometricsWebometrics
Webometrics
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
 
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000Webometrics julio 2010 puesto 25 de lationamerica 702 20000
Webometrics julio 2010 puesto 25 de lationamerica 702 20000
 
Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4Círculo de las Mejoras -- Ejemplo 4
Círculo de las Mejoras -- Ejemplo 4
 
Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3Círculo de las Mejoras -- Ejemplo 3
Círculo de las Mejoras -- Ejemplo 3
 
Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2Círculo de las Mejoras -- Ejemplo 2
Círculo de las Mejoras -- Ejemplo 2
 
Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1Círculo de las Mejoras -- Ejemplo 1
Círculo de las Mejoras -- Ejemplo 1
 
Informe De Red Repositorios Diciembre 2009
Informe De Red Repositorios    Diciembre 2009Informe De Red Repositorios    Diciembre 2009
Informe De Red Repositorios Diciembre 2009
 
Presentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOLPresentación de Repositorio de ESPOL
Presentación de Repositorio de ESPOL
 
PresentacióN Babaco4all
PresentacióN Babaco4allPresentacióN Babaco4all
PresentacióN Babaco4all
 
Entregable Total
Entregable TotalEntregable Total
Entregable Total
 
Strategic Sourcing At P&G Julio 2009
Strategic Sourcing At P&G   Julio 2009Strategic Sourcing At P&G   Julio 2009
Strategic Sourcing At P&G Julio 2009
 
Informe De Ecuabank Enero 2009
Informe De Ecuabank   Enero 2009Informe De Ecuabank   Enero 2009
Informe De Ecuabank Enero 2009
 
Mba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGicaMba PresentacióN AdministracióN EstratéGica
Mba PresentacióN AdministracióN EstratéGica
 
H A R M O N Y S E A R C H
H A R M O N Y  S E A R C HH A R M O N Y  S E A R C H
H A R M O N Y S E A R C H
 
Presentacion Algortimos Geneticos
Presentacion Algortimos GeneticosPresentacion Algortimos Geneticos
Presentacion Algortimos Geneticos
 
On Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y ViceversaOn Line A Lo Off Line Y Viceversa
On Line A Lo Off Line Y Viceversa
 
Canadiense ven a Ecuador
Canadiense ven a EcuadorCanadiense ven a Ecuador
Canadiense ven a Ecuador
 

Recently uploaded

1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
QucHHunhnh
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdf
QucHHunhnh
 
Spellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please PractiseSpellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please Practise
AnaAcapella
 
The basics of sentences session 3pptx.pptx
The basics of sentences session 3pptx.pptxThe basics of sentences session 3pptx.pptx
The basics of sentences session 3pptx.pptx
heathfieldcps1
 

Recently uploaded (20)

Accessible Digital Futures project (20/03/2024)
Accessible Digital Futures project (20/03/2024)Accessible Digital Futures project (20/03/2024)
Accessible Digital Futures project (20/03/2024)
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
 
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
2024-NATIONAL-LEARNING-CAMP-AND-OTHER.pptx
 
ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.ICT role in 21st century education and it's challenges.
ICT role in 21st century education and it's challenges.
 
On National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan FellowsOn National Teacher Day, meet the 2024-25 Kenan Fellows
On National Teacher Day, meet the 2024-25 Kenan Fellows
 
Sociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning ExhibitSociology 101 Demonstration of Learning Exhibit
Sociology 101 Demonstration of Learning Exhibit
 
Mixin Classes in Odoo 17 How to Extend Models Using Mixin Classes
Mixin Classes in Odoo 17  How to Extend Models Using Mixin ClassesMixin Classes in Odoo 17  How to Extend Models Using Mixin Classes
Mixin Classes in Odoo 17 How to Extend Models Using Mixin Classes
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdf
 
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptxSKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
SKILL OF INTRODUCING THE LESSON MICRO SKILLS.pptx
 
Unit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptxUnit-IV; Professional Sales Representative (PSR).pptx
Unit-IV; Professional Sales Representative (PSR).pptx
 
Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...Making communications land - Are they received and understood as intended? we...
Making communications land - Are they received and understood as intended? we...
 
How to Give a Domain for a Field in Odoo 17
How to Give a Domain for a Field in Odoo 17How to Give a Domain for a Field in Odoo 17
How to Give a Domain for a Field in Odoo 17
 
How to Create and Manage Wizard in Odoo 17
How to Create and Manage Wizard in Odoo 17How to Create and Manage Wizard in Odoo 17
How to Create and Manage Wizard in Odoo 17
 
Spellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please PractiseSpellings Wk 3 English CAPS CARES Please Practise
Spellings Wk 3 English CAPS CARES Please Practise
 
Single or Multiple melodic lines structure
Single or Multiple melodic lines structureSingle or Multiple melodic lines structure
Single or Multiple melodic lines structure
 
The basics of sentences session 3pptx.pptx
The basics of sentences session 3pptx.pptxThe basics of sentences session 3pptx.pptx
The basics of sentences session 3pptx.pptx
 
Kodo Millet PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
Kodo Millet  PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...Kodo Millet  PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
Kodo Millet PPT made by Ghanshyam bairwa college of Agriculture kumher bhara...
 
HMCS Max Bernays Pre-Deployment Brief (May 2024).pptx
HMCS Max Bernays Pre-Deployment Brief (May 2024).pptxHMCS Max Bernays Pre-Deployment Brief (May 2024).pptx
HMCS Max Bernays Pre-Deployment Brief (May 2024).pptx
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
 
Understanding Accommodations and Modifications
Understanding  Accommodations and ModificationsUnderstanding  Accommodations and Modifications
Understanding Accommodations and Modifications
 

Meta Buscadores

  • 1. Metabuscadores Fabricio Echeverría pechever@espol.edu.ec Joseph Brodsky
  • 2. Agenda • Índices de palabras • Web Search Engine • Information Retrieval Systems • Metabuscadores • Preguntas
  • 3. En busca de la memoria dinámica extendida
  • 4. Índice de Palabras: Onomástica de los nombres en Catalán
  • 5. Web Search Engine • Lenguaje de programación: Python • Manejo de Alta RAM • Almacenamiento Compartido • Procesamiento en Paralelo
  • 7. Código Python – Web Search Engine def union(a, b): cache = { def crawl_web(seed): # returns index, graph of for e in b: 'http://www.udacity.com/cs101x/final/multi.html': """<html> inlinks <body> if e not in a: tocrawl = [seed] a.append(e) crawled = [] <a href="http://www.udacity.com/cs101x/final/a.html">A</a><br> graph = {} # <url>, [list of pages it links to] <a href="http://www.udacity.com/cs101x/final/b.html">B</a><br> def add_page_to_index(index, url, content): index = {} words = content.split() </body> while tocrawl: pos=0 """, page = tocrawl.pop() for word in words: 'http://www.udacity.com/cs101x/final/b.html': """<html> if page not in crawled: <body> pos=content.find(word, pos) content = get_page(page) add_to_index(index, word, url,pos) add_page_to_index(index, page, content) Monty likes the Python programming language outlinks = get_all_links(content) Thomas Jefferson founded the University of Virginia def add_to_index(index, keyword, url,pos): When Mandela was in London, he visited Nelson's Column. graph[page] = outlinks if keyword in index: union(tocrawl, outlinks) index[keyword].append([url,pos]) </body> crawled.append(page) else: </html> return index, graph """, index[keyword] = [[url,pos]] 'http://www.udacity.com/cs101x/final/a.html': """<html> def get_next_target(page): <body> def lookup(index, keyword): start_link = page.find('<a href=') if keyword in index: Monty Python is not about a programming language if start_link == -1: return index[keyword] Udacity was not founded by Thomas Jefferson return None, 0 else: Nelson Mandela said "Education is the most powerful weapon start_quote = page.find('"', start_link) return None which you can end_quote = page.find('"', start_quote + 1) use to change the world." 
url = page[start_quote + 1:end_quote] </body> return url, end_quote </html> """, def get_all_links(page): } links = [] def get_page(url): while True: if url in cache: url, endpos = get_next_target(page) return cache[url] if url: else: links.append(url) print "Page not in cache: " + url page = page[endpos:] return None else: break return links http://www.udacity.com/cs101
  • 9. Metabuscadores • Es la unión de búsquedas(query) en varios buscadores(Search Engine) – Índices de Búsquedas -