Artificial Intelligence
The Python Language: Natural Language Processing.
Universidad Nacional de Ingeniería
FACULTAD DE CIENCIAS Y SISTEMAS – DEPARTAMENTO DE INFORMÁTICA
Natural Language Processing with Python.
Collecting data from PDF files.
!pip install PyPDF2
import PyPDF2
from PyPDF2 import PdfFileReader
pdf = open("file.pdf","rb")
pdf_reader = PyPDF2.PdfFileReader(pdf)
print(pdf_reader.numPages)
page = pdf_reader.getPage(0)
print(page.extractText())
pdf.close()
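The code above uses the legacy PyPDF2 API (PdfFileReader, numPages, getPage, extractText), which newer releases have renamed. A minimal sketch of the equivalent calls, assuming PyPDF2 >= 3.0 or the pypdf package:
from PyPDF2 import PdfReader  # with pypdf: from pypdf import PdfReader
reader = PdfReader("file.pdf")
# Number of pages
print(len(reader.pages))
# Text of the first page
print(reader.pages[0].extract_text())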
Collecting data from Word files.
!pip install python-docx
from docx import Document
doc = open("file.docx","rb")
document = Document(doc)
docu=""
for para in document.paragraphs:
docu += para.text
print(docu)
Collecting data from HTML files.
!pip install bs4
import urllib.request as urllib2
from bs4 import BeautifulSoup
response = urllib2.urlopen('https://en.wikipedia.org/wiki/Natural_language_processing')
html_doc = response.read()
soup = BeautifulSoup(html_doc, 'html.parser')
strhtm = soup.prettify()
print (strhtm[:1000])
print(soup.title)
print(soup.title.string)
print(soup.a.string)
print(soup.b.string)
# Extracting every instance of a particular tag
for x in soup.find_all('a'): print(x.string)
# Extracting all the text from a particular tag
for x in soup.find_all('p'): print(x.text)
Scraping text from the web.
Before scraping any website, blog, or e-commerce site, make sure you read the site's terms and
conditions to check whether it grants permission to scrape its data.
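As a quick programmatic check (a minimal sketch, not a substitute for reading the terms of service), Python's standard urllib.robotparser can report whether a site's robots.txt allows fetching a given URL:
from urllib.robotparser import RobotFileParser
rp = RobotFileParser()
rp.set_url('https://www.imdb.com/robots.txt')
rp.read()
# True if robots.txt allows a generic crawler ('*') to fetch this path
print(rp.can_fetch('*', 'https://www.imdb.com/chart/top'))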
!pip install bs4
!pip install requests
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import Series, DataFrame
from ipywidgets import FloatProgress
from time import sleep
from IPython.display import display
import re
import pickle
url = 'http://www.imdb.com/chart/top?ref_=nv_mv_250_6'
result = requests.get(url)
c = result.content
soup = BeautifulSoup(c,"lxml")
summary = soup.find('div',{'class':'article'})
# Creating empty lists to append the extracted data.
moviename = []
cast = []
description = []
rating = []
ratingoutof = []
year = []
genre = []
movielength = []
rot_audscore = []
rot_avgrating = []
rot_users = []
# Extracting the required data from the HTML soup.
rgx = re.compile('[%s]' % '()')
f = FloatProgress(min=0, max=250)
display(f)
for row,i in zip(summary.find('table').findAll('tr'),
                 range(len(summary.find('table').findAll('tr')))):
    for sitem in row.findAll('span',{'class':'secondaryInfo'}):
        s = sitem.find(text=True)
        year.append(rgx.sub('', s))
    for ritem in row.findAll('td',{'class':'ratingColumn imdbRating'}):
        for iget in ritem.findAll('strong'):
            rating.append(iget.find(text=True))
            ratingoutof.append(iget.get('title').split(' ', 4)[3])
    for item in row.findAll('td',{'class':'titleColumn'}):
        for href in item.findAll('a',href=True):
            moviename.append(href.find(text=True))
            rurl = 'https://www.rottentomatoes.com/m/' + href.find(text=True)
            try:
                rresult = requests.get(rurl)
            except requests.exceptions.ConnectionError:
                status_code = "Connection refused"
            rc = rresult.content
            rsoup = BeautifulSoup(rc,"lxml")
            try:
                rot_audscore.append(rsoup.find('div',{'class':'meter-value'})
                                    .find('span',{'class':'superPageFontColor'}).text)
                rot_avgrating.append(rsoup.find('div',
                                     {'class':'audience-info hidden-xs superPageFontColor'})
                                     .find('div').contents[2].strip())
                rot_users.append(rsoup.find('div',
                                 {'class':'audience-info hidden-xs superPageFontColor'})
                                 .contents[3].contents[2].strip())
            except AttributeError:
                rot_audscore.append("")
                rot_avgrating.append("")
                rot_users.append("")
            cast.append(href.get('title'))
            imdb = "http://www.imdb.com" + href.get('href')
            try:
                iresult = requests.get(imdb)
                ic = iresult.content
                isoup = BeautifulSoup(ic,"lxml")
                description.append(isoup.find('div',
                                   {'class':'summary_text'}).find(text=True).strip())
                genre.append(isoup.find('span',{'class':'itemprop'}).find(text=True))
                movielength.append(isoup.find('time',
                                   {'itemprop':'duration'}).find(text=True).strip())
            except requests.exceptions.ConnectionError:
                description.append("")
                genre.append("")
                movielength.append("")
    sleep(.1)
    f.value = i
# Convert the lists to pandas Series
moviename = Series(moviename)
cast = Series(cast)
description = Series(description)
rating = Series(rating)
ratingoutof = Series(ratingoutof)
year = Series(year)
genre = Series(genre)
movielength = Series(movielength)
rot_audscore = Series(rot_audscore)
rot_avgrating = Series(rot_avgrating)
rot_users = Series(rot_users)
# Create a dataframe and inspect it
imdb_df = pd.concat([moviename,year,description,genre,
movielength,cast,rating,ratingoutof,
rot_audscore,rot_avgrating,rot_users],axis=1)
imdb_df.columns = [ 'moviename','year','description','genre',
'movielength','cast','imdb_rating',
'imdb_ratingbasedon','tomatoes_audscore',
'tomatoes_rating','tomatoes_ratingbasedon']
imdb_df['rank'] = imdb_df.index + 1
imdb_df.head(1)
# Save the file as CSV.
imdb_df.to_csv("imdbdataexport.csv")
Generating n-grams with TextBlob.
Text = "I am learning NLP"
#Importing TextBlob
from textblob import TextBlob
#For unigrams: use n = 1
TextBlob(Text).ngrams(1)
#For bigrams: use n = 2
TextBlob(Text).ngrams(2)
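The same n-grams can be produced with NLTK's utility function; a minimal sketch (assumes nltk is installed):
from nltk.util import ngrams
toks = "I am learning NLP".split()
# Bigrams over the token list, e.g. ('I', 'am'), ('am', 'learning'), ...
print(list(ngrams(toks, 2)))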
Extracting noun phrases.
#Importing nltk
import nltk
from textblob import TextBlob
#Extracting noun phrases
blob = TextBlob("John is learning natural language processing")
for np in blob.noun_phrases:
print(np)
Finding similarity between texts
documents = ("I like NLP",
"I am exploring NLP",
"I am a beginner in NLP",
"I want to learn NLP",
"I like advanced NLP")
#Importing libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
#Compute TF-IDF: a feature engineering method that weights terms by importance in a document-term matrix.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
tfidf_matrix.shape
# Output
# Compute the similarity of the first sentence with the rest of the sentences
cosine_similarity(tfidf_matrix[0:1],tfidf_matrix)
# Output
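As a small follow-up sketch (reusing the names defined above), the row of cosine similarities can be used to find which of the other documents is closest to the first one:
import numpy as np
scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix)[0]
scores[0] = 0                      # ignore the sentence compared with itself
best = int(np.argmax(scores))      # index of the most similar document
print(documents[best], scores[best])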
Phonetic matching
!pip install fuzzy
import fuzzy
soundex = fuzzy.Soundex(4)
soundex('natural')
#output
'N364'
soundex('natuaral')
#output
'N364'
soundex('language')
#output
'L52'
soundex('processing')
#output
'P625'
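A short sketch of how these codes support sound-alike matching, reusing the soundex object created above (the word list and query below are only illustrative):
# Two different spellings map to the same code, so they are treated as phonetically equal
print(soundex('natural') == soundex('natuaral'))   # expected: True
# Hypothetical helper: keep the words that sound like a (misspelled) query
words = ['language', 'natural', 'processing']
query = 'langauge'
print([w for w in words if soundex(w) == soundex(query)])   # expected to include 'language'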
Tagging parts of speech.
Text = "I love NLP and I will learn NLP in 2 month"
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
stop_words = set(stopwords.words('english'))
# May need one-time downloads: nltk.download('punkt'), nltk.download('stopwords'), nltk.download('averaged_perceptron_tagger')
# Tokenize the text into sentences
tokens = sent_tokenize(Text)
#Generate tags for all the tokens using a loop
for i in tokens:
    words = nltk.word_tokenize(i)
    words = [w for w in words if not w in stop_words]
    # POS tagger
    tags = nltk.pos_tag(words)
tags
#output:
[('I', 'PRP'),
('love', 'VBP'),
('NLP', 'NNP'),
('I', 'PRP'),
('learn', 'VBP'),
('NLP', 'RB'),
('2month', 'CD')]
• CC coordinating conjunction.
• CD cardinal digit.
• DT determiner.
• EX existential there (like “there is” ... think of it like “there exists”).
• FW foreign word.
• IN preposition/subordinating conjunction.
• JJ adjective ‘big’.
• JJR adjective, comparative ‘bigger’.
• JJS adjective, superlative ‘biggest’.
• LS list marker 1.
• MD modal could, will.
• NN noun, singular ‘desk’.
• NNS noun plural ‘desks’.
• NNP proper noun, singular ‘Harrison’.
• NNPS proper noun, plural ‘Americans’.
• PDT predeterminer ‘all the kids’.
• POS possessive ending parent’s.
• PRP personal pronoun I, he, she.
• PRP$ possessive pronoun my, his, hers.
• RB adverb very, silently.
• RBR adverb, comparative better.
• RBS adverb, superlative best.
• RP particle give up.
• TO to go ‘to’ the store.
• UH interjection.
• VB verb, base form take.
• VBD verb, past tense took.
• VBG verb, gerund/present participle taking.
• VBN verb, past participle taken.
• VBP verb, sing. present, non-3d take.
• VBZ verb, 3rd person sing. present takes.
• WDT wh-determiner which.
• WP wh-pronoun who, what.
• WP$ possessive wh-pronoun whose.
• WRB wh-adverb where, when.
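These abbreviations are the Penn Treebank tags; NLTK can also print the description and examples for any tag on demand (a minimal sketch; the first call may ask you to download the 'tagsets' resource):
import nltk
# nltk.download('tagsets')   # one-time download of the tag documentation
nltk.help.upenn_tagset('NNP')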
Extracting entities from text.
sent = "John is studying at Stanford University in California"
import nltk
from nltk import ne_chunk
from nltk import word_tokenize
# May need one-time downloads: nltk.download('maxent_ne_chunker'), nltk.download('words')
ne_chunk(nltk.pos_tag(word_tokenize(sent)), binary=False)
#output
Tree('S', [Tree('PERSON', [('John', 'NNP')]), ('is', 'VBZ'),
('studying', 'VBG'), ('at', 'IN'), Tree('ORGANIZATION',
[('Stanford', 'NNP'), ('University', 'NNP')]), ('in', 'IN'),
Tree('GPE', [('California', 'NNP')])])
Here "John" is tagged as "PERSON"
"Stanford" as "ORGANIZATION"
"California" as "GPE". Geopolitical entity, i.e. countries, cities, states.
#Using spaCy
import spacy
nlp = spacy.load('en')
# Read or create a sentence
doc = nlp(u'Apple is ready to launch new phone worth $10000 in New york time square ')
for ent in doc.ents:
print(ent.text, ent.start_char, ent.end_char, ent.label_)
#output
Apple 0 5 ORG
10000 42 47 MONEY
New york 51 59 GPE
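Note that spacy.load('en') relies on the old 'en' shortcut from spaCy 2.x; in current spaCy versions the model is installed and loaded by its full name. A minimal sketch, assuming the small English model has been downloaded:
# !python -m spacy download en_core_web_sm
import spacy
nlp = spacy.load('en_core_web_sm')
doc = nlp('Apple is ready to launch new phone worth $10000 in New york time square')
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)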
Sentiment analysis.
To run sentiment analysis, use TextBlob. It essentially returns two metrics:
1. Polarity = polarity lies in the range [-1, 1], where 1 means a positive statement
and -1 means a negative one.
2. Subjectivity = subjectivity indicates how much the text is a personal opinion
rather than objective information, in the range [0, 1].
review = "I like this phone. screen quality and camera clarity is really good."
review2 = "This tv is not good. Bad quality, no clarity, worst experience"
from textblob import TextBlob
#TextBlob has a pretrained sentiment prediction model
blob = TextBlob(review)
blob.sentiment
#output
Sentiment(polarity=0.7, subjectivity=0.6000000000000001)
blob = TextBlob(review2)
blob.sentiment
#output
Sentiment(polarity=-0.6833333333333332,
subjectivity=0.7555555555555555)
# This is a negative review, since the polarity is "-0.68".
Disambiguating text.
Text1 = 'I went to the bank to deposit my money'
Text2 = 'The river bank was full of dead fishes'
#Install pywsd
!pip install pywsd
#Import functions
from nltk.corpus import wordnet as wn
from nltk.stem import PorterStemmer
from itertools import chain
from pywsd.lesk import simple_lesk
# Sentences
bank_sents = ['I went to the bank to deposit my money', 'The river bank was full of dead fishes']
# Calling the simple_lesk function and printing the results for both sentences
print ("Context-1:", bank_sents[0])
answer = simple_lesk(bank_sents[0],'bank')
print ("Sense:", answer)
print ("Definition : ", answer.definition())
print ("Context-2:", bank_sents[1])
answer = simple_lesk(bank_sents[1],'bank','n')
print ("Sense:", answer)
print ("Definition : ", answer.definition())
#Result:
Context-1: I went to the bank to deposit my money
Sense: Synset('depository_financial_institution.n.01')
Definition : a financial institution that accepts deposits and channels the money into
lending activities
Context-2: The river bank was full of dead fishes
Sense: Synset('bank.n.01')
Definition : sloping land (especially the slope beside a body of water)
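A similar result can be obtained with NLTK's built-in lesk function (a plainer variant of the same algorithm, so the chosen senses may occasionally differ from pywsd's simple_lesk); a minimal sketch assuming nltk.download('wordnet') and nltk.download('punkt') have been run:
from nltk.wsd import lesk
from nltk.tokenize import word_tokenize
print(lesk(word_tokenize(bank_sents[0]), 'bank'))
print(lesk(word_tokenize(bank_sents[1]), 'bank', 'n'))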
Converting speech to text.
!pip install SpeechRecognition
!pip install PyAudio
import speech_recognition as sr
r=sr.Recognizer()
with sr.Microphone() as source:
    print("Please say something")
    audio = r.listen(source)
    print("Time over, thanks")
try:
    print("I think you said: " + r.recognize_google(audio))
    #print("I think you said: " + r.recognize_google(audio, language='en-EN'))
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand the audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
#Output
Please say something
Time over, thanks
I think you said: I am learning natural language processing
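If no microphone is available, the same recognizer can transcribe a pre-recorded file; a minimal sketch, assuming a WAV file (the name recording.wav below is only illustrative):
import speech_recognition as sr
r = sr.Recognizer()
with sr.AudioFile("recording.wav") as source:   # hypothetical file name
    audio = r.record(source)                    # read the entire file
try:
    print("I think you said: " + r.recognize_google(audio))
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand the audio")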
Converting text to speech.
!pip install gTTS
from gtts import gTTS
#Choose the language, English ('en')
convert = gTTS(text='I like this NLP book', lang='en', slow=False)
# Save the audio to an mp3 file
convert.save("audio.mp3")
#output: Play the audio.mp3 file saved locally on your machine to listen to it
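In a Jupyter notebook the generated file can also be played inline; a minimal sketch using IPython.display (assumes audio.mp3 was created by the code above):
from IPython.display import Audio
Audio("audio.mp3")   # renders an audio player in the notebook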
Language translation.
!pip install goslate
import goslate
text = "Bonjour le monde"
gs = goslate.Goslate()
translatedText = gs.translate(text,'en')
print(translatedText)
#Output
Hi world