SlideShare a Scribd company logo
Comcast Consumer Complaints
A first approach in R language
Olabanji Shonibare
oyshonib@mtu.edu
Natural Language Processing (NLP)
NLP is a set of techniques for approaching text
problems
Natural Language Processing (NLP)
A few questions
• Word frequency
• Variation across states
Comcast Consumer Complaints
• comcast_consumeraffairs_complaints.csv
• comcast_fcc_complaints_2015.csv
Raw complaint data about Comcast television and internet
published at consumeraffairs.com between 04/08 and 09/16.
Raw complaints made to the FCC about Comcast between 04/15
and 06/15.
Preliminaries
# Packages used by the snippets in this analysis. They are attached once, up
# front, so that the pipes, plots, corpora and word clouds below can run.
library(dplyr)         # %>%, filter(), mutate(), group_by(), summarise(), ...
library(ggplot2)       # ggplot(), geom_bar(), theme()
library(stringr)       # str_sub()
library(tm)            # Corpus(), VectorSource(), tm_map(), stopwords()
library(wordcloud)     # wordcloud()
library(RColorBrewer)  # brewer.pal()

# Consumer Affairs complaints.
df <- read.csv("comcast_consumeraffairs_complaints.csv")

# FCC complaints (2015).
df_fcc <- read.csv("comcast_fcc_complaints_2015.csv")



# Shape and column names of the Consumer Affairs data.
dim(df)
## [1] 5659    4
names(df)
## [1] "author"    "posted_on" "rating"    "text"

# Shape and column names of the FCC data.
# NOTE(review): dim() reports 11 columns but only 10 names appear in the
# captured output below.
dim(df_fcc)
## [1] 2225   11
names(df_fcc)
##  [1] "Ticket.."           "Customer.Complaint"
##  [3] "Date"               "Time"
##  [5] "Received.Via"       "City"
##  [7] "State"              "Zip.code"
##  [9] "Status"             "Filing.on.Behalf.of.Someone"

Comcast Consumer Affairs Complaints
# Bar chart of how many reviews received each rating value.
ggplot(df, aes(x = rating)) +
  geom_bar()
# Number of reviews per rating value.
df %>%
  group_by(rating) %>%
  summarise(count = n())
##	#	A	tibble:	6	×	2

##			rating	count

##				<int>	<int>

##	1						0		1560

##	2						1		3734

##	3						2			260

##	4						3				54

##	5						4				19

##	6						5				32	
# Keep only the reviews whose rating is not 0, then redraw the distribution.
# NOTE(review): a rating of 0 presumably means "no rating given" - confirm.
df2 <- filter(df, rating != 0)

ggplot(df2, aes(x = rating)) +
  geom_bar()
# Derive a two-letter State code from the last two characters of the
# upper-cased author field.
# NOTE(review): this assumes `author` ends in a state abbreviation; rows where
# it does not will produce arbitrary two-letter codes - verify.
df3 <- mutate(df2, State = str_sub(toupper(author), -2))

# Number of reviews per State code, largest first.
df3 %>%
  group_by(State) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count))
##	#	A	tibble:	52	×	2

##				State	Count

##				<chr>	<int>

##	1					FL			650

##	2					CA			345

##	3					GA			320

##	4					IL			284

##	5					PA			221

##	6					TN			202

##	7					TX			193

##	8					MI			189

##	9					WA			168

##	10				NJ			167

##	#	...	with	42	more	rows
# Split the rated reviews into two groups: ratings below 3, and ratings of
# 3 or more.
low_rating <- filter(df2, rating < 3)
high_rating <- filter(df2, rating >= 3)

# Size of each group.
nrow(low_rating)
## [1] 3994
nrow(high_rating)
## [1] 105
# cs_ratio: customer satisfaction ratio, i.e. the share of a State's reviews
# whose rating is above 2.
#
# mean() of the logical vector gives that share directly; na.rm = TRUE keeps
# a missing rating from being counted as a satisfied customer.
# n_reviews (number of non-missing ratings) is reported next to the ratio
# because a ratio built on one or two reviews is not comparable with one
# built on hundreds.
df3 %>%
  group_by(State) %>%
  summarise(
    cs_ratio = mean(rating > 2, na.rm = TRUE),
    n_reviews = sum(!is.na(rating))
  ) %>%
  arrange(desc(cs_ratio))
##	#	A	tibble:	52	×	2

##				State			cs_ratio

##				<chr>						<dbl>

##	1					IA	1.00000000

##	2					ID	1.00000000

##	3					BC	0.50000000

##	4					NV	0.33333333

##	5					WV	0.13333333

##	6					NH	0.10714286

##	7					MO	0.09090909

##	8					ER	0.06666667

##	9					SC	0.05263158

##	10				AZ	0.05000000

##	#	...	with	42	more	rows	
States with high customer satisfaction ratio (rating >2)
Word cloud for low ratings
# Words to leave out of the cloud: the brand name plus the standard English
# stop words.
low_stops <- c("comcast", stopwords("english"))

# Build and clean a corpus from the text of the low-rated reviews.
#  * as.character(): the column may have been read as a factor by read.csv().
#  * content_transformer(tolower): base functions must be wrapped so that
#    tm_map() returns documents rather than bare character vectors; this
#    also removes the need for a trailing tm_map(PlainTextDocument).
#  * Stop words are removed once, and before punctuation is stripped, so
#    that contractions in the stop-word list (e.g. "don't") still match.
#  * Stemming (tm_map(stemDocument)) is left disabled, as in the original.
low_ratingCorpus <-
  Corpus(VectorSource(as.character(low_rating$text))) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, low_stops) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)

# Word cloud of the 100 most frequent remaining words, most frequent first.
wordcloud(low_ratingCorpus, scale = c(5, 0.5), max.words = 100,
          random.order = FALSE, rot.per = 0.35, use.r.layout = FALSE,
          colors = brewer.pal(8, "Dark2"))
Word cloud for low ratings
# Words to leave out of the cloud: the brand name plus the standard English
# stop words.
low_stops <- c("comcast", stopwords("english"))

# Build and clean a corpus from the text of the low-rated reviews.
#  * as.character(): the column may have been read as a factor by read.csv().
#  * content_transformer(tolower): base functions must be wrapped so that
#    tm_map() returns documents rather than bare character vectors; this
#    also removes the need for a trailing tm_map(PlainTextDocument).
#  * Stop words are removed once, and before punctuation is stripped, so
#    that contractions in the stop-word list (e.g. "don't") still match.
#  * Stemming (tm_map(stemDocument)) is left disabled, as in the original.
low_ratingCorpus <-
  Corpus(VectorSource(as.character(low_rating$text))) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, low_stops) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)

# Word cloud of the 100 most frequent remaining words, most frequent first.
wordcloud(low_ratingCorpus, scale = c(5, 0.5), max.words = 100,
          random.order = FALSE, rot.per = 0.35, use.r.layout = FALSE,
          colors = brewer.pal(8, "Dark2"))
Word cloud for high ratings
# Words to leave out of the cloud: the brand name plus the standard English
# stop words.
temp_stops <- c("comcast", stopwords("english"))

# Build and clean a corpus from the text of the high-rated reviews.
#  * as.character(): the column may have been read as a factor by read.csv().
#  * content_transformer(tolower): base functions must be wrapped so that
#    tm_map() returns documents rather than bare character vectors; this
#    also removes the need for a trailing tm_map(PlainTextDocument).
#  * Stop words are removed before punctuation is stripped, so that
#    contractions in the stop-word list (e.g. "don't") still match.
#  * Stemming (tm_map(stemDocument)) is left disabled, as in the original.
high_ratingCorpus <-
  Corpus(VectorSource(as.character(high_rating$text))) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, temp_stops) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)

# Word cloud of the 100 most frequent remaining words, most frequent first.
wordcloud(high_ratingCorpus, scale = c(5, 0.5), max.words = 100,
          random.order = FALSE, rot.per = 0.35, use.r.layout = FALSE,
          colors = brewer.pal(8, "Dark2"))
Comcast FCC Complaints (2015)
# Number of FCC complaints per State, most complaints first.
temp1 <- df_fcc %>%
  group_by(State) %>%
  summarise(no_complaints = n()) %>%
  arrange(desc(no_complaints))

# temp1  # uncomment to print the table

# Bar chart of the counts. The x-axis limits are taken from temp1 so that the
# bars follow the row order of the table; labels are tilted for legibility.
ggplot(temp1) +
  geom_bar(aes(x = State, y = no_complaints), stat = "identity") +
  scale_x_discrete(limits = temp1$State) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Keep only the first 10 rows of temp1 (the States with the most complaints,
# given that temp1 is sorted in descending order).
temp2 <- head(temp1, n = 10)

# Same bar chart as for temp1, restricted to those rows.
ggplot(temp2) +
  geom_bar(aes(x = State, y = no_complaints), stat = "identity") +
  scale_x_discrete(limits = temp2$State) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Number of FCC complaints per City, most complaints first.
temp3 <- df_fcc %>%
  group_by(City) %>%
  summarise(no_complaints = n()) %>%
  arrange(desc(no_complaints))

##	#	A	tibble:	928	×	2

##												City	no_complaints

##										<fctr>									<int>

##	1							Atlanta												63

##	2							Chicago												47

##	3					Knoxville												36

##	4							Houston												33

##	5		Jacksonville												31

##	6		Philadelphia												25

##	7								Denver												22

##	8									Miami												22

##	9					Nashville												22

##	10	Indianapolis												21

##	#	...	with	918	more	rows
# Bar chart of complaints per City, bars in the row order of temp3.
# The tick labels are drawn at size 1 because every City gets its own tick.
ggplot(temp3) +
  geom_bar(aes(x = City, y = no_complaints), stat = "identity") +
  scale_x_discrete(limits = temp3$City) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 1))
# Keep only the first 20 rows of temp3 (the Cities with the most complaints,
# given that temp3 is sorted in descending order).
temp4 <- head(temp3, n = 20)

##	#	A	tibble:	20	×	2

##													City	no_complaints

##											<fctr>									<int>

##	1								Atlanta												63

##	2								Chicago												47

##	3						Knoxville												36

##	4								Houston												33

##	5			Jacksonville												31

##	6			Philadelphia												25

##	7									Denver												22

##	8										Miami												22

##	9						Nashville												22

##	10		Indianapolis												21

##	11	San	Francisco												20

##	12						San	Jose												20

##	13					Baltimore												19

##	14								Tucson												19

##	15				Washington												19

##	16						Marietta												16

##	17						Portland												16

##	18							Seattle												14

##	19							Memphis												13

##	20								Canton												12
# Bar chart of complaints for the Cities in temp4, bars in the row order of
# temp4; labels are tilted for legibility.
ggplot(temp4) +
  geom_bar(aes(x = City, y = no_complaints), stat = "identity") +
  scale_x_discrete(limits = temp4$City) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Number of FCC complaints per complaint title, most frequent first.
# Titles are trimmed and lower-cased before grouping so that entries that
# differ only in capitalisation or surrounding whitespace (e.g. "Comcast" and
# "comcast") are counted together instead of as separate titles.
df_fcc %>%
  mutate(
    Customer.Complaint = tolower(trimws(as.character(Customer.Complaint)))
  ) %>%
  group_by(Customer.Complaint) %>%
  summarise(no_complaints = n()) %>%
  arrange(desc(no_complaints))
##	#	A	tibble:	1,842	×	2

##										Customer.Complaint	no_complaints

##																						<fctr>									<int>

##	1																			Comcast												83

##	2										Comcast	Internet												18

##	3										Comcast	Data	Cap												17

##	4																			comcast												13

##	5											Comcast	Billing												11

##	6									Comcast	Data	Caps												11

##	7																	Data	Caps												11

##	8		Unfair	Billing	Practices													9

##	9										Comcast	data	cap													8

##	10								Comcast	data	caps													8

##	#	...	with	1,832	more	rows
# Words to leave out of the cloud: the brand name, a hand-picked list of
# filler words, and the standard English stop words.
# (The opening quote of "comcast" was a typographic quote in the original,
# which R cannot parse; all strings now use plain quotes.)
all_stops <- c("comcast", "now", "company", "day", "someone", "thing", "also",
               "got", "way", "call", "called", "one", "said", "tell",
               stopwords("english"))

# Build and clean a corpus from the FCC complaint titles.
#  * as.character(): the column may have been read as a factor by read.csv().
#  * content_transformer(tolower): base functions must be wrapped so that
#    tm_map() returns documents rather than bare character vectors; this
#    also removes the need for a trailing tm_map(PlainTextDocument).
#  * Stop words are removed before punctuation is stripped, so that
#    contractions in the stop-word list (e.g. "don't") still match.
#  * Stemming (tm_map(stemDocument)) is left disabled, as in the original.
df_fccCorpus <-
  Corpus(VectorSource(as.character(df_fcc$Customer.Complaint))) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, all_stops) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace)

# Word cloud of the 100 most frequent remaining words, most frequent first.
wordcloud(df_fccCorpus, scale = c(5, 0.5), max.words = 100,
          random.order = FALSE, rot.per = 0.35, use.r.layout = FALSE,
          colors = brewer.pal(8, "Dark2"))
https://www.kaggle.com/dan195/d/archaeocharlie/comcastcomplaints/first-run
Credits:

More Related Content

Viewers also liked

NLP& Bigdata. Motivation and Action
NLP& Bigdata. Motivation and ActionNLP& Bigdata. Motivation and Action
NLP& Bigdata. Motivation and Action
Sarath P R
 
Text analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco ControlText analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco Control
Ben Healey
 
Introducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with rIntroducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with r
Vivian S. Zhang
 
Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)
fridolin.wild
 
Natural Language Processing with Python
Natural Language Processing with PythonNatural Language Processing with Python
Natural Language Processing with Python
Benjamin Bengfort
 
R by example: mining Twitter for consumer attitudes towards airlines
R by example: mining Twitter for consumer attitudes towards airlinesR by example: mining Twitter for consumer attitudes towards airlines
R by example: mining Twitter for consumer attitudes towards airlines
Jeffrey Breen
 
NLTK in 20 minutes
NLTK in 20 minutesNLTK in 20 minutes
NLTK in 20 minutes
Jacob Perkins
 
Text Mining with R -- an Analysis of Twitter Data
Text Mining with R -- an Analysis of Twitter DataText Mining with R -- an Analysis of Twitter Data
Text Mining with R -- an Analysis of Twitter Data
Yanchang Zhao
 

Viewers also liked (8)

NLP& Bigdata. Motivation and Action
NLP& Bigdata. Motivation and ActionNLP& Bigdata. Motivation and Action
NLP& Bigdata. Motivation and Action
 
Text analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco ControlText analytics in Python and R with examples from Tobacco Control
Text analytics in Python and R with examples from Tobacco Control
 
Introducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with rIntroducing natural language processing(NLP) with r
Introducing natural language processing(NLP) with r
 
Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)Natural Language Processing in R (rNLP)
Natural Language Processing in R (rNLP)
 
Natural Language Processing with Python
Natural Language Processing with PythonNatural Language Processing with Python
Natural Language Processing with Python
 
R by example: mining Twitter for consumer attitudes towards airlines
R by example: mining Twitter for consumer attitudes towards airlinesR by example: mining Twitter for consumer attitudes towards airlines
R by example: mining Twitter for consumer attitudes towards airlines
 
NLTK in 20 minutes
NLTK in 20 minutesNLTK in 20 minutes
NLTK in 20 minutes
 
Text Mining with R -- an Analysis of Twitter Data
Text Mining with R -- an Analysis of Twitter DataText Mining with R -- an Analysis of Twitter Data
Text Mining with R -- an Analysis of Twitter Data
 

Recently uploaded

Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AIEnchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Vladimir Iglovikov, Ph.D.
 
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy SurveyTrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
Adtran
 
How to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For FlutterHow to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For Flutter
Daiki Mogmet Ito
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
Ana-Maria Mihalceanu
 
Full-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalizationFull-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalization
Zilliz
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
Safe Software
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
Uni Systems S.M.S.A.
 
A tale of scale & speed: How the US Navy is enabling software delivery from l...
A tale of scale & speed: How the US Navy is enabling software delivery from l...A tale of scale & speed: How the US Navy is enabling software delivery from l...
A tale of scale & speed: How the US Navy is enabling software delivery from l...
sonjaschweigert1
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Aggregage
 
Climate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing DaysClimate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing Days
Kari Kakkonen
 
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
Neo4j
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
Kumud Singh
 
Removing Uninteresting Bytes in Software Fuzzing
Removing Uninteresting Bytes in Software FuzzingRemoving Uninteresting Bytes in Software Fuzzing
Removing Uninteresting Bytes in Software Fuzzing
Aftab Hussain
 
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
Speck&Tech
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
Neo4j
 
PCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase TeamPCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase Team
ControlCase
 
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
James Anderson
 
Artificial Intelligence for XMLDevelopment
Artificial Intelligence for XMLDevelopmentArtificial Intelligence for XMLDevelopment
Artificial Intelligence for XMLDevelopment
Octavian Nadolu
 
20240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 202420240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 2024
Matthew Sinclair
 

Recently uploaded (20)

Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AIEnchancing adoption of Open Source Libraries. A case study on Albumentations.AI
Enchancing adoption of Open Source Libraries. A case study on Albumentations.AI
 
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy SurveyTrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy Survey
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
 
How to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For FlutterHow to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For Flutter
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
 
Full-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalizationFull-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalization
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
 
A tale of scale & speed: How the US Navy is enabling software delivery from l...
A tale of scale & speed: How the US Navy is enabling software delivery from l...A tale of scale & speed: How the US Navy is enabling software delivery from l...
A tale of scale & speed: How the US Navy is enabling software delivery from l...
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
 
Climate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing DaysClimate Impact of Software Testing at Nordic Testing Days
Climate Impact of Software Testing at Nordic Testing Days
 
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
GraphSummit Singapore | Enhancing Changi Airport Group's Passenger Experience...
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
 
Removing Uninteresting Bytes in Software Fuzzing
Removing Uninteresting Bytes in Software FuzzingRemoving Uninteresting Bytes in Software Fuzzing
Removing Uninteresting Bytes in Software Fuzzing
 
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
Cosa hanno in comune un mattoncino Lego e la backdoor XZ?
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
 
PCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase TeamPCI PIN Basics Webinar from the Controlcase Team
PCI PIN Basics Webinar from the Controlcase Team
 
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
 
Artificial Intelligence for XMLDevelopment
Artificial Intelligence for XMLDevelopmentArtificial Intelligence for XMLDevelopment
Artificial Intelligence for XMLDevelopment
 
20240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 202420240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 2024
 

Natural language processing in R