3.5 Start the MySQL container
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:ro \
  --name e2spks03-mysql \
  -e MYSQL_ROOT_PASSWORD=e2spkv01 \
  -d mysql
3.6 Load the Northwind database into MySQL
docker exec -it e2spks03-mysql /bin/bash
mysql -u root -pe2spkv01
source /e2spkv01/e2-spk-s03/scripts/northwind.sql;
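If the script runs cleanly, show databases; in the same mysql session should now list a northwind schema, which is the database the JDBC URL in section 7 points at.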
3.8 Start the Zeppelin container, linked to MySQL
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:rw \
  -p 8080:8080 \
  --name e2spks03-zeppelin \
  --link e2spks03-mysql:mysql \
  -d dylanmei/zeppelin
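With the -p 8080:8080 mapping, the Zeppelin UI should be reachable at http://localhost:8080 on the Docker host, and the --link entry lets notebooks in this container reach the MySQL container by name.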
4.12 Sample rows from the bank CSV data
"age";"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";"month"
30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown
33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"fai
35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"fail
30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unkno
																				
5.13 A first pivot: group by A and B, pivot on C, sum D
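The pivot examples below assume a DataFrame df with columns A, B, C and D; a minimal, hypothetical one can be built like this:

// Hypothetical sample data with the assumed A, B, C, D columns
val df = sqlContext.createDataFrame(Seq(
  ("foo", "one", "small", 1),
  ("foo", "one", "large", 2),
  ("foo", "two", "small", 3),
  ("bar", "two", "small", 5),
  ("bar", "one", "large", 4)
)).toDF("A", "B", "C", "D")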
val df_case01 = df.groupBy("A", "B").pivot("C").sum("D")
z.show(df_case01) // use Zeppelin to show the result
5.23 Pivot with and without an explicit list of pivot values
df.groupBy("A", "B").pivot("C").sum("D").show()
df.groupBy("A", "B").pivot("C", Seq("small", "large")).sum("D").show()
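Passing the pivot values explicitly, as in the second call, spares Spark an extra pass over the data to compute the distinct values of C, so it is the cheaper form when the values are known up front.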
5.24 Multiple aggregations in a single pivot
import org.apache.spark.sql.functions.{avg, sum}

df.groupBy("A", "B").pivot("C").agg(sum("D"), avg("D")).show()
5.25 Pivoting on more than one column
df.withColumn("p", concat($"p1", $"p2"))
  .groupBy("a", "b")
  .pivot("p")
  .agg(...)
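pivot accepts only a single column, so the usual workaround for pivoting on two columns is to concatenate them into one derived column first, as above; concat_ws with a separator keeps the combined values readable.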
7.5 List the Northwind tables from the Zeppelin SQL interpreter
%psql
show tables
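If the container link and the interpreter's JDBC settings are configured correctly, the output should include the Northwind tables created in step 3.6.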
7.10 Export every Northwind table from MySQL to Parquet
import org.apache.spark.sql.SaveMode

val jdbcUrl = "jdbc:mysql://e2spks03-mysql:3306/northwind?user=root&password=e2spkv01"
val outDataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// The JDBC tables to export
val nw_tables = List("Categories", "CustomerCustomerDemo", "CustomerDemographics", "Customers",
  "Employees", "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products",
  "Shippers", "Suppliers", "Territories")

// Read each table through the "jdbc" DataFrame source and save it as Parquet
nw_tables.foreach(table => {
  val df = sqlContext.read
    .format("jdbc")
    .option("url", jdbcUrl)
    .option("dbtable", table)
    .option("driver", "com.mysql.jdbc.Driver")
    .load()
  df.write.mode(SaveMode.Overwrite).format("parquet").save(outDataFolder + "/" + table)
})
7.12 Read the exported Parquet files back and inspect them
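The loop below is a cheap sanity check on the exported files: printSchema is answered from the Parquet metadata, while show only scans a small sample of rows.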
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"
val nw_parquets = List("Categories", "Customers", "Employees",
  "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers",
  "Suppliers", "Territories")

nw_parquets.foreach(nw_parquet => {
  val df = sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet)
  // Print the DataFrame schema to stdout
  df.printSchema()
  // Print the first rows of the DataFrame to stdout
  df.show()
})
																								
7 . 12
7 . 13
7 . 13
7 . 14
8 . 1
8 . 2
8 . 2
8 . 2
8 . 2
8 . 3
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// The Parquet files to register
val nw_parquets = List("Categories", "Customers", "Employees",
  "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers",
  "Suppliers", "Territories")

// Register each Parquet file as a temporary table named after it
nw_parquets.foreach(nw_parquet => {
  sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet).registerTempTable(nw_parquet)
})
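Once registered, the tables can be queried with plain SQL through sqlContext.sql (or a %sql paragraph); an illustrative query:

// Illustrative query against one of the temp tables registered above
sqlContext.sql("SELECT COUNT(*) AS order_count FROM Orders").show()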
																								

Spark Hands-On: [e2-spk-s03]