3.5 Start the MySQL container
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:ro \
  --name e2spks03-mysql \
  -e MYSQL_ROOT_PASSWORD=e2spkv01 \
  -d mysql
3.6 Load the Northwind database into MySQL
docker exec -it e2spks03-mysql /bin/bash
mysql -u root -pe2spkv01
source /e2spkv01/e2-spk-s03/scripts/northwind.sql;
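If the script runs cleanly, show databases; in the same mysql session should now list a northwind schema, which is the database the JDBC URL in section 7 points at.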
3.8 Start the Zeppelin container, linked to MySQL
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:rw \
  -p 8080:8080 \
  --name e2spks03-zeppelin \
  --link e2spks03-mysql:mysql \
  -d dylanmei/zeppelin
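With the -p 8080:8080 mapping, the Zeppelin UI should be reachable at http://localhost:8080 on the Docker host, and the --link entry lets notebooks in this container reach the MySQL container by name.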
4.12 Sample rows from the bank CSV data
"age";"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";"month"
30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown
33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"fai
35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"fail
30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unkno
																				
5.13 A first pivot: group by A and B, pivot on C, sum D
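The pivot examples below assume a DataFrame df with columns A, B, C and D; a minimal, hypothetical one can be built like this:

// Hypothetical sample data with the assumed A, B, C, D columns
val df = sqlContext.createDataFrame(Seq(
  ("foo", "one", "small", 1),
  ("foo", "one", "large", 2),
  ("foo", "two", "small", 3),
  ("bar", "two", "small", 5),
  ("bar", "one", "large", 4)
)).toDF("A", "B", "C", "D")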
val df_case01 = df.groupBy("A", "B").pivot("C").sum("D")
z.show(df_case01) // use Zeppelin to show the result
5.23 Pivot with and without an explicit list of pivot values
df.groupBy("A", "B").pivot("C").sum("D").show()
df.groupBy("A", "B").pivot("C", Seq("small", "large")).sum("D").show()
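Passing the pivot values explicitly, as in the second call, spares Spark an extra pass over the data to compute the distinct values of C, so it is the cheaper form when the values are known up front.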
5.24 Multiple aggregations in a single pivot
import org.apache.spark.sql.functions.{avg, sum}

df.groupBy("A", "B").pivot("C").agg(sum("D"), avg("D")).show()
5.25 Pivoting on more than one column
df.withColumn("p", concat($"p1", $"p2"))
  .groupBy("a", "b")
  .pivot("p")
  .agg(...)
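pivot accepts only a single column, so the usual workaround for pivoting on two columns is to concatenate them into one derived column first, as above; concat_ws with a separator keeps the combined values readable.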
7.5 List the Northwind tables from the Zeppelin SQL interpreter
%psql
show tables
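If the container link and the interpreter's JDBC settings are configured correctly, the output should include the Northwind tables created in step 3.6.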
7.10 Export every Northwind table from MySQL to Parquet
import org.apache.spark.sql.SaveMode

val jdbcUrl = "jdbc:mysql://e2spks03-mysql:3306/northwind?user=root&password=e2spkv01"
val outDataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// The JDBC tables to export
val nw_tables = List("Categories", "CustomerCustomerDemo", "CustomerDemographics", "Customers",
  "Employees", "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products",
  "Shippers", "Suppliers", "Territories")

// Read each table through the "jdbc" DataFrame source and save it as Parquet
nw_tables.foreach(table => {
  val df = sqlContext.read
    .format("jdbc")
    .option("url", jdbcUrl)
    .option("dbtable", table)
    .option("driver", "com.mysql.jdbc.Driver")
    .load()
  df.write.mode(SaveMode.Overwrite).format("parquet").save(outDataFolder + "/" + table)
})
7.12 Read the exported Parquet files back and inspect them
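The loop below is a cheap sanity check on the exported files: printSchema is answered from the Parquet metadata, while show only scans a small sample of rows.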
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"
val nw_parquets = List("Categories", "Customers", "Employees",
  "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers",
  "Suppliers", "Territories")

nw_parquets.foreach(nw_parquet => {
  val df = sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet)
  // Print the DataFrame schema to stdout
  df.printSchema()
  // Print the first rows of the DataFrame to stdout
  df.show()
})
																								
7 . 12
7 . 13
7 . 13
7 . 14
8 . 1
8 . 2
8 . 2
8 . 2
8 . 2
8 . 3
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// The Parquet files to register
val nw_parquets = List("Categories", "Customers", "Employees",
  "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers",
  "Suppliers", "Territories")

// Register each Parquet file as a temporary table named after it
nw_parquets.foreach(nw_parquet => {
  sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet).registerTempTable(nw_parquet)
})
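Once registered, the tables can be queried with plain SQL through sqlContext.sql (or a %sql paragraph); an illustrative query:

// Illustrative query against one of the temp tables registered above
sqlContext.sql("SELECT COUNT(*) AS order_count FROM Orders").show()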
																								

Spark Hands-On: [e2-spk-s03]