SlideShare a Scribd company logo
Alexander Ioffe

@deusaquilus
Quill + Spark =
Better Together
So What’s The Difference?
• Abstraction

• Encapsulation

• Error Handling

• Good Control Flow

• Performance
Application Development
Languages
Data Retrieval 

Languages
• Natural Expression

• Possible Optimization

• Good Control Flow

• Performance
They Make Different Tradeoffs!
Abstraction Power
Possible Optimizations
Data Retrieval

Languages
Application Development
Languages
Example Please???
-- Greets every American by first and last name plus the city of the joined address.
-- Uses one concatenation operator throughout, like the sibling Hello* views.
CREATE VIEW HelloAmerican AS
SELECT 'Hello ' + t.firstName + ' ' + t.lastName + ' of ' + a.city
FROM Americans t
JOIN Addresses a on t.address_id == a.id
-- Hello John James of New York
CREATE VIEW HelloCanadian AS
SELECT 'Hello ' + t.name + ' ' + t.surname + ' of ' + a.city
FROM Canadians t
JOIN Addresses a on t.residence_id == a.id
-- Hello Jim Jones of Toronto
CREATE VIEW HelloYeti AS
SELECT 'Hello ' + t.gruntingSound + ' ' + t.roaringSound + ' of ' + a.city
FROM AbominableShowmen t
JOIN Addresses a on t.cave_id == a.id
-- Hello Aaargalah Gralala of Kholat Syakhl
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
-- Renames the Americans columns to the (called, alsoCalled, whereHeLives_id) shape
-- that AddressToSomeone takes. The table is aliased as t so the column references resolve.
CREATE VIEW AmericanClients
AS SELECT * from AddressToSomeone(
  SELECT t.firstName as called, t.lastName as alsoCalled, t.address_id as whereHeLives_id
  FROM Americans t
)
-- Renames the Canadians columns to the (called, alsoCalled, whereHeLives_id) shape
-- that AddressToSomeone takes. The table is aliased as t so the column references resolve.
CREATE VIEW CanadianClients
AS SELECT * from AddressToSomeone(
  SELECT t.name as called, t.surname as alsoCalled, t.residence_id as whereHeLives_id
  FROM Canadians t
)
-- Renames the AbominableShowmen columns to the (called, alsoCalled, whereHeLives_id) shape
-- that AddressToSomeone takes. The table is aliased as t so the column references resolve.
CREATE VIEW YetiClients
AS SELECT * from AddressToSomeone(
  SELECT t.gruntingSound as called, t.roaringSound as alsoCalled,
    t.cave_id as whereHeLives_id
  FROM AbominableShowmen t
)
CREATE FUNCTION concatName (
@called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id)
)
AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
-- concatName only factors out the string building: each population still has to be read
-- from its own table and joined to Addresses on its own key column.
SELECT concatName(t.firstName, t.lastName, a.city)
FROM Americans t
JOIN Addresses a on t.address_id == a.id
SELECT concatName(t.name, t.surname, a.city)
FROM Canadians t
JOIN Addresses a on t.residence_id == a.id
SELECT concatName(t.gruntingSound, t.roaringSound, a.city)
FROM AbominableShowmen t
JOIN Addresses a on t.cave_id == a.id
CREATE FUNCTION concatName (
@called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id)
)
AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR;
whereHeLives_id)
)
SELECT
'Hello ' ||t .called || ' ' || t.alsoCalled || ' of ' || a.city,
CASE
WHEN zd.zone_type = 'K' THEN 'StandardCategory'
WHEN zd.zone_type = 'N' AND rc.barbaz = 'GT' THEN 'NonStandardCategory'
ELSE 'UnknownCategory'
END as zoning_category1,
CASE
WHEN ru.kdd = 'IK' THEN 'Insanity'
WHEN zd.kdd = 'N' AND rc.barbaz = 'GTT' THEN 'MoreInsanity'
ELSE 'I_Dont_Even_Know_What_Goes_Here'
END as zoning_category2
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id = a.id
JOIN ResidenceUnit ru on a.rid = ru.id
JOIN ResidenceClass rc on ru.class_id = rc.class_id
JOIN ZoningDesignation zd on ru.zone_id = zd.rzid and zd.cid = rc.class_id
-- concatName only factors out the string building: each population still has to be read
-- from its own table and joined to Addresses on its own key column.
SELECT concatName(t.firstName, t.lastName, a.city)
FROM Americans t
JOIN Addresses a on t.address_id == a.id
SELECT concatName(t.name, t.surname, a.city)
FROM Canadians t
JOIN Addresses a on t.residence_id == a.id
SELECT concatName(t.gruntingSound, t.roaringSound, a.city)
FROM AbominableShowmen t
JOIN Addresses a on t.cave_id == a.id
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
CREATE FUNCTION AddressToSomeone (
@humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id)
)
SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city
FROM @humanoidLivingSomewhere t
JOIN Addresses a on t.whereHeLives_id == a.id
WHERE a.current = true
DataFrame Can!
/**
 * Joins people to `addresses` and builds a "Hello <called> <alsoCalled> of <city>" greeting,
 * keeping only rows whose address is flagged `current`.
 *
 * @param df rows exposing `called`, `alsoCalled` and `whereHeLives_id` columns
 */
def addressToSomeone(df: DataFrame) = {
  df.as("t")
    // Callers alias the key column as "whereHeLives_id"; "whereHeLivesId" does not exist.
    .join(addresses.as("a"), $"whereHeLives_id" === $"id")
    .select(
      concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
        lit(" of "), $"a.city"))
    .filter($"a.current" === lit(true))
}
DataFrame Can!
addressToSomeone(
americans.select($"firstName" as "called", $"lastName" as "alsoCalled",
$"address_id" as "whereHeLives_id")
)
addressToSomeone(
canadians.select($"name" as "called", $"surname" as "alsoCalled",
$"residence_id" as "whereHeLives_id")
)
addressToSomeone(
yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled",
$"cave_id" as "whereHeLives_id")
)
def addressToSomeone(df: DataFrame) = {
df.as("t")
.join(addresses.as("a"), $"id" === $"whereHeLives_id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
DataFrame Can…
addressToSomeone(
americans.select($"firstName" as "called", $"lastName" as "alsoCalled",
$"address_id" as "whereHeLives_id")
)
addressToSomeone(
canadians.select($"name" as "called", $"surname" as "alsoCalled",
$"residence_id" as "whereHeLives_id")
)
addressToSomeone(
yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled",
$"cave_id" as "whereHeLives_id")
)
def addressToSomeone(df: DataFrame) = {
df.as("t")
.join(addresses.as("a"), $"whereHeLives_id" === $"id")
.select(
concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
lit(" of "), $"a.city"))
.filter($"a.current" === lit(true))
}
… Hurt!
/**
 * DataFrame version of the zoning query: joins people to addresses, residence units,
 * residence classes and zoning designations, then selects the greeting plus the two
 * CASE-style zoning categories.
 *
 * @param df rows exposing `called`, `alsoCalled` and `whereHeLives_id` columns
 */
def insaneJoin(df: DataFrame) =
  df.as("t")
    .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id")
    .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
    .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
    .join(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rid" string is compared as a literal value.
      // The key on the zoning side is rzid, as in the SQL version of this join.
      ($"ru.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"rc.class_id")
    )
    .select(
      concat(
        lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
        lit(" of "), $"a.city"),
      when($"zd.zone_type" === lit("K"), "StandardCategory")
        .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")),
          "NonStandardCategory")
        .otherwise("UnknownCategory")
        .as("zoning_category1"),
      when($"ru.kdd" === lit("IK"), "Insanity")
        .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")),
          "MoreInsanity")
        .otherwise("I_Dont_Even_Know_What_Goes_Here")
        .as("zoning_category2")
    )
… Hurt!
/**
 * DataFrame version of the zoning query: joins people to addresses, residence units,
 * residence classes and zoning designations, then selects the greeting plus the two
 * CASE-style zoning categories.
 *
 * @param df rows exposing `called`, `alsoCalled` and `whereHeLives_id` columns
 */
def insaneJoin(df: DataFrame) =
  df.as("t")
    .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id")
    .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
    .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
    .join(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"ru.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"rc.class_id")
    )
    .select(
      concat(
        lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled",
        lit(" of "), $"a.city"),
      when($"zd.zone_type" === lit("K"), "StandardCategory")
        .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")),
          "NonStandardCategory")
        .otherwise("UnknownCategory")
        .as("zoning_category1"),
      when($"ru.kdd" === lit("IK"), "Insanity")
        .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")),
          "MoreInsanity")
        .otherwise("I_Dont_Even_Know_What_Goes_Here")
        .as("zoning_category2")
    )
case class HumanoidLivingSomewhere(
called:String, alsoCalled: String, whereHeLives_id:Int
)
Wait… Dataset?
/**
 * First attempt at a typed version of the zoning join: chains `joinWith` over addresses,
 * residence units, residence classes and zoning designations, then maps each nested
 * tuple to a (greeting, first zoning category, second zoning category) tuple.
 *
 * @param humanoidsLivingSomewhere people to greet, keyed to an address by `whereHeLives_id`
 */
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
  humanoidsLivingSomewhere.as("t")
    .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
    .joinWith(residenceUnit.as("ru"), $"a.rid" === $"ru.id")
    .joinWith(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id")
    .joinWith(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"ru.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"ru.class_id")
    )
    .map { case ((((t, a), ru), rc), zd) => (
      s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
      if (zd.zone_type == "K") "StandardCategory"
      else if (zd.zone_type == "N" && rc.barbaz == "GT")
        "NonStandardCategory"
      else
        "UnknownCategory",
      // The second rule tests "GTT" (not "GT"), as in the SQL and DataFrame versions.
      if (ru.kdd == "IK") "Insanity"
      else if (zd.kdd == "N" && rc.barbaz == "GTT")
        "MoreInsanity"
      else
        "I_Dont_Even_Know_What_Goes_Here"
    )
    }
Wait… Dataset?
/**
 * Typed version of the zoning join. Each `joinWith` nests the previous result in a tuple,
 * so earlier tables are addressed through `_1` / `_2` paths in the later join conditions.
 * Each joined row is mapped to a (greeting, first zoning category, second zoning category)
 * tuple.
 *
 * @param humanoidsLivingSomewhere people to greet, keyed to an address by `whereHeLives_id`
 */
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
  humanoidsLivingSomewhere.as("t")
    .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
    .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
    .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
    .joinWith(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"_1._2.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"_1._2.class_id")
    )
    .map { case ((((t, a), ru), rc), zd) => (
      s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
      if (zd.zone_type == "K") "StandardCategory"
      else if (zd.zone_type == "N" && rc.barbaz == "GT")
        "NonStandardCategory"
      else
        "UnknownCategory",
      // The second rule tests "GTT" (not "GT"), as in the SQL and DataFrame versions.
      if (ru.kdd == "IK") "Insanity"
      else if (zd.kdd == "N" && rc.barbaz == "GTT")
        "MoreInsanity"
      else
        "I_Dont_Even_Know_What_Goes_Here"
    )
    }
Wait… Dataset?
/**
 * Typed version of the zoning join. Each `joinWith` nests the previous result in a tuple,
 * so earlier tables are addressed through `_1` / `_2` paths in the later join conditions.
 * Each joined row is mapped to a (greeting, first zoning category, second zoning category)
 * tuple.
 *
 * @param humanoidsLivingSomewhere people to greet, keyed to an address by `whereHeLives_id`
 */
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
  humanoidsLivingSomewhere.as("t")
    .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
    .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
    .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
    .joinWith(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"_1._2.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"_1._2.class_id")
    )
    .map { case ((((t, a), ru), rc), zd) => (
      s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
      if (zd.zone_type == "K") "StandardCategory"
      else if (zd.zone_type == "N" && rc.barbaz == "GT")
        "NonStandardCategory"
      else
        "UnknownCategory",
      // The second rule tests "GTT" (not "GT"), as in the SQL and DataFrame versions.
      if (ru.kdd == "IK") "Insanity"
      else if (zd.kdd == "N" && rc.barbaz == "GTT")
        "MoreInsanity"
      else
        "I_Dont_Even_Know_What_Goes_Here"
    )
    }
Dataset[(HumanoidLivingSomewhere, Address)]
Wait… Dataset?
/**
 * Typed version of the zoning join. Each `joinWith` nests the previous result in a tuple,
 * so earlier tables are addressed through `_1` / `_2` paths in the later join conditions.
 * Each joined row is mapped to a (greeting, first zoning category, second zoning category)
 * tuple.
 *
 * @param humanoidsLivingSomewhere people to greet, keyed to an address by `whereHeLives_id`
 */
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
  humanoidsLivingSomewhere.as("t")
    .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
    .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
    .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
    .joinWith(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"_1._2.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"_1._2.class_id")
    )
    .map { case ((((t, a), ru), rc), zd) => (
      s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
      if (zd.zone_type == "K") "StandardCategory"
      else if (zd.zone_type == "N" && rc.barbaz == "GT")
        "NonStandardCategory"
      else
        "UnknownCategory",
      // The second rule tests "GTT" (not "GT"), as in the SQL and DataFrame versions.
      if (ru.kdd == "IK") "Insanity"
      else if (zd.kdd == "N" && rc.barbaz == "GTT")
        "MoreInsanity"
      else
        "I_Dont_Even_Know_What_Goes_Here"
    )
    }
Dataset[
(((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass)
]
/**
 * Typed version of the zoning join. Each `joinWith` nests the previous result in a tuple,
 * so earlier tables are addressed through `_1` / `_2` paths in the later join conditions.
 * Each joined row is mapped to a (greeting, first zoning category, second zoning category)
 * tuple.
 *
 * @param humanoidsLivingSomewhere people to greet, keyed to an address by `whereHeLives_id`
 */
def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) =
  humanoidsLivingSomewhere.as("t")
    .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
    .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
    .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
    .joinWith(zoningDesignation.as("zd"),
      // Both sides must be columns: a bare "zd.rzid" string is compared as a literal value.
      ($"_1._2.zone_id" === $"zd.rzid") &&
        ($"zd.cid" === $"_1._2.class_id")
    )
    .map { case ((((t, a), ru), rc), zd) => (
      s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
      if (zd.zone_type == "K") "StandardCategory"
      else if (zd.zone_type == "N" && rc.barbaz == "GT")
        "NonStandardCategory"
      else
        "UnknownCategory",
      // The second rule tests "GTT" (not "GT"), as in the SQL and DataFrame versions.
      if (ru.kdd == "IK") "Insanity"
      else if (zd.kdd == "N" && rc.barbaz == "GTT")
        "MoreInsanity"
      else
        "I_Dont_Even_Know_What_Goes_Here"
    )
    }
case class American(
firstName:String, lastName:String, address_id:Int,
irrelevantP1:String... irrelevantP100:String
)
case class Canadian(
name:String, surname:String, residence_id:Int,
irrelevantP1:String... irrelevantP100:String
)
case class Yeti(
gruntingSound:String, roaringSound:String, address_id:Int,
irrelevantP1:String... irrelevantP100:String
)
Say There's Stuff We Don't Care About
/**
 * An address row. The `irrelevantA1 ... irrelevantA100` line is slide shorthand for a
 * hundred further String columns, not literal Scala.
 */
case class Address(
  id:Int, street:String, city:String, current: Boolean,
  irrelevantA1:String... irrelevantA100:String
)
case class HumanoidLivingSomewhere(
called:String, alsoCalled: String, whereHeLives_id:Int
)
Let's Plug it In!
/**
 * Joins people to `addresses` and builds a "Hello <called> <alsoCalled> of <city>" greeting,
 * keeping only rows whose address is flagged `current`.
 *
 * @param humanoidLivingSomewhere rows exposing `called`, `alsoCalled` and
 *                                `whereHeLives_id` columns
 */
def addressToSomeone(humanoidLivingSomewhere: DataFrame) = {
  humanoidLivingSomewhere.as("t")
    // Callers alias the key column as "whereHeLives_id"; "whereHeLivesId" does not exist.
    .join(addresses.as("a"), $"whereHeLives_id" === $"id")
    .select(
      concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
    .filter($"a.current" === lit(true))
}
addressToSomeone(
americans.select(
$"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
)
// addressToSomeone with its argument inlined. The join key must use the alias created
// by the select ("whereHeLives_id"), not "whereHeLivesId".
americans
  .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
  .as("t")
  .join(addresses.as("a"), $"whereHeLives_id" === $"id")
  .select(
    concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
  .filter($"a.current" === lit(true))
Explain This!
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
// Prints the physical plan of the inlined query. The join key must use the alias created
// by the select ("whereHeLives_id"), not "whereHeLivesId".
americans
  .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
  .as("t")
  .join(addresses.as("a"), $"whereHeLives_id" === $"id")
  .select(
    concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
  .filter($"a.current" === lit(true))
  .explain()
SQL Does the Same Thing
spark.sql(
"""
|select concat('Hello ', t.called, ' ', t.alsoCalled, ' of ', a.city) as _1
|from (
| select firstName as called, lastName as alsoCalled, address_id as whereHeLives_id
| from americans
|) as t
|join addresses a on (t.whereHeLives_id = a.id)
|where a.current = true
|""".stripMargin
)
.explain()
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
*(5) Project [concat(Hello , called, , alsoCalled, of , city)]
+- *(5) SortMergeJoin [whereHeLives_id], [id], Inner
+- Exchange hashpartitioning(whereHeLives_id)
+- *(1) Project [firstName AS called, ... AS whereHeLives_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)],
// Prints the physical plan of the inlined query. The join key must use the alias created
// by the select ("whereHeLives_id"), not "whereHeLivesId".
americans
  .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
  .as("t")
  .join(addresses.as("a"), $"whereHeLives_id" === $"id")
  .select(
    concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
  .filter($"a.current" === lit(true))
  .explain()
Explain This!
In English Please?
*(🤞) Gimme My Result! [concat(Hello , called, , alsoCalled, of , city)]
+- *(💂) We're Joining! Huzzah! [whereHeLives_id], [id], Inner
+- Join Key for the Left Side! (whereHeLives_id)
+- *(1) Rename these like I said! Pronto! [firstName as Called... ]
+- *(😇) I'm a smart format, load only: [firstName,lastName,address_id]
+- Join Key for the Right Side! (id)
+- *(😇) I'm a smart format, load only: [id,city,current]
Read only current addr. from the file! 😎: [EqualTo(current,true)],
// Prints the physical plan of the inlined query. The join key must use the alias created
// by the select ("whereHeLives_id"), not "whereHeLivesId".
americans
  .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id")
  .as("t")
  .join(addresses.as("a"), $"whereHeLives_id" === $"id")
  .select(
    concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"))
  .filter($"a.current" === lit(true))
  .explain()
How About Dataset?
/**
 * Typed version of the greeting query: joins people to `addresses`, keeps pairs whose
 * address is `current`, and renders "Hello <called> <alsoCalled> of <city>".
 *
 * @param humanoidsLivingSomewhere people to greet
 */
def addressToSomeone(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = {
  humanoidsLivingSomewhere
    // The field on HumanoidLivingSomewhere is whereHeLives_id, not whereHeLivesId.
    .joinWith(addresses, $"id" === $"whereHeLives_id")
    .filter(ta => ta._2.current == true)
    .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
}
// Adapts each American to the shape addressToSomeone takes.
// The field on American is address_id, not addressId.
val americanClients =
  addressToSomeone(
    americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  )
// The typed greeting query with addressToSomeone inlined. Field and column names follow
// the case classes: American.address_id and HumanoidLivingSomewhere.whereHeLives_id.
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  .joinWith(addresses, $"id" === $"whereHeLives_id")
  .filter(ta => ta._2.current == true)
  .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
Explain This... Please!
*(3) SerializeFromObject [UTF8String]
+- *(3) MapElements java.lang.String
+- DeserializeToObject newInstance(Tuple2)
+- SortMergeJoin [_1.whereHeLives_id], [_2.id], Inner
+- Exchange hashpartitioning(_1.whereHeLives_id)
+- *(1) Project [called, alsoCalled, whereHeLives_id]
+- *(1) SerializeFromObject [UTF8String]
+- *(1) MapElements HumanoidLivingSomewhere
+- DeserializeToObject newInstance(American)
+- FileScan parquet [firstName,lastName,address_id,irrelevantP1,irrelevantP2,i
+- Exchange hashpartitioning(_2.id)
+- FileScan parquet [id,street,city,current,irrelevantA1,irrelevantA2,irrelevantA3,irrel
PushedFilters: []
// Prints the physical plan of the typed query. Field and column names follow the case
// classes: American.address_id and HumanoidLivingSomewhere.whereHeLives_id.
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  .joinWith(addresses, $"id" === $"whereHeLives_id")
  .filter(ta => ta._2.current == true)
  .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  .explain()
*(🤮) Serialize Back Into a String Expensive!
+- *(3) Do the Outer Map that we Invoked
+- (🤮) Deserialize Tuple2 Expensive!
+- & We're Joining! Huzzah! [_1.whereHeLives_id], [_2.id], Inner
+- Join Key for the Left Side (_1.whereHeLives_id)
+- *(1) Project [called, alsoCalled, whereHeLives_id]
+- *(🤮) Serialize the Join Key. Expensive!
+- *(1) MapElements HumanoidLivingSomewhere
+- (🤮) Deserialize into a JVM Object (i.e. class American)
+- Scan All 'American' Columns Including 100 irrelevant ones!😱
+- Join Key for the Right Side (_2.id)
+- Scan All 'Address' Columns Including 100 irrelevant ones! 😱
We Need to Read The Entire Dataset! No Excluding Non-Current Addresses 😢
// Prints the physical plan of the typed query. Field and column names follow the case
// classes: American.address_id and HumanoidLivingSomewhere.whereHeLives_id.
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  .joinWith(addresses, $"id" === $"whereHeLives_id")
  .filter(ta => ta._2.current == true)
  .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  .explain()
// The typed greeting query with addressToSomeone inlined. Field and column names follow
// the case classes: American.address_id and HumanoidLivingSomewhere.whereHeLives_id.
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  .joinWith(addresses, $"id" === $"whereHeLives_id")
  .filter(ta => ta._2.current == true)
  .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
Which Columns are
we using in here?
Which Columns are
we using in here?
Which Columns are
we using in here?
⏸
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
I Guess We Need

All Of Them!
I Guess We Need

All Of Them!
I Guess We Need

All Of Them!
⏸
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere)
.joinWith(addresses, ¯_(ツ)_/¯)
.filter { ¯_(ツ)_/¯ : Boolean }
.map { ¯_(ツ)_/¯ : String }
⏸
What columns
am I joining by???
val americanClients =
americans.map(a =>
HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id)
)
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { tup => tup._2.current == true }
.map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
What We See:
What Catalyst Sees:
val americanClients =
americans.map( (🖼 American) => HumanoidLivingSomewhere 🖼 ) 🤮
.joinWith(addresses, $"whereHeLives_id" === $"id")
.filter { (🖼 HumanoidLivingSomewhere) => Boolean } 🤮
.map { (🖼 HumanoidLivingSomewhere) => String 🖼 } 🤮
→ →
→
→ →
// The typed greeting query with addressToSomeone inlined. Field and column names follow
// the case classes: American.address_id and HumanoidLivingSomewhere.whereHeLives_id.
americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id))
  .joinWith(addresses, $"id" === $"whereHeLives_id")
  .filter(ta => ta._2.current == true)
  .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
/**
 * Frameless version of the greeting query: inner-joins people to `addresses` and selects
 * "Hello <called> <alsoCalled> of <city>" from the joined pair.
 *
 * @param humanoid people to greet
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val joined = humanoid
    // The key field on HumanoidLivingSomewhere is whereHeLives_id; there is no 'where column.
    .joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  joined.select(concat(
    lit("Hello "), joined.colMany('_1, 'called), lit(" "),
    joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
What About Frameless?
// Projects the three needed columns, then rebuilds each row as a HumanoidLivingSomewhere.
// The column on American is address_id, not addressId.
addressToSomeone(
  americans.select(americans('firstName), americans('lastName), americans('address_id))
    .deserialized.map { case (firstName, lastName, whereHeLives_id) =>
      HumanoidLivingSomewhere(
        firstName.asInstanceOf[String],
        lastName.asInstanceOf[String],
        whereHeLives_id.asInstanceOf[Int])
    }
)
What About Frameless?
/**
 * Frameless version of the greeting query: inner-joins people to `addresses` and selects
 * "Hello <called> <alsoCalled> of <city>" from the joined pair.
 *
 * @param humanoid people to greet
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val joined = humanoid
    // The key field on HumanoidLivingSomewhere is whereHeLives_id; there is no 'where column.
    .joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  joined.select(concat(
    lit("Hello "), joined.colMany('_1, 'called), lit(" "),
    joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
// Projects the three needed columns, then rebuilds each row as a HumanoidLivingSomewhere.
// The column on American is address_id, not addressId.
addressToSomeone(
  americans.select(americans('firstName), americans('lastName), americans('address_id))
    .deserialized.map { case (firstName, lastName, whereHeLives_id) =>
      HumanoidLivingSomewhere(
        firstName.asInstanceOf[String],
        lastName.asInstanceOf[String],
        whereHeLives_id.asInstanceOf[Int])
    }
)
What About Frameless?
/**
 * Frameless version of the greeting query: inner-joins people to `addresses` and selects
 * "Hello <called> <alsoCalled> of <city>" from the joined pair.
 *
 * @param humanoid people to greet
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val joined = humanoid
    // The key field on HumanoidLivingSomewhere is whereHeLives_id; there is no 'where column.
    .joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  joined.select(concat(
    lit("Hello "), joined.colMany('_1, 'called), lit(" "),
    joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city)))
}
/**
 * Frameless version of the zoning query. Each `joinInner` wraps the previous result in a
 * pair, so a column of an earlier table is reached with a `colMany` path of `'_1` / `'_2`
 * steps; `OUT` spells out the fully nested row type of the last join.
 *
 * @param humanoid people to greet, keyed to an address by `whereHeLives_id`
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
  val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
  val j4 = j3.joinInner(zoningDesignation) {
    // In j3 the ResidenceUnit sits at _1._2. The zoning side of the class comparison is
    // cid (as in the SQL), not rzid a second time.
    (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
      (zoningDesignation('cid) === j3.colMany('_1, '_2, 'class_id))
  }
  type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
  j4.select(
    concat(
      lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
      j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
    ),
    when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
      .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
        lit("NonStandardCategory"))
      .otherwise(lit("UnknownCategory")),
    when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
      .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
        lit("MoreInsanity"))
      .otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
  )
}
What Sub-Tuple is class_id inside of?
/**
 * Frameless version of the zoning query. Each `joinInner` wraps the previous result in a
 * pair, so a column of an earlier table is reached with a `colMany` path of `'_1` / `'_2`
 * steps; `OUT` spells out the fully nested row type of the last join.
 *
 * @param humanoid people to greet, keyed to an address by `whereHeLives_id`
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
  val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
  val j4 = j3.joinInner(zoningDesignation) {
    // In j3 the ResidenceUnit sits at _1._2. The zoning side of the class comparison is
    // cid (as in the SQL), not rzid a second time.
    (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
      (zoningDesignation('cid) === j3.colMany('_1, '_2, 'class_id))
  }
  type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
  j4.select(
    concat(
      lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
      j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
    ),
    when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
      .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
        lit("NonStandardCategory"))
      .otherwise(lit("UnknownCategory")),
    when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
      .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
        lit("MoreInsanity"))
      .otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
  )
}
What Sub-Tuple is class_id inside of?
Dataset[
(((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass)
]
/**
 * Frameless version of the zoning query. Each `joinInner` wraps the previous result in a
 * pair, so a column of an earlier table is reached with a `colMany` path of `'_1` / `'_2`
 * steps; `OUT` spells out the fully nested row type of the last join.
 *
 * @param humanoid people to greet, keyed to an address by `whereHeLives_id`
 */
def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = {
  val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) }
  val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) }
  val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) }
  val j4 = j3.joinInner(zoningDesignation) {
    // In j3 the ResidenceUnit sits at _1._2. The zoning side of the class comparison is
    // cid (as in the SQL), not rzid a second time.
    (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) &&
      (zoningDesignation('cid) === j3.colMany('_1, '_2, 'class_id))
  }
  type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation)
  j4.select(
    concat(
      lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "),
      j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city)
    ),
    when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory"))
      .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"),
        lit("NonStandardCategory"))
      .otherwise(lit("UnknownCategory")),
    when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity"))
      .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"),
        lit("MoreInsanity"))
      .otherwise(lit("I_Dont_Even_Know_What_Goes_Here"))
  )
}
Also... What's This???
[error] found : frameless.TypedColumn[Nothing,String]
[error] required:
frameless.AbstractTypedColumn[((((org.ctl.complex.HumanoidLivingSomewhere,
org.ctl.complex.Address), org.ctl.complex.ResidenceUnit),
org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation),String]
[error] Note: Nothing <: ((((org.ctl.complex.HumanoidLivingSomewhere,
org.ctl.complex.Address), org.ctl.complex.ResidenceUnit),
org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation), but class
AbstractTypedColumn is invariant in type T.
[error] You may wish to define T as +T instead. (SLS 4.5)
[error] when(j4.colMany('_2, 'zone_type) === "K", lit("StandardCategory"))
DataFrame/SQL Untyped 😢
Column Pruning
Filter Pushdown 😎
Dataset Almost Typed 😕 Extra Serialization 🤮
Frameless Typed 😃
Very Complex if you
don't know Shapeless.
Typechecking Power
Possible Optimizations
DataFrame/
Scala Code
(i.e. Dataset[T])
SQL
?
Scala Code SQL DataFrame
?
Scala Code SQL DataFrame
Typechecking Power
Possible Optimizations
Scala Code SQL
Quill
DataFrame
quote {
scala-syntax-tree
}
AST
Macro
Scala Code SQL
Quill
quote {
scala-syntax-tree
}
AST
Macro
Query[R] SQL
Quill
quote { Query[R] }
AST
Macro
Quoted[Query[R]]
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[?]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
// Quoted counterpart of the AddressToSomeone SQL function: takes a query of Humanoid,
// joins each one to its address, keeps current addresses and yields the greeting string.
val addressToSomeone = quote {
  (humanoid: Query[Humanoid]) =>
    for {
      h <- humanoid
      a <- addresses.join(a =>
        a.id == h.whereHeLives_id
      )
      if (a.current == true)
    } yield (
      // The parenthesis opened after `yield` wraps the whole concatenation below.
      "Hello " +
        h.called + " " +
        h.alsoCalled + " of " +
        a.city
    )
}
Query [Humanoid]
Humanoid
Query [Address]
Address
⏸
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses if (
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
Address
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
LEFT JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.leftJoin(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
Address
Option[Address] Address
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
String
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Query[Humanoid] => Query[String]
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[String]]
Query[Humanoid] => Query[String]
Query[String]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield h
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
h.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Humanoid]]
Query[Humanoid] => Query[Humanoid]
Query[Humanoid]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield a
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
a.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Address]]
Query[Humanoid] => Query[Address]
Query[Address]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (h, a)
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
h.*, a.*
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[(Humanoid, Address)]]
Query[Humanoid] => Query[(Humanoid, Address)]
Query[(Humanoid, Address)]
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (Foobar(h, a))
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
?? I don't understand objects ??
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
Quoted[Query[Humanoid] => Query[Foobar]]
Query[Humanoid] => Query[Foobar]
Query[Foobar]
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.r && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
[error] quillspark-examples/src/main/scala/Main.scala:107:28:
value rid is not a member of org.ctl.complex.ZoningDesignation
[error] ru.zone_id == zd.rid && zd.cid == rc.class_id
[error] ^
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
⏸
humanoidsLivingSomewhere.as("t")
.joinWith(addresses.as("a"), $"whereHeLives_id" === $"id")
.joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id")
.joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id")
.joinWith(zoningDesignation.as("zd"),
($"_1._2.zone_id" === "zd.rzid") &&
($"zd.cid" === $"_1._2.class_id")
)
.map { case ((((t, a), ru), rc), zd) => (
s"Hello ${t.called} ${t.alsoCalled} of ${a.city}",
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT")
"NonStandardCategory"
else
"UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GT")
"MoreInsanity"
else
"I_Dont_Even_Know_What_Goes_Here"
)
}
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory",
if (ru.kdd == "IK") "Insanity"
else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity"
else "I_Dont_Even_Know_What_Goes_Here"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city,
if (zd.zone_type == "K") "StandardCategory"
else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard"
else "UnknownCategory"
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
zd <- zoningDesignation.join(zd =>
ru.zone_id == zd.rid && zd.cid == rc.class_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)

rc <- residenceClass.join(rc => ru.class_id == rc.class_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
ru <-residenceUnit.join(ru => a.rid == ru.id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
}
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
▶
CREATE FUNCTION AddressToSomeone (
@humanoid
Table(
called VARCHAR;
alsoCalled VARCHAR;
whereHeLives_id
)
)
SELECT
'Hello ' ||
h.called || ' ' ||
h.alsoCalled || ' of ' ||
a.city
FROM
@humanoid h
JOIN
Addresses a
ON
h.whereHeLives_id == a.id
WHERE
a.current == true
▶
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
quote { addressToSomeone(americans.map(am => 

Humanoid(am.firstName, am.lastName, am.address_id))) }
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
quote {addressToSomeone(canadians.map(am => 

Humanoid(am.name, am.surname, am.residence_id)))}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
quote {addressToSomeone(yeti.map(am =>

Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
case class Humanoid(
called:String,
alsoCalled: String,
whereHeLives_id:Int
)
val addressToSomeone = quote {
(humanoid: Query[Humanoid]) =>
for {
h <- humanoid
a <- addresses.join(a =>
a.id == h.whereHeLives_id
)
if (a.current == true)
} yield (
"Hello " +
h.called + " " +
h.alsoCalled + " of " +
a.city
)
}
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
▶
quote {addressToSomeone(yeti.map(am =>

Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
DataFrame
quote {addressToSomeone(yeti.map(am =>

Humanoid(am.gruntingSound,
am.roaringSound, am.cave_id)))}
quote {addressToSomeone(canadians.map(am => 

Humanoid(am.name, am.surname,
am.residence_id)))}
quote { addressToSomeone(americans.map(am => 

Humanoid(am.firstName, am.lastName,
am.address_id))) }
run(Query[String]) run(Query[String]) run(Query[String])
DataFrame

Dataset[String]
SELECT
'Hello ' ||
h.firstName || ' ' ||
h.lastName || ' of ' ||
a.city
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.name || ' ' ||
h.surname || ' of ' ||
a.city
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
run(Query[String]) run(Query[String]) run(Query[String])
DataFrame

Dataset[Humanoid]
SELECT
h
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
h
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
h
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
run(Query[Humanoid]) run(Query[Humanoid]) run(Query[Humanoid])
DataFrame

Dataset[T]
run(Query[T]) run(Query[T]) run(Query[T])
SELECT
?
FROM
Americans h
JOIN
Addresses a
ON
h.address_id == a.id
WHERE
a.current == true
SELECT
?
FROM
Canadians h
JOIN
Addresses a
ON
h.residence_id == a.id
WHERE
a.current == true
SELECT
?
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
= Dataset[T]
run(Query[T])
= Dataset[T]
run(Query[T])
Dataset[T] DataFrame
Easy! Just '.toDF'
Harder! '.as[DoIReallyKnowItsThis?]'
run(Query[T])
val spark = SparkSession.builder()
.appName("SparkQuillExample")
.enableHiveSupport()
.getOrCreate()
implicit val sqlContext = spark.sqlContext
import sqlContext.implicits._
import QuillSparkContext._
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
Dataset[Yeti]
Dataset[Address]
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
Quoted[Query[Yeti]]
Quoted[Query[Address]]
Dataset[Yeti]
Dataset[Address]
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiOfSomeplace: Dataset[String] = run(output)
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yetiDS = spark.read.parquet("output/yeti").as[Yeti]
val addressesDS = spark.read.parquet("output/addresses").as[Address]
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yeti = quote { liftQuery(yetiDS) }
val addresses = quote { liftQuery(addressesDS) }
val yetiOfSomeplace: Dataset[String] = run(output)
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
Run This Query:
Then Give Me Back
My Dataset!!!
SELECT
'Hello ' ||
h.gruntingSound || ' ' ||
h.roaringSound || ' of ' ||
a.city
FROM
Yeti h
JOIN
Addresses a
ON
h.cave_id == a.id
WHERE
a.current == true
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val yetiOfSomeplace: Dataset[String] = run(output)
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val addressToSomeone = quote {
(humanoids: Query[Humanoid]) =>
for {
h <- humanoids
a <- addresses.join(a => a.id == h.whereHeLives_id)
if (a.current)
} yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city
}
val output = quote {
addressToSomeone(
yeti.map(am =>
Humanoid(am.firstName, am.lastName, am.address_id)
)
)
}
val yetiOfSomeplace: Dataset[String] = run(output)
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
Optimized + No Serialization 😎
Optimized + No Serialization 😎
val yetiOfSomeplace: Dataset[String] = run(output)
val addressToSomeone = quote { Quill Magic! }
▶
val output = quote { Quill Magic! }
val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti]
val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
*(5) Project [concat(Hello , firstName, , lastName, of , city)]
+- *(5) SortMergeJoin [address_id], [id], Inner
+- Exchange hashpartitioning(address_id)
+- *(1) Project [firstName, lastName, address_id]
+- *(1) FileScan parquet [firstName,lastName,address_id]
+- Exchange hashpartitioning(id)
+- *(3) FileScan parquet [id,city,current]
PushedFilters: [EqualTo(current,true)]
▶
val yetiOfSomeplace: Dataset[String] = run(output)
val addressToSomeone = quote { Quill Magic! }
val output = quote { Quill Magic! }
val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti]
val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
// Applicative Joins
yeti.join(addresses).on(_.caveId == _.id)
yeti.leftJoin(addresses).on(_.caveId == _.id)
// Implicit Joins
for {
y <- yeti
a <- addresses if (y.caveId == a.id)
} yield (y, a)
// Semi-Joins
val cavelessYeti = quote {
yeti.filter(y => !addresses.map(_.id).contains(y.caveId))
}
Some other stuff we can do...
Some other stuff we can do...
// Group-By
orders.groupBy(_.sku).map {
case (sku, orders) => (sku, orders.map(_.price).avg)
}
// Concat-Map
val nodesChildren = quote {
(ns: Query[Node]) => ns.concatMap(n => n.children)
}
// Union/UnionAll
val americansAndCanadians = quote {
americans.map(_.firstName) unionAll canadians.map(_.surname)
}
Some other stuff we can do...
// User Defined Aggregation Functions (UDAFs)
spark.udf.register("geomMean", new GeometricMean)
val geomMean = quote {
(q: Query[BigDecimal]) => infix"geomMean(${q})".as[BigDecimal]
}
orders.groupBy(_.sku).map {
case (sku, orders) => (sku, geomMean(orders.map(_.price)))
}
// Using Spark UDFs
spark.udf.register("businessLogicUdf", (str:String) => str + "-suffix")
val businessLogicUdf = quote {
(str: String) => infix"businessLogicUdf(${str})".as[String]
}
quote {
yeti.map(y => businessLogicUdf(y.gruntingSound))
}
https://getquill.io/

https://github.com/getquill/quill

https://gitter.im/getquill/quill
...Try It Out!
libraryDependencies ++= Seq(
"io.getquill" %% "quill-spark" % "3.4.10"
)
<dependency>
<groupId>io.getquill</groupId>
<artifactId>quill-spark_2.12</artifactId>
<version>3.4.10</version>
</dependency>

More Related Content

What's hot

Leveraging Symfony2 Forms
Leveraging Symfony2 FormsLeveraging Symfony2 Forms
Leveraging Symfony2 Forms
Bernhard Schussek
 
CBSE Class XII Comp sc practical file
CBSE Class XII Comp sc practical fileCBSE Class XII Comp sc practical file
CBSE Class XII Comp sc practical file
Pranav Ghildiyal
 
COMP2021 Final Project - LightHTML
COMP2021 Final Project - LightHTMLCOMP2021 Final Project - LightHTML
COMP2021 Final Project - LightHTML
Conrad Lo
 
Reason - introduction to language and its ecosystem | Łukasz Strączyński
Reason - introduction to language and its ecosystem | Łukasz StrączyńskiReason - introduction to language and its ecosystem | Łukasz Strączyński
Reason - introduction to language and its ecosystem | Łukasz Strączyński
Grand Parade Poland
 
레진코믹스가 코틀린으로 간 까닭은?
레진코믹스가 코틀린으로 간 까닭은?레진코믹스가 코틀린으로 간 까닭은?
레진코믹스가 코틀린으로 간 까닭은?
Taeho Kim
 
Kotlin: Let's Make Android Great Again
Kotlin: Let's Make Android Great AgainKotlin: Let's Make Android Great Again
Kotlin: Let's Make Android Great Again
Taeho Kim
 
WTF Oriented Programming, com Fabio Akita
WTF Oriented Programming, com Fabio AkitaWTF Oriented Programming, com Fabio Akita
WTF Oriented Programming, com Fabio Akita
iMasters
 
Tablas, Codigos De Base De Datos
Tablas, Codigos De Base De DatosTablas, Codigos De Base De Datos
Tablas, Codigos De Base De Datos
guesta050b04
 
Sql commands
Sql commandsSql commands
Sql commands
Christalin Nelson
 
Functional Error Handling with Cats
Functional Error Handling with CatsFunctional Error Handling with Cats
Functional Error Handling with Cats
Mark Canlas
 
Hacking Your Way To Better Security - Dutch PHP Conference 2016
Hacking Your Way To Better Security - Dutch PHP Conference 2016Hacking Your Way To Better Security - Dutch PHP Conference 2016
Hacking Your Way To Better Security - Dutch PHP Conference 2016
Colin O'Dell
 
PHP object calisthenics
PHP object calisthenicsPHP object calisthenics
PHP object calisthenics
Giorgio Cefaro
 
Datamapper @ Railsconf2010
Datamapper @ Railsconf2010Datamapper @ Railsconf2010
Datamapper @ Railsconf2010
Dirkjan Bussink
 
Working With JQuery Part1
Working With JQuery Part1Working With JQuery Part1
Working With JQuery Part1
saydin_soft
 

What's hot (14)

Leveraging Symfony2 Forms
Leveraging Symfony2 FormsLeveraging Symfony2 Forms
Leveraging Symfony2 Forms
 
CBSE Class XII Comp sc practical file
CBSE Class XII Comp sc practical fileCBSE Class XII Comp sc practical file
CBSE Class XII Comp sc practical file
 
COMP2021 Final Project - LightHTML
COMP2021 Final Project - LightHTMLCOMP2021 Final Project - LightHTML
COMP2021 Final Project - LightHTML
 
Reason - introduction to language and its ecosystem | Łukasz Strączyński
Reason - introduction to language and its ecosystem | Łukasz StrączyńskiReason - introduction to language and its ecosystem | Łukasz Strączyński
Reason - introduction to language and its ecosystem | Łukasz Strączyński
 
레진코믹스가 코틀린으로 간 까닭은?
레진코믹스가 코틀린으로 간 까닭은?레진코믹스가 코틀린으로 간 까닭은?
레진코믹스가 코틀린으로 간 까닭은?
 
Kotlin: Let's Make Android Great Again
Kotlin: Let's Make Android Great AgainKotlin: Let's Make Android Great Again
Kotlin: Let's Make Android Great Again
 
WTF Oriented Programming, com Fabio Akita
WTF Oriented Programming, com Fabio AkitaWTF Oriented Programming, com Fabio Akita
WTF Oriented Programming, com Fabio Akita
 
Tablas, Codigos De Base De Datos
Tablas, Codigos De Base De DatosTablas, Codigos De Base De Datos
Tablas, Codigos De Base De Datos
 
Sql commands
Sql commandsSql commands
Sql commands
 
Functional Error Handling with Cats
Functional Error Handling with CatsFunctional Error Handling with Cats
Functional Error Handling with Cats
 
Hacking Your Way To Better Security - Dutch PHP Conference 2016
Hacking Your Way To Better Security - Dutch PHP Conference 2016Hacking Your Way To Better Security - Dutch PHP Conference 2016
Hacking Your Way To Better Security - Dutch PHP Conference 2016
 
PHP object calisthenics
PHP object calisthenicsPHP object calisthenics
PHP object calisthenics
 
Datamapper @ Railsconf2010
Datamapper @ Railsconf2010Datamapper @ Railsconf2010
Datamapper @ Railsconf2010
 
Working With JQuery Part1
Working With JQuery Part1Working With JQuery Part1
Working With JQuery Part1
 

Similar to Quill + Spark = Better Together

CS 542 Controlling Database Integrity and Performance
CS 542 Controlling Database Integrity and PerformanceCS 542 Controlling Database Integrity and Performance
CS 542 Controlling Database Integrity and Performance
J Singh
 
CS 542 Database Index Structures
CS 542 Database Index StructuresCS 542 Database Index Structures
CS 542 Database Index Structures
J Singh
 
Kotlin for Android Developers
Kotlin for Android DevelopersKotlin for Android Developers
Kotlin for Android Developers
Hassan Abid
 
Php functions
Php functionsPhp functions
Php functions
JIGAR MAKHIJA
 
Functional Principles for OO Developers
Functional Principles for OO DevelopersFunctional Principles for OO Developers
Functional Principles for OO Developers
jessitron
 
Feature-Engineering-Earth-Advocacy-Project-2015
Feature-Engineering-Earth-Advocacy-Project-2015Feature-Engineering-Earth-Advocacy-Project-2015
Feature-Engineering-Earth-Advocacy-Project-2015
Ankoor Bhagat
 
The Art of Transduction
The Art of TransductionThe Art of Transduction
The Art of Transduction
David Stockton
 
Using R for Building a Simple and Effective Dashboard
Using R for Building a Simple and Effective DashboardUsing R for Building a Simple and Effective Dashboard
Using R for Building a Simple and Effective Dashboard
Andrea Gigli
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick tour
aztack
 
Writeable ct es_pgcon_may_2011
Writeable ct es_pgcon_may_2011Writeable ct es_pgcon_may_2011
Writeable ct es_pgcon_may_2011
David Fetter
 
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
MongoDB
 
(Ab)Using the MetaCPAN API for Fun and Profit
(Ab)Using the MetaCPAN API for Fun and Profit(Ab)Using the MetaCPAN API for Fun and Profit
(Ab)Using the MetaCPAN API for Fun and Profit
Olaf Alders
 
JSON + MariaDB: Hybrid Model Best Practices
JSON + MariaDB: Hybrid Model Best PracticesJSON + MariaDB: Hybrid Model Best Practices
JSON + MariaDB: Hybrid Model Best Practices
Rob Hedgpeth
 
Metaprogramming in Haskell
Metaprogramming in HaskellMetaprogramming in Haskell
Metaprogramming in Haskell
Hiromi Ishii
 
Php
PhpPhp
Connect() Mini 2016
Connect() Mini 2016Connect() Mini 2016
Connect() Mini 2016
Jeff Chu
 
DataMapper @ RubyEnRails2009
DataMapper @ RubyEnRails2009DataMapper @ RubyEnRails2009
DataMapper @ RubyEnRails2009
Dirkjan Bussink
 
Beginning Scala Svcc 2009
Beginning Scala Svcc 2009Beginning Scala Svcc 2009
Beginning Scala Svcc 2009
David Pollak
 
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdfSQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
arrowit1
 

Similar to Quill + Spark = Better Together (20)

Perl6 grammars
Perl6 grammarsPerl6 grammars
Perl6 grammars
 
CS 542 Controlling Database Integrity and Performance
CS 542 Controlling Database Integrity and PerformanceCS 542 Controlling Database Integrity and Performance
CS 542 Controlling Database Integrity and Performance
 
CS 542 Database Index Structures
CS 542 Database Index StructuresCS 542 Database Index Structures
CS 542 Database Index Structures
 
Kotlin for Android Developers
Kotlin for Android DevelopersKotlin for Android Developers
Kotlin for Android Developers
 
Php functions
Php functionsPhp functions
Php functions
 
Functional Principles for OO Developers
Functional Principles for OO DevelopersFunctional Principles for OO Developers
Functional Principles for OO Developers
 
Feature-Engineering-Earth-Advocacy-Project-2015
Feature-Engineering-Earth-Advocacy-Project-2015Feature-Engineering-Earth-Advocacy-Project-2015
Feature-Engineering-Earth-Advocacy-Project-2015
 
The Art of Transduction
The Art of TransductionThe Art of Transduction
The Art of Transduction
 
Using R for Building a Simple and Effective Dashboard
Using R for Building a Simple and Effective DashboardUsing R for Building a Simple and Effective Dashboard
Using R for Building a Simple and Effective Dashboard
 
Ruby Language - A quick tour
Ruby Language - A quick tourRuby Language - A quick tour
Ruby Language - A quick tour
 
Writeable ct es_pgcon_may_2011
Writeable ct es_pgcon_may_2011Writeable ct es_pgcon_may_2011
Writeable ct es_pgcon_may_2011
 
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
[MongoDB.local Bengaluru 2018] Tutorial: Pipeline Power - Doing More with Mon...
 
(Ab)Using the MetaCPAN API for Fun and Profit
(Ab)Using the MetaCPAN API for Fun and Profit(Ab)Using the MetaCPAN API for Fun and Profit
(Ab)Using the MetaCPAN API for Fun and Profit
 
JSON + MariaDB: Hybrid Model Best Practices
JSON + MariaDB: Hybrid Model Best PracticesJSON + MariaDB: Hybrid Model Best Practices
JSON + MariaDB: Hybrid Model Best Practices
 
Metaprogramming in Haskell
Metaprogramming in HaskellMetaprogramming in Haskell
Metaprogramming in Haskell
 
Php
PhpPhp
Php
 
Connect() Mini 2016
Connect() Mini 2016Connect() Mini 2016
Connect() Mini 2016
 
DataMapper @ RubyEnRails2009
DataMapper @ RubyEnRails2009DataMapper @ RubyEnRails2009
DataMapper @ RubyEnRails2009
 
Beginning Scala Svcc 2009
Beginning Scala Svcc 2009Beginning Scala Svcc 2009
Beginning Scala Svcc 2009
 
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdfSQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
SQL FILE FROM MOODLEUSE [master]GO Object Databa.pdf
 

Recently uploaded

Globus Compute Introduction - GlobusWorld 2024
Globus Compute Introduction - GlobusWorld 2024Globus Compute Introduction - GlobusWorld 2024
Globus Compute Introduction - GlobusWorld 2024
Globus
 
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
informapgpstrackings
 
RISE with SAP and Journey to the Intelligent Enterprise
RISE with SAP and Journey to the Intelligent EnterpriseRISE with SAP and Journey to the Intelligent Enterprise
RISE with SAP and Journey to the Intelligent Enterprise
Srikant77
 
Large Language Models and the End of Programming
Large Language Models and the End of ProgrammingLarge Language Models and the End of Programming
Large Language Models and the End of Programming
Matt Welsh
 
2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx
Georgi Kodinov
 
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Globus
 
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
Mind IT Systems
 
Cracking the code review at SpringIO 2024
Cracking the code review at SpringIO 2024Cracking the code review at SpringIO 2024
Cracking the code review at SpringIO 2024
Paco van Beckhoven
 
Cyaniclab : Software Development Agency Portfolio.pdf
Cyaniclab : Software Development Agency Portfolio.pdfCyaniclab : Software Development Agency Portfolio.pdf
Cyaniclab : Software Development Agency Portfolio.pdf
Cyanic lab
 
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.ILBeyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
Natan Silnitsky
 
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptx
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptxTop Features to Include in Your Winzo Clone App for Business Growth (4).pptx
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptx
rickgrimesss22
 
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdfEnhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
Jay Das
 
Orion Context Broker introduction 20240604
Orion Context Broker introduction 20240604Orion Context Broker introduction 20240604
Orion Context Broker introduction 20240604
Fermin Galan
 
How Recreation Management Software Can Streamline Your Operations.pptx
How Recreation Management Software Can Streamline Your Operations.pptxHow Recreation Management Software Can Streamline Your Operations.pptx
How Recreation Management Software Can Streamline Your Operations.pptx
wottaspaceseo
 
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing SuiteAI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
Google
 
Accelerate Enterprise Software Engineering with Platformless
Accelerate Enterprise Software Engineering with PlatformlessAccelerate Enterprise Software Engineering with Platformless
Accelerate Enterprise Software Engineering with Platformless
WSO2
 
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERRORTROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
Tier1 app
 
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoamOpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
takuyayamamoto1800
 
First Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User EndpointsFirst Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User Endpoints
Globus
 
Vitthal Shirke Microservices Resume Montevideo
Vitthal Shirke Microservices Resume MontevideoVitthal Shirke Microservices Resume Montevideo
Vitthal Shirke Microservices Resume Montevideo
Vitthal Shirke
 

Recently uploaded (20)

Globus Compute Introduction - GlobusWorld 2024
Globus Compute Introduction - GlobusWorld 2024Globus Compute Introduction - GlobusWorld 2024
Globus Compute Introduction - GlobusWorld 2024
 
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
Field Employee Tracking System| MiTrack App| Best Employee Tracking Solution|...
 
RISE with SAP and Journey to the Intelligent Enterprise
RISE with SAP and Journey to the Intelligent EnterpriseRISE with SAP and Journey to the Intelligent Enterprise
RISE with SAP and Journey to the Intelligent Enterprise
 
Large Language Models and the End of Programming
Large Language Models and the End of ProgrammingLarge Language Models and the End of Programming
Large Language Models and the End of Programming
 
2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx
 
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
 
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
Custom Healthcare Software for Managing Chronic Conditions and Remote Patient...
 
Cracking the code review at SpringIO 2024
Cracking the code review at SpringIO 2024Cracking the code review at SpringIO 2024
Cracking the code review at SpringIO 2024
 
Cyaniclab : Software Development Agency Portfolio.pdf
Cyaniclab : Software Development Agency Portfolio.pdfCyaniclab : Software Development Agency Portfolio.pdf
Cyaniclab : Software Development Agency Portfolio.pdf
 
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.ILBeyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
Beyond Event Sourcing - Embracing CRUD for Wix Platform - Java.IL
 
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptx
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptxTop Features to Include in Your Winzo Clone App for Business Growth (4).pptx
Top Features to Include in Your Winzo Clone App for Business Growth (4).pptx
 
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdfEnhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
Enhancing Project Management Efficiency_ Leveraging AI Tools like ChatGPT.pdf
 
Orion Context Broker introduction 20240604
Orion Context Broker introduction 20240604Orion Context Broker introduction 20240604
Orion Context Broker introduction 20240604
 
How Recreation Management Software Can Streamline Your Operations.pptx
How Recreation Management Software Can Streamline Your Operations.pptxHow Recreation Management Software Can Streamline Your Operations.pptx
How Recreation Management Software Can Streamline Your Operations.pptx
 
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing SuiteAI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
 
Accelerate Enterprise Software Engineering with Platformless
Accelerate Enterprise Software Engineering with PlatformlessAccelerate Enterprise Software Engineering with Platformless
Accelerate Enterprise Software Engineering with Platformless
 
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERRORTROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
TROUBLESHOOTING 9 TYPES OF OUTOFMEMORYERROR
 
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoamOpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
 
First Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User EndpointsFirst Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User Endpoints
 
Vitthal Shirke Microservices Resume Montevideo
Vitthal Shirke Microservices Resume MontevideoVitthal Shirke Microservices Resume Montevideo
Vitthal Shirke Microservices Resume Montevideo
 

Quill + Spark = Better Together

  • 2.
  • 3.
  • 4.
  • 5. So What’s The Difference? • Abstraction • Encapsulation • Error Handling • Good Control Flow • Performance Application Development Languages Data Retrieval Languages • Natural Expression • Possible Optimization • Good Control Flow • Performance
  • 6. They Make Different Tradeoffs! Abstraction Power Possible Optimizations Data Retrieval Languages Application Development Languages
  • 7. Example Please??? CREATE VIEW HelloAmerican AS SELECT 'Hello ' || t.firstName + ' ' || t.lastName + ' of ' || a.city FROM Americans t JOIN Addresses a on t.address_id == a.id -- Hello John James of New York CREATE VIEW HelloCanadian AS SELECT 'Hello ' + t.name + ' ' + t.surname + ' of ' + a.city FROM Canadians t JOIN Addresses a on t.residence_id == a.id -- Hello Jim Jones of Toronto CREATE VIEW HelloYeti AS SELECT 'Hello ' + t.gruntingSound + ' ' + t.roaringSound + ' of ' + a.city FROM AbominableShowmen t JOIN Addresses a on t.cave_id == a.id -- Hello Aaargalah Gralala of Kholat Syakhl
  • 8. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id CREATE VIEW AmericanClients AS SELECT * from AddressToSomeone( SELECT t.firstName as called, t.lastName as alsoCalled, a.address_id as whereHeLives_id FROM Americans ) CREATE VIEW CanadianClients AS SELECT * from AddressToSomeone( SELECT t.name as called, t.surname as alsoCalled, a.residence_id as whereHeLives_id FROM Canadians ) CREATE VIEW YetiClients AS SELECT * from AddressToSomeone( SELECT t.gruntingSound as called, t.roaringSound as alsoCalled, a.cave_id as whereHeLives_id FROM AbominableShowmen )
  • 9. CREATE FUNCTION concatName ( @called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id) ) AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city SELECT concatName(t.firstName, t.lastName, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.name, t.surname, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.gruntingSound, t.roaringSound, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id
  • 10. CREATE FUNCTION concatName ( @called VARCHAR; @alsoCalled VARCHAR; @whereHeLives_id) ) AS 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' ||t .called || ' ' || t.alsoCalled || ' of ' || a.city, CASE WHEN zd.zone_type = 'K' THEN 'StandardCategory' WHEN zd.zone_type = 'N' AND rc.barbaz = 'GT' THEN 'NonStandardCategory' ELSE 'UnknownCategory' END as zoning_category1, CASE WHEN ru.kdd = 'IK' THEN 'Insanity' WHEN zd.kdd = 'N' AND rc.barbaz = 'GTT' THEN 'MoreInsanity' ELSE 'I_Dont_Even_Know_What_Goes_Here' END as zoning_category2 FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id = a.id JOIN ResidenceUnit ru on a.rid = ru.id JOIN ResidenceClass rc on ru.class_id = rc.class_id JOIN ZoningDesignation zd on ru.zone_id = zd.rzid and zd.cid = rc.class_id SELECT concatName(t.firstName, t.lastName, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.name, t.surname, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id SELECT concatName(t.gruntingSound, t.roaringSound, a.city) FROM American t JOIN Addresses a on t.whereHeLives_id == a.id
  • 11. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id
  • 12. CREATE FUNCTION AddressToSomeone ( @humanoidLivingSomewhere Table(called VARCHAR; alsoCalled VARCHAR; whereHeLives_id) ) SELECT 'Hello ' || t.called || ' ' || t.alsoCalled || ' of ' || a.city FROM @humanoidLivingSomewhere t JOIN Addresses a on t.whereHeLives_id == a.id WHERE a.current = true DataFrame Can! def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 13. DataFrame Can! addressToSomeone( americans.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) addressToSomeone( canadians.select($"name" as "called", $"surname" as "alsoCalled", $"residence_id" as "whereHeLives_id") ) addressToSomeone( yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled", $"cave_id" as "whereHeLives_id") ) def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"id" === $"whereHeLives_id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 14. DataFrame Can… addressToSomeone( americans.select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) addressToSomeone( canadians.select($"name" as "called", $"surname" as "alsoCalled", $"residence_id" as "whereHeLives_id") ) addressToSomeone( yeti.select($"gruntSound" as "called", $"roarSound" as "alsoCalled", $"cave_id" as "whereHeLives_id") ) def addressToSomeone(df: DataFrame) = { df.as("t") .join(addresses.as("a"), $"whereHeLives_id" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) }
  • 15. … Hurt! def insaneJoin(df: DataFrame) = df.as("t") .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id") .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .join(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rid") && ($"zd.cid" === $"rc.class_id") ) .select( concat( lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"), when($"zd.zone_type" === lit("K"), "StandardCategory") .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")), "NonStandardCategory") .otherwise("UnknownCategory") .as("zoning_category1"), when($"ru.kdd" === lit("IK"), "Insanity") .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")), "MoreInsanity") .otherwise("I_Dont_Even_Know_What_Goes_Here") .as("zoning_category2") )
  • 16. … Hurt! def insaneJoin(df: DataFrame) = df.as("t") .join(addresses.as("a"), $"t.whereHeLives_id" === $"a.id") .join(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .join(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .join(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rzid") && ($"zd.cid" === $"rc.class_id") ) .select( concat( lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city"), when($"zd.zone_type" === lit("K"), "StandardCategory") .when(($"zd.zone_type" === lit("N")) && ($"rc.barbaz" === lit("GT")), "NonStandardCategory") .otherwise("UnknownCategory") .as("zoning_category1"), when($"ru.kdd" === lit("IK"), "Insanity") .when(($"zd.kdd" === lit("N")) && ($"rc.barbaz" === lit("GTT")), "MoreInsanity") .otherwise("I_Dont_Even_Know_What_Goes_Here") .as("zoning_category2") )
  • 17.
  • 18. case class HumanoidLivingSomewhere( called:String, alsoCalled: String, whereHeLives_id:Int )
  • 19. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"a.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"ru.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"ru.zone_id" === "zd.rzid") && ($"zd.cid" === $"ru.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 20. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 21. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) } Dataset[(HumanoidLivingSomewhere, Address)]
  • 22. Wait… Dataset? def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) } Dataset[ (((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass) ]
  • 23. def insaneJoin(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 24. case class American( firstName:String, lastName:String, address_id:Int, irrelevantP1:String... irrelevantP100:String ) case class Canadian( name:String, surname:String, residence_id:Int, irrelevantP1:String... irrelevantP100:String ) case class Yeti( gruntingSound:String, roaringSound:String, address_id:Int, irrelevantP1:String... irrelevantP100:String ) Say There's Stuff We Don't Care About case class Address( id:Int, street:String, city:String, current: Boolean irrelevantA1:String... irrelevantA100:String ) case class HumanoidLivingSomewhere( called:String, alsoCalled: String, whereHeLives_id:Int )
  • 25. Let's Plug it In! def addressToSomeone(humanoidLivingSomewhere: DataFrame) = { humanoidLivingSomewhere.as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) } addressToSomeone( americans.select( $"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") ) americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true))
  • 26. Explain This! *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain()
  • 27. SQL Does the Same Thing spark.sql( """ |select concat('Hello ', t.called, ' ', t.alsoCalled, ' of ', a.city) as _1 |from ( | select firstName as called, lastName as alsoCalled, address_id as whereHeLives_id | from americans |) as t |join addresses a on (t.whereHeLives_id = a.id) |where a.current = true |""".stripMargin ) .explain() *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)],
  • 28. *(5) Project [concat(Hello , called, , alsoCalled, of , city)] +- *(5) SortMergeJoin [whereHeLives_id], [id], Inner +- Exchange hashpartitioning(whereHeLives_id) +- *(1) Project [firstName AS called, ... AS whereHeLives_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain() Explain This!
  • 29. In English Please? *(🤞) Gimme My Result! [concat(Hello , called, , alsoCalled, of , city)] +- *(💂) We're Joining! Huzzah! [whereHeLives_id], [id], Inner +- Join Key for the Left Side! (whereHeLives_id) +- *(1) Rename these like I said! Pronto! [firstName as Called... ] +- *(😇) I'm a smart format, load only: [firstName,lastName,address_id] +- Join Key for the Right Side! (id) +- *(😇) I'm a smart format, load only: [id,city,current] Read only current addr. from the file! 😎: [EqualTo(current,true)], americans .select($"firstName" as "called", $"lastName" as "alsoCalled", $"address_id" as "whereHeLives_id") .as("t") .join(addresses.as("a"), $"whereHeLivesId" === $"id") .select( concat(lit("Hello "), $"t.called", lit(" "), $"t.alsoCalled", lit(" of "), $"a.city")) .filter($"a.current" === lit(true)) .explain()
  • 30. How About Dataset? def addressToSomeone(humanoidsLivingSomewhere: Dataset[HumanoidLivingSomewhere]) = { humanoidsLivingSomewhere .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } } val americanClients = addressToSomeone( americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) ) americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 31. Explain This... Please! *(3) SerializeFromObject [UTF8String] +- *(3) MapElements java.lang.String +- DeserializeToObject newInstance(Tuple2) +- SortMergeJoin [_1.whereHeLives_id], [_2.id], Inner +- Exchange hashpartitioning(_1.whereHeLives_id) +- *(1) Project [called, alsoCalled, whereHeLives_id] +- *(1) SerializeFromObject [UTF8String] +- *(1) MapElements HumanoidLivingSomewhere +- DeserializeToObject newInstance(American) +- FileScan parquet [firstName,lastName,address_id,irrelevantP1,irrelevantP2,i +- Exchange hashpartitioning(_2.id) +- FileScan parquet [id,street,city,current,irrelevantA1,irrelevantA2,irrelevantA3,irrel PushedFilters: [] americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } .explain()
  • 32. *(🤮) Serialize Back Into a String Expensive! +- *(3) Do the Outer Map that we Invoked +- (🤮) Deserialize Tuple2 Expensive! +- & We're Joining! Huzzah! [_1.whereHeLives_id], [_2.id], Inner +- Join Key for the Left Side (_1.whereHeLives_id) +- *(1) Project [called, alsoCalled, whereHeLives_id] +- *(🤮) Serialize the Join Key. Expensive! +- *(1) MapElements HumanoidLivingSomewhere +- (🤮) Deserialize into a JVM Object (i.e. class American) +- Scan All 'American' Columns Including 100 irrelevant ones!😱 +- Join Key for the Right Side (_2.id) +- Scan All 'Address' Columns Including 100 irrelevant ones! 😱 We Need to Read The Entire Dataset! No Excluding Non-Current Addresses 😢 americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } .explain()
  • 33. americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 34. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String }
  • 35. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } Which Columns are we using in here? Which Columns are we using in here? Which Columns are we using in here? ⏸
  • 36. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } I Guess We Need
 All Of Them! I Guess We Need
 All Of Them! I Guess We Need
 All Of Them! ⏸
  • 37. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String }
  • 38. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map(¯_(ツ)_/¯ : HumanoidLivingSomewhere) .joinWith(addresses, ¯_(ツ)_/¯) .filter { ¯_(ツ)_/¯ : Boolean } .map { ¯_(ツ)_/¯ : String } ⏸ What columns am I joining by???
  • 39. val americanClients = americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.address_id) ) .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { tup => tup._2.current == true } .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" } What We See: What Catalyst Sees: val americanClients = americans.map( (🖼 American) => HumanoidLivingSomewhere 🖼 ) 🤮 .joinWith(addresses, $"whereHeLives_id" === $"id") .filter { (🖼 HumanoidLivingSomewhere) => Boolean } 🤮 .map { (🖼 HumanoidLivingSomewhere) => String 🖼 } 🤮 → → → → →
  • 40. americans.map(a => HumanoidLivingSomewhere(a.firstName, a.lastName, a.addressId)) .joinWith(addresses, $"id" === $"whereHeLivesId") .filter(ta => ta._2.current == true) .map { case (t, a) => s"Hello ${t.called} ${t.alsoCalled} of ${a.city}" }
  • 41. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) } What About Frameless? addressToSomeone( americans.select(americans('firstName), americans('lastName), americans('addressId)) .deserialized.map{ case (name, age, whereHeLives_id ) => HumanoidLivingSomewhere( name.asInstanceOf[String], age.asInstanceOf[String], whereHeLives_id.asInstanceOf[Int]) } )
  • 42. What About Frameless? def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) } addressToSomeone( americans.select(americans('firstName), americans('lastName), americans('addressId)) .deserialized.map{ case (name, age, whereHeLives_id ) => HumanoidLivingSomewhere( name.asInstanceOf[String], age.asInstanceOf[String], whereHeLives_id.asInstanceOf[Int]) } )
  • 43. What About Frameless? def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val joined = humanoid .joinInner(addresses) { humanoid('where) === addresses('id) } joined.select(concat( lit("Hello "), joined.colMany('_1, 'called), lit(" "), joined.colMany('_1, 'alsoCalled), lit(" of "), joined.colMany('_2, 'city))) }
  • 44. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } What Sub-Tuple is class_id inside of?
  • 45. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } What Sub-Tuple is class_id inside of? Dataset[ (((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass) ]
  • 46. def addressToSomeone(humanoid: TypedDataset[HumanoidLivingSomewhere]) = { val j1 = humanoid.joinInner(addresses) { humanoid('whereHeLives_id) === addresses('id) } val j2 = j1.joinInner(residenceUnit) { j1.colMany('_2, 'rid) === residenceUnit('id) } val j3 = j2.joinInner(residenceClass) { j2.colMany('_2, 'class_id) === residenceClass('class_id) } val j4 = j3.joinInner(zoningDesignation) { (j3.colMany('_1, '_2, 'zone_id) === zoningDesignation('rzid)) && (zoningDesignation('rzid) === j3.colMany('_1, '_, 'class_id)) } type OUT = ((((HumanoidLivingSomewhere, Address), ResidenceUnit), ResidenceClass), ZoningDesignation) j4.select( concat( lit("Hello "), j4.colMany('_1, '_1, '_1, '_1, 'called), lit(" "), j4.colMany('_1, '_1, '_1, '_1, 'alsoCalled), lit(" of "), j4.colMany('_1, '_1, '_1, '_2, 'city) ), when(j4.colMany('_2, 'zone_type) === "K", lit[String, OUT]("StandardCategory")) .when((j4.colMany('_2, 'zone_type) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GT"), lit("NonStandardCategory")) .otherwise( lit("UnknownCategory")), when(j4.colMany('_1, '_1, '_2, 'kdd) === "IK", lit[String, OUT]("Insanity")) .when((j4.colMany('_2, 'kdd) === "N") && (j4.colMany('_1, '_2, 'barbaz) === "GTT"), lit("MoreInsanity")) .otherwise(lit("I_Dont_Even_Know_What_Goes_Here")) ) } Also... What's This???
  • 47. [error] found : frameless.TypedColumn[Nothing,String] [error] required: frameless.AbstractTypedColumn[((((org.ctl.complex.HumanoidLivingSomewhere, org.ctl.complex.Address), org.ctl.complex.ResidenceUnit), org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation),String] [error] Note: Nothing <: ((((org.ctl.complex.HumanoidLivingSomewhere, org.ctl.complex.Address), org.ctl.complex.ResidenceUnit), org.ctl.complex.ResidenceClass), org.ctl.complex.ZoningDesignation), but class AbstractTypedColumn is invariant in type T. [error] You may wish to define T as +T instead. (SLS 4.5) [error] when(j4.colMany('_2, 'zone_type) === "K", lit("StandardCategory"))
  • 48. DataFrame/SQL Untyped 😢 Column Pruning Filter Pushdown 😎 Dataset Almost Typed 😕 Extra Serialization 🤮 Frameless Typed 😃 Very Complex if you don't know Shapeless.
  • 50.
  • 51. ? Scala Code SQL DataFrame
  • 52. ? Scala Code SQL DataFrame Typechecking Power Possible Optimizations
  • 53. Scala Code SQL Quill DataFrame quote { scala-syntax-tree } AST Macro
  • 54. Scala Code SQL Quill quote { scala-syntax-tree } AST Macro
  • 55. Query[R] SQL Quill quote { Query[R] } AST Macro Quoted[Query[R]]
  • 56.
  • 57.
  • 58. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[?]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 59. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 60. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Query [Humanoid] Humanoid Query [Address] Address ⏸
  • 61. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 62. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 63. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses if ( a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 64. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 65. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 66. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Address
  • 67. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h LEFT JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.leftJoin(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } Address Option[Address] Address
  • 68. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 69. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true String
  • 70. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Query[String]
  • 71. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Query[Humanoid] => Query[String] Query[String]
  • 72. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[String]] Query[Humanoid] => Query[String] Query[String]
  • 73. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield h } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT h.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Humanoid]] Query[Humanoid] => Query[Humanoid] Query[Humanoid]
  • 74. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield a } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT a.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Address]] Query[Humanoid] => Query[Address] Query[Address]
  • 75. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield (h, a) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT h.*, a.* FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[(Humanoid, Address)]] Query[Humanoid] => Query[(Humanoid, Address)] Query[(Humanoid, Address)]
  • 76. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield (Foobar(h, a)) } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT ?? I don't understand objects ?? FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true Quoted[Query[Humanoid] => Query[Foobar]] Query[Humanoid] => Query[Foobar] Query[Foobar]
  • 77. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 78. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 79. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 80. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 81. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 82. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 83. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory" )
  • 84. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 85. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.r && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 86. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) [error] quillspark-examples/src/main/scala/Main.scala:107:28: value rid is not a member of org.ctl.complex.ZoningDesignation [error] ru.zone_id == zd.rid && zd.cid == rc.class_id [error] ^
  • 87. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) ⏸
  • 88. humanoidsLivingSomewhere.as("t") .joinWith(addresses.as("a"), $"whereHeLives_id" === $"id") .joinWith(residenceUnit.as("ru"), $"_2.rid" === $"ru.id") .joinWith(residenceClass.as("rc"), $"_2.class_id" === $"rc.class_id") .joinWith(zoningDesignation.as("zd"), ($"_1._2.zone_id" === "zd.rzid") && ($"zd.cid" === $"_1._2.class_id") ) .map { case ((((t, a), ru), rc), zd) => ( s"Hello ${t.called} ${t.alsoCalled} of ${a.city}", if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandardCategory" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" ) }
  • 89. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 90. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory", if (ru.kdd == "IK") "Insanity" else if (zd.kdd == "N" && rc.barbaz == "GTT") "MoreInsanity" else "I_Dont_Even_Know_What_Goes_Here" )
  • 91. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city, if (zd.zone_type == "K") "StandardCategory" else if (zd.zone_type == "N" && rc.barbaz == "GT") "NonStandard" else "UnknownCategory" )
  • 92. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) zd <- zoningDesignation.join(zd => ru.zone_id == zd.rid && zd.cid == rc.class_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 93. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id)
 rc <- residenceClass.join(rc => ru.class_id == rc.class_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 94. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) ru <-residenceUnit.join(ru => a.rid == ru.id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 95. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 96. for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city )
  • 97. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 98. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => } CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) ▶
  • 99. CREATE FUNCTION AddressToSomeone ( @humanoid Table( called VARCHAR; alsoCalled VARCHAR; whereHeLives_id ) ) SELECT 'Hello ' || h.called || ' ' || h.alsoCalled || ' of ' || a.city FROM @humanoid h JOIN Addresses a ON h.whereHeLives_id == a.id WHERE a.current == true ▶ case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) }
  • 100. case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true quote { addressToSomeone(americans.map(am => 
 Humanoid(am.firstName, am.lastName, am.address_id))) }
  • 101. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } quote {addressToSomeone(canadians.map(am => 
 Humanoid(am.name, am.surname, am.residence_id)))} SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true
  • 102. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))} SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 103. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true case class Humanoid( called:String, alsoCalled: String, whereHeLives_id:Int ) val addressToSomeone = quote { (humanoid: Query[Humanoid]) => for { h <- humanoid a <- addresses.join(a => a.id == h.whereHeLives_id ) if (a.current == true) } yield ( "Hello " + h.called + " " + h.alsoCalled + " of " + a.city ) } SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true ▶ quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))}
  • 104. SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true DataFrame quote {addressToSomeone(yeti.map(am => 
 Humanoid(am.gruntingSound, am.roaringSound, am.cave_id)))} quote {addressToSomeone(canadians.map(am => 
 Humanoid(am.name, am.surname, am.residence_id)))} quote { addressToSomeone(americans.map(am => 
 Humanoid(am.firstName, am.lastName, am.address_id))) } run(Query[String]) run(Query[String]) run(Query[String])
  • 105. DataFrame
 Dataset[String] SELECT 'Hello ' || h.firstName || ' ' || h.lastName || ' of ' || a.city FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT 'Hello ' || h.name || ' ' || h.surname || ' of ' || a.city FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true run(Query[String]) run(Query[String]) run(Query[String])
  • 106. DataFrame
 Dataset[Humanoid] SELECT h FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT h FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT h FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true run(Query[Humanoid]) run(Query[Humanoid]) run(Query[Humanoid])
  • 107. DataFrame
 Dataset[T] run(Query[T]) run(Query[T]) run(Query[T]) SELECT ? FROM Americans h JOIN Addresses a ON h.address_id == a.id WHERE a.current == true SELECT ? FROM Canadians h JOIN Addresses a ON h.residence_id == a.id WHERE a.current == true SELECT ? FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 110. Dataset[T] DataFrame Easy! Just '.toDF' Harder! '.[DoIReallyKnowItsThis?]' run(Query[T])
  • 111. val spark = SparkSession.builder() .appName("SparkQuillExample") .enableHiveSupport() .getOrCreate() implicit val sqlContext = spark.sqlContext import sqlContext.implicits._ import QuillSparkContext._ val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] Dataset[Yeti] Dataset[Address]
  • 112. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } Quoted[Query[Yeti]] Quoted[Query[Address]] Dataset[Yeti] Dataset[Address]
  • 113. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) }
  • 114. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } val yetiOfSomeplace: Dataset[String] = run(output) val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) }
  • 115. val yetiDS = spark.read.parquet("output/yeti").as[Yeti] val addressesDS = spark.read.parquet("output/addresses").as[Address] val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yeti = quote { liftQuery(yetiDS) } val addresses = quote { liftQuery(addressesDS) } val yetiOfSomeplace: Dataset[String] = run(output) val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } Run This Query: Then Give Me Back My Dataset!!! SELECT 'Hello ' || h.gruntingSound || ' ' || h.roaringSound || ' of ' || a.city FROM Yeti h JOIN Addresses a ON h.cave_id == a.id WHERE a.current == true
  • 116. val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val yetiOfSomeplace: Dataset[String] = run(output) *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)] val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) }
  • 117. val addressToSomeone = quote { (humanoids: Query[Humanoid]) => for { h <- humanoids a <- addresses.join(a => a.id == h.whereHeLives_id) if (a.current) } yield "Hello " + h.called + " " + h.alsoCalled + " of " + a.city } val output = quote { addressToSomeone( yeti.map(am => Humanoid(am.firstName, am.lastName, am.address_id) ) ) } val yetiOfSomeplace: Dataset[String] = run(output) *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)] Optimized + No Serialization 😎 Optimized + No Serialization 😎
  • 118. val yetiOfSomeplace: Dataset[String] = run(output) val addressToSomeone = quote { Quill Magic! } ▶ val output = quote { Quill Magic! } val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti] val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address] *(5) Project [concat(Hello , firstName, , lastName, of , city)] +- *(5) SortMergeJoin [address_id], [id], Inner +- Exchange hashpartitioning(address_id) +- *(1) Project [firstName, lastName, address_id] +- *(1) FileScan parquet [firstName,lastName,address_id] +- Exchange hashpartitioning(id) +- *(3) FileScan parquet [id,city,current] PushedFilters: [EqualTo(current,true)]
  • 119. ▶ val yetiOfSomeplace: Dataset[String] = run(output) val addressToSomeone = quote { Quill Magic! } val output = quote { Quill Magic! } val yetiDS: Dataset[Yeti] = parquet("output/yeti").as[Yeti] val addressesDS: Dataset[Address] = parquet("output/addresses").as[Address]
  • 120. // Applicative Joins yeti.join(addresses).on(_.caveId == _.id) yeti.leftJoin(addresses).on(_.caveId == _.id) // Implicit Joins for { y <- yeti a <- addresses if (y.caveId == a.id) } yield (y, a) // Semi-Joins val cavelessYeti = quote { yeti.filter(y => !addresses.map(_.id).contains(y.caveId)) } Some other stuff we can do...
  • 121. Some other stuff we can do... // Group-By orders.groupBy(_.sku).map { case (sku, orders) => (sku, orders.map(_.price).avg) } // Concat-Map val nodesChildren = quote { (ns: Query[Node]) => ns.concatMap(n => n.children) } // Union/UnionAll val americansAndCanadians = quote { americans.map(_.firstName) unionAll canadians.map(_.surname) }
  • 122. Some other stuff we can do... // User Defined Aggregation Functions (UDAFs) spark.udf.register("geomMean", new GeometricMean) val geomMean = quote { (q: Query[BigDecimal]) => infix"geomMean(${q})".as[BigDecimal] } orders.groupBy(_.sku).map { case (sku, orders) => (sku, geomMean(orders.map(_.price))) } // Using Spark UDFs spark.udf.register("businessLogicUdf", (str:String) => str + "-suffix") val businessLogicUdf = quote { (str: String) => infix"businessLogicUdf(${str})".as[String] } quote { yeti.map(y => businessLogicUdf(y.gruntingSound)) }
  • 123.
  • 124. https://getquill.io/ https://github.com/getquill/quill https://gitter.im/getquill/quill ...Try It Out! libraryDependencies ++= Seq( "io.getquill" %% "quill-spark" % "3.4.10" ) <dependency> <groupId>io.getquill</groupId> <artifactId>quill-spark_2.12</artifactId> <version>3.4.10</version> </dependency>