Despite the NoSQL movement trying to flag traditional databases as a dying breed, the RDBMS keeps evolving and adding powerful new weapons to its arsenal. In this talk we'll explore Common Table Expressions (SQL:1999) and how SQL handles recursion, breaking the two-dimensional barrier and paving the way to more complex data structures like trees and graphs, and we'll see how we can replicate features from social networks and recommendation systems. We'll also have a look at window functions (SQL:2003) and the advanced reporting features they finally make possible.
6. Foreword
S ( b, c) AS first_not_null
chyA
COALESCE(a,
ie IS NOT NULL r ar
h
UN E col2
WHERE O
R SV N
IBOU VER (PART
U b D ITION
R EC D ED BY de
nt
N PR ptno)
TH SELECT * FROM table
weighT)(*) AS c
2
)
http://www.alberton.info/talks
I A age EC
Nt
(
a SUM ED
W HA INN ER L ING
E N JOIN
L
JOI
VIWE t2 Nt
NGX A
COU
T E
TER
A EPT
C
BE LEFTVG t2 ON t1.id = t2.id
JOIN N
MIN(
(sal
Oary) EPT
OU
I
N EXC 50 >
HT
U 00
RIG
3
10. Recommendation Systems
Customers who bought this item also bought
Music Recommended by Last.fm
6
11. Recommendation Systems
Customers who bought this item also bought
Music Recommended by Last.fm
items linked by
similar patterns in
users’ behaviour
6
12. Recommendation Systems
Customers who bought this item also bought
Music Recommended by Last.fm
6
13. Recommendation Systems
items linked by
Customers who bought this item also bought
similar features
(e.g. genre)
Music Recommended by Last.fm
6
33. Graph Representation in the DB
Adjacency Matrix
Nodes N1 N2 N3
N1
N2
N3
...
Requires DDL statements
ALTER TABLE nodes ADD n4 (...)
16
34. Graph Representation in the DB
Adjacency Matrix
Nodes N1 N2 N3
N1
N2
N3
...
Requires DDL statements
ALTER TABLE nodes ADD n4 (...)
16
35. Graph Representation in the DB
Adjacency Matrix Adjacency List
Nodes N1 N2 N3 src dest
N1
N2
N3
...
Requires DDL statements Only DML statements
ALTER TABLE nodes ADD n4 (...) INSERT, DELETE, UPDATE
16
36. Nodes and Edges
nodes
CREATE TABLE nodes (
id INTEGER PRIMARY KEY,
node name age
name VARCHAR(10) NOT NULL,
feat1 CHAR(1), -- e.g., age
feat2 CHAR(1) -- school or company
);
edges
CREATE TABLE edges (
a INTEGER NOT NULL REFERENCES nodes(id) a b
ON UPDATE CASCADE ON DELETE CASCADE,
b INTEGER NOT NULL REFERENCES nodes(id)
ON UPDATE CASCADE ON DELETE CASCADE,
PRIMARY KEY (a, b)
);
CREATE INDEX a_idx ON edges (a);
CREATE INDEX b_idx ON edges (b);
17
37. Nodes and Edges
-- Undirected Graphs: constraint on the uniqueness of the pair
CREATE UNIQUE INDEX pair_unique_idx a b
ON edges (LEAST(a,b), GREATEST(a,b));
3 5
5 3
3 5
-- No self-loops (node linking to itself)
ALTER TABLE edges ADD CONSTRAINT
no_self_loops_chk CHECK (a <> b);
18
38. Nodes and Edges
-- Undirected Graphs: constraint on the uniqueness of the pair
CREATE UNIQUE INDEX pair_unique_idx a b
ON edges (LEAST(a,b), GREATEST(a,b));
3 5
5 3
3 5
x
x
-- No self-loops (node linking to itself)
ALTER TABLE edges ADD CONSTRAINT
no_self_loops_chk CHECK (a <> b);
18
39. Sample Graph
x x
w w
2 3
x z
y w
1 4
x
y
7 5
x
6 y
x
z
19
40. Sample Graph
nodes edges
node name feat1 feat2 a b
1 3
1 node1 x y
2 1
2 node2 x w 2 4
3 node3 x w 3 4
3 5
4 node4 z w 3 6
5 node5 x y 4 7
5 1
6 node6 x z
5 6
7 node7 x y 6 1
20
41. Traversing the Graph
Select nodes connected to node 1
a b
-- Directed Graphs 1 3
2 1
SELECT *
FROM nodes n 3 6
LEFT JOIN edges e ON n.id = e.b
WHERE e.a = 1;
4 7
5 1
5 6
21
42. Traversing the Graph
Select nodes connected to node 1
a b
-- Directed Graphs 1 3
2 1
SELECT *
FROM nodes n 3 6
LEFT JOIN edges e ON n.id = e.b
WHERE e.a = 1;
4 7
5 1
5 6
21
43. Traversing the Graph
Select nodes connected to node 1
a b
-- Directed Graphs 1 3
2 1
SELECT *
FROM nodes n 3 6
LEFT JOIN edges e ON n.id = e.b
WHERE e.a = 1;
4 7
5 1
-- Undirected Graphs
5 6
SELECT *
FROM nodes
WHERE id IN (
    -- for every edge touching node 1, keep the endpoint that is not node 1
    SELECT CASE WHEN a = 1 THEN b ELSE a END
    FROM edges
    WHERE a = 1 OR b = 1
);
21
44. Traversing the Graph
Select nodes connected to node 1
a b
-- Directed Graphs 1 3
2 1
SELECT *
FROM nodes n 3 6
LEFT JOIN edges e ON n.id = e.b
WHERE e.a = 1;
4 7
5 1
-- Undirected Graphs
5 6
SELECT *
FROM nodes
WHERE id IN (
    -- for every edge touching node 1, keep the endpoint that is not node 1
    SELECT CASE WHEN a = 1 THEN b ELSE a END
    FROM edges
    WHERE a = 1 OR b = 1
);
21
45. Traversing the Entire Graph
// Naive traversal: one select_friends() call per node, nested one loop
// deeper for every extra level of the graph.
$nodes = select_friends($me);
foreach ($nodes as $node) {
    $nodes2 = select_friends($node);
    // separate loop variable so the outer $node is not overwritten
    foreach ($nodes2 as $node2) {
        ...
        ...
        ...
    }
}
22
47. Traversing the Full Graph
-- Iterative traversal: "reached" accumulates the ids of every node found so far.
CREATE TEMPORARY TABLE reached (
id INT PRIMARY KEY
);
-- seed the table with the starting node
INSERT INTO reached VALUES (@root_id);
-- NOTE(review): @@rowcount is presumably the row count of the previous
-- statement (T-SQL style), so the loop stops once a pass inserts nothing.
WHILE(@@rowcount > 0)
BEGIN
-- add the children of already-reached nodes that are not in the table yet
INSERT INTO reached (id)
-- DISTINCT: the same child can be found through several reached parents
SELECT DISTINCT child_id
FROM edges e
JOIN reached p
ON p.id = e.parent_id
WHERE e.child_id NOT IN (
SELECT id FROM reached
)
END;
-- NOTE(review): this snippet uses parent_id/child_id, while the edges table
-- declared earlier in the deck has columns a/b; adjust to the real schema.
23
48. Traversing the Full Graph
CREATE TEMPORARY TABLE reached (
id INT PRIMARY KEY
);
INSERT INTO reached VALUES (@root_id);
WHILE(@@rowcount > 0)
BEGIN
INSERT INTO reached (id)
Temporary
SELECT DISTINCT child_id Table
FROM edges e +
JOIN reached p Stored
ON p.id = e.parent_id
WHERE e.child_id NOT IN ( Procedure
SELECT id FROM reached
)
END;
23
49. Common Table Expressions (CTE)
-- VIEW: lists every edge in both directions
CREATE VIEW undirected_graph (a, b) AS
    SELECT e.a, e.b FROM edges AS e
    UNION ALL
    SELECT e.b, e.a FROM edges AS e;

SELECT ug.a, ug.b
FROM undirected_graph AS ug;
24
50. Common Table Expressions (CTE)
-- VIEW: lists every edge in both directions
CREATE VIEW undirected_graph (a, b) AS
    SELECT e.a, e.b FROM edges AS e
    UNION ALL
    SELECT e.b, e.a FROM edges AS e;

SELECT ug.a, ug.b
FROM undirected_graph AS ug;

-- CTE (dynamic VIEW): same definition, scoped to a single statement
WITH undirected_graph (a, b) AS (
    SELECT e.a, e.b FROM edges AS e
    UNION ALL
    SELECT e.b, e.a FROM edges AS e
)
SELECT ug.a, ug.b
FROM undirected_graph AS ug;
24
51. Recursive CTEs - Sequence
-- Generates the integers 1..100, one row per value.
WITH RECURSIVE seq (n) AS (
        -- anchor member: first value of the sequence
        SELECT 1 AS n
    UNION ALL
        -- recursive member: next value, until the cap is reached
        SELECT seq.n + 1
        FROM seq
        WHERE seq.n < 100
)
SELECT seq.n
FROM seq;
25
52. Recursive CTEs - Sequence
-- Generates the integers 1..100, one row per value.
WITH RECURSIVE seq (n) AS (
        -- anchor member: first value of the sequence
        SELECT 1 AS n
    UNION ALL
        -- recursive member: next value, until the cap is reached
        SELECT seq.n + 1
        FROM seq
        WHERE seq.n < 100
)
SELECT seq.n
FROM seq;
25
53. Recursive CTEs - Sequence
-- Generates the integers 1..100, one row per value.
WITH RECURSIVE seq (n) AS (
        -- anchor member: first value of the sequence
        SELECT 1 AS n
    UNION ALL
        -- recursive member: next value, until the cap is reached
        SELECT seq.n + 1
        FROM seq
        WHERE seq.n < 100
)
SELECT seq.n
FROM seq;
25
54. Recursive CTEs - Sequence
-- Generates the integers 1..100, one row per value.
WITH RECURSIVE seq (n) AS (
        -- anchor member: first value of the sequence
        SELECT 1 AS n
    UNION ALL
        -- recursive member: next value, until the cap is reached
        SELECT seq.n + 1
        FROM seq
        WHERE seq.n < 100
)
SELECT seq.n
FROM seq;
25
55. Recursive CTEs - Sequence
WITH RECURSIVE seq (n) AS (
n
-- non recursive term 1
SELECT 1 AS n
2
UNION ALL 3
4
-- recursive term
SELECT n + 1 5
FROM seq
...
WHERE n < 100
100
)
SELECT n FROM seq;
25
56. Recursive CTEs - Fibonacci Seq.
-- Fibonacci numbers below 1000: each row holds two consecutive terms (i, j)
-- and the final SELECT lists the first term of every pair.
WITH RECURSIVE fib (i, j) AS (
    -- non recursive term: the first two Fibonacci numbers
    SELECT 0, 1
    UNION ALL
    -- recursive term: shift the pair forward, (i, j) -> (j, i + j).
    -- GREATEST(i, j) equals j here because the terms never decrease.
    SELECT GREATEST(i, j) AS i,
           (i + j) AS j
    FROM fib
    WHERE j < 1000
)
SELECT i FROM fib;
26
57. Recursive CTEs - Fibonacci Seq.
-- Fibonacci numbers below 1000: each row holds two consecutive terms (i, j)
-- and the final SELECT lists the first term of every pair.
WITH RECURSIVE fib (i, j) AS (
    -- non recursive term: the first two Fibonacci numbers
    SELECT 0, 1
    UNION ALL
    -- recursive term: shift the pair forward, (i, j) -> (j, i + j).
    -- GREATEST(i, j) equals j here because the terms never decrease.
    SELECT GREATEST(i, j) AS i,
           (i + j) AS j
    FROM fib
    WHERE j < 1000
)
SELECT i FROM fib;
26
58. Recursive CTEs - Fibonacci Seq.
-- Fibonacci numbers below 1000: each row holds two consecutive terms (i, j)
-- and the final SELECT lists the first term of every pair.
WITH RECURSIVE fib (i, j) AS (
    -- non recursive term: the first two Fibonacci numbers
    SELECT 0, 1
    UNION ALL
    -- recursive term: shift the pair forward, (i, j) -> (j, i + j).
    -- GREATEST(i, j) equals j here because the terms never decrease.
    SELECT GREATEST(i, j) AS i,
           (i + j) AS j
    FROM fib
    WHERE j < 1000
)
SELECT i FROM fib;
26
59. Recursive CTEs - Fibonacci Seq.
WITH RECURSIVE fib (i, j) AS (
i
-- non recursive term 0
SELECT 0, 1 1
1
UNION ALL
2
-- recursive term 5
SELECT GREATEST(i, j),
8
(i + j) AS i
FROM fib 13
WHERE j < 1000 21
)
...
SELECT i FROM fib;
26
60. Recursive CTEs
-- Org chart stored as an adjacency list: each employee points to its boss
-- (NULL for the top of the hierarchy).
CREATE TABLE orgchart (
    emp    VARCHAR(10) PRIMARY KEY,
    boss   VARCHAR(10) REFERENCES orgchart(emp),
    salary DECIMAL(10,2)
);

-- String literals use plain single quotes; typographic quotes are not
-- valid SQL string delimiters.
INSERT INTO orgchart
    (emp, boss, salary) VALUES
    ('A', NULL, 1000.00),
    ('B', 'A', 900.00),
    ('C', 'A', 900.00),
    ('D', 'C', 800.00),
    ('E', 'C', 700.00),
    ('F', 'C', 600.00),
    ('G', 'B', 800.00);

-- Resulting tree:
--          A
--        /   \
--       B     C
--       |   / | \
--       G  D  E  F
27
61. Recursive CTEs
CREATE TABLE orgchart (
emp VARCHAR(10) PRIMARY KEY,
boss VARCHAR(10) REFERENCES orgchart(emp),
salary DECIMAL(10,2)
emp boss salary
);
A NULL 1000.00
INSERT INTO orgchart B A 900.00
(emp, boss, salary) VALUES C A 900.00
(‘A’, NULL, 1000.00), D C 800.00
(‘B’, ‘A’, 900.00), E C 700.00
(‘C’, ‘A’, 900.00), F C 600.00
(‘D’, ‘C’, 800.00), A G B 800.00
(‘E’, ‘C’, 700.00),
(‘F’, ‘C’, 600.00),
(‘G’, ‘B’, 800.00); B C
G D E F
27
62. Recursive CTEs
-- Walks the org chart top-down, starting from the employee with no boss.
WITH RECURSIVE hierarchy AS (
        -- anchor member: the root of the tree
        SELECT *
        FROM orgchart
        WHERE boss IS NULL
    UNION ALL
        -- recursive member: employees whose boss is already in the result
        SELECT staff.*
        FROM hierarchy AS manager
        JOIN orgchart AS staff
            ON staff.boss = manager.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
28
63. Recursive CTEs
-- Walks the org chart top-down, starting from the employee with no boss.
WITH RECURSIVE hierarchy AS (
        -- anchor member: the root of the tree
        SELECT *
        FROM orgchart
        WHERE boss IS NULL
    UNION ALL
        -- recursive member: employees whose boss is already in the result
        SELECT staff.*
        FROM hierarchy AS manager
        JOIN orgchart AS staff
            ON staff.boss = manager.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
28
64. Recursive CTEs
-- Walks the org chart top-down, starting from the employee with no boss.
WITH RECURSIVE hierarchy AS (
        -- anchor member: the root of the tree
        SELECT *
        FROM orgchart
        WHERE boss IS NULL
    UNION ALL
        -- recursive member: employees whose boss is already in the result
        SELECT staff.*
        FROM hierarchy AS manager
        JOIN orgchart AS staff
            ON staff.boss = manager.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
28
65. Recursive CTEs
-- Walks the org chart top-down, starting from the employee with no boss.
WITH RECURSIVE hierarchy AS (
        -- anchor member: the root of the tree
        SELECT *
        FROM orgchart
        WHERE boss IS NULL
    UNION ALL
        -- recursive member: employees whose boss is already in the result
        SELECT staff.*
        FROM hierarchy AS manager
        JOIN orgchart AS staff
            ON staff.boss = manager.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
28
66. Recursive CTEs
WITH RECURSIVE hierarchy AS (
-- non recursive term
SELECT *
FROM orgchart emp boss salary
WHERE boss IS NULL A NULL 1000.00
UNION ALL B A 900.00
-- recursive term C A 900.00
SELECT E1.*
FROM orgchart AS E1 D C 800.00
JOIN hierarchy AS E2 E C 700.00
ON E1.boss = E2.emp F C 600.00
) G B 800.00
SELECT *
FROM hierarchy
ORDER BY emp;
28
67. Recursive CTEs
-- Org chart with depth (lvl), an indented label (tree) and a dotted
-- materialized path (path) that gives a depth-first ordering.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: the root (employee without a boss)
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           emp || '.' AS path
    FROM orgchart
    WHERE boss IS NULL
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           -- pad to a width that grows with depth (LPAD truncates names
           -- longer than that width)
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2
      ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
29
68. Recursive CTEs
-- Org chart with depth (lvl), an indented label (tree) and a dotted
-- materialized path (path) that gives a depth-first ordering.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: the root (employee without a boss)
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           emp || '.' AS path
    FROM orgchart
    WHERE boss IS NULL
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           -- pad to a width that grows with depth (LPAD truncates names
           -- longer than that width)
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2
      ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
29
69. Recursive CTEs
-- Org chart with depth (lvl), an indented label (tree) and a dotted
-- materialized path (path) that gives a depth-first ordering.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: the root (employee without a boss)
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           emp || '.' AS path
    FROM orgchart
    WHERE boss IS NULL
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           -- pad to a width that grows with depth (LPAD truncates names
           -- longer than that width)
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2
      ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
29
70. Recursive CTEs
-- Org chart with depth (lvl), an indented label (tree) and a dotted
-- materialized path (path) that gives a depth-first ordering.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: the root (employee without a boss)
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           emp || '.' AS path
    FROM orgchart
    WHERE boss IS NULL
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           -- pad to a width that grows with depth (LPAD truncates names
           -- longer than that width)
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2
      ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
29
71. Recursive CTEs
-- Org chart with depth (lvl), an indented label (tree) and a dotted
-- materialized path (path) that gives a depth-first ordering.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: the root (employee without a boss)
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           emp || '.' AS path
    FROM orgchart
    WHERE boss IS NULL
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           -- pad to a width that grows with depth (LPAD truncates names
           -- longer than that width)
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2
      ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
29
72. Recursive CTEs
WITH RECURSIVE hierarchy AS (
SELECT *,
emp boss lvl,salary
1 AS lvl tree path
CAST(emp AS TEXT) AS tree,
A NULL|| ‘.’ AS path 1
emp 1000.00 A A.
FROM orgchart
B A 900.00
WHERE boss IS NULL 2 B A.B.
UNION ALL
G B
SELECT E1.*, 800.00 3 G A.B.G.
E2.lvl + 1 AS lvl,
C LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
A 900.00 2 C A.C.
E2.path || E1.emp || ‘.’ AS path
D FROM orgchart 800.00
C 3 D A.C.D.
JOIN hierarchy AS E2
E ON E1.boss =700.00
C E2.emp 3 E A.C.E.
)
SELECT *
F C 600.00 3 F A.C.F.
FROM hierarchy
ORDER BY path;
29
73. Recursive CTEs
-- Sub-tree below employee 'A', with the recursion capped by level.
WITH RECURSIVE hierarchy AS (
    -- non recursive term: direct reports of 'A'
    SELECT *,
           1 AS lvl,
           CAST(emp AS TEXT) AS tree,
           -- path is required by the final ORDER BY
           emp || '.' AS path
    FROM orgchart
    WHERE boss = 'A'
    UNION ALL
    -- recursive term: E1 is the employee, E2 the boss row already found
    SELECT E1.*,
           E2.lvl + 1 AS lvl,
           LPAD(E1.emp, E2.lvl * 2, ' ') AS tree,
           E2.path || E1.emp || '.' AS path
    FROM orgchart AS E1
    JOIN hierarchy AS E2 ON E1.boss = E2.emp
    WHERE E2.lvl < 3 -- termination condition
)
SELECT *
FROM hierarchy
ORDER BY path;
30
78. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
79. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
80. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
81. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
82. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
83. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
84. Transitive Closure with CTEs
-- Every path in the directed graph, with its length and the dotted list
-- of nodes it visits.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: each edge is a path of length 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop
            ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
ORDER BY a, b, distance;
32
85. Transitive Closure with CTEs
WITHa RECURSIVE transitive_closure (a, path_string
b distance b, distance,
path_string) AS
1 3 1
( SELECT a, b, 1 AS distance, .1.3.
'.' || a || '.' || b || '.' AS path_string
FROM edges 4
1 2 .1.3.4.
UNION ALL 5
1 2 .1.3.5.
1 6 2 .1.3.6.
SELECT tc.a, e.b, tc.distance + 1,
tc.path_string || e.b || '.' AS path_string
1 6
FROM edges AS e 3 .1.3.5.6.
JOIN transitive_closure AS tc
1 ON e.a = tc.b
7 3 .1.3.4.7.
WHERE tc.path_string NOT LIKE '%.' || e.b || '.%'
) 2 1 1 .2.1.
SELECT * FROM transitive_closure
... BY a, b, distance; ...
ORDER ... ...
32
88. Travel Planning: Possible Routes
-- All cycle-free routes that start at node 1 and end at node 6.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: edges leaving the source node
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
        WHERE a = 1 -- source
    UNION ALL
        -- step: extend a known route by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the route
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
WHERE b = 6 -- destination
ORDER BY a, b, distance;
34
89. Travel Planning: Possible Routes
-- All cycle-free routes that start at node 1 and end at node 6.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: edges leaving the source node
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges
        WHERE a = 1 -- source
    UNION ALL
        -- step: extend a known route by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges AS hop ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the route
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT *
FROM transitive_closure
WHERE b = 6 -- destination
ORDER BY a, b, distance;
34
90. Travel Planning: Possible Routes
WITH RECURSIVE transitive_closure (a, b, distance,
path_string) AS
( SELECT a, b, 1 AS distance,
'.' || a || '.' || b || '.' AS path_string
FROM edges
WHERE a = 1 -- source
a
UNION ALL b distance path_string
SELECT tc.a, e.b, tc.distance + 1,
1 tc.path_string ||2e.b || '.' AS path_string
6 .1.3.6.
FROM edges AS e
JOIN transitive_closure 3 tc ON e.a = tc.b
1 6 AS .1.3.5.6.
WHERE tc.path_string NOT LIKE '%.' || e.b || '.%'
)
SELECT * FROM transitive_closure
WHERE b = 6 -- destination
ORDER BY a, b, distance;
34
93. LinkedIN: Degrees of Separation
-- Degrees of separation: smallest path length from node 1 to each node
-- it can reach.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: edges leaving node 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 1
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges2 AS hop ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT a, b, MIN(distance) AS dist
FROM transitive_closure
--WHERE b = 6 (or where name matches pattern)
GROUP BY a, b
ORDER BY a, dist, b;
36
94. LinkedIN: Degrees of Separation
-- Degrees of separation: smallest path length from node 1 to each node
-- it can reach.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: edges leaving node 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 1
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges2 AS hop ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT a, b, MIN(distance) AS dist
FROM transitive_closure
--WHERE b = 6 (or where name matches pattern)
GROUP BY a, b
ORDER BY a, dist, b;
36
95. LinkedIN: Degrees of Separation
-- Degrees of separation: smallest path length from node 1 to each node
-- it can reach.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        -- seed: edges leaving node 1
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 1
    UNION ALL
        -- step: extend a known path by one edge leaving its end node
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges2 AS hop ON hop.a = reached.b
        -- cycle guard: skip nodes already present in the path
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
)
SELECT a, b, MIN(distance) AS dist
FROM transitive_closure
--WHERE b = 6 (or where name matches pattern)
GROUP BY a, b
ORDER BY a, dist, b;
36
96. LinkedIN: Degrees of Separation
WITH RECURSIVE transitive_closure (a, b, distance,
path_string)
AS
a b
( SELECT a, b, 1 AS distance, dist
'.' || a || '.' || b || '.' AS path_string
1
FROM edges2 2 1
WHERE a = 1
UNION 1ALL 3 1
SELECT1 tc.a, e.b, tc.distance + 1,
5 1
tc.path_string || e.b || '.' AS path_string
1
FROM edges2 AS e 6 1
JOIN transitive_closure AS tc ON e.a = tc.b
WHERE tc.path_string NOT 4
1 2
LIKE '%.' || e.b || '.%'
)
... ... ...
SELECT a, b, MIN(distance) AS dist FROM transitive_closure
--WHERE b = 6 (or where name matches pattern)
GROUP BY a, b ORDER BY a, dist, b;
36
99. LinkedIN: Shared Nodes (1)
SELECT b FROM (
[SELECT b FROM transitive_closure]
WHERE a = 1 -- set the starting node (node1)
...
WHERE distance = 0 -- limit the recursion to the first step
UNION ALL
[SELECT b FROM transitive_closure]
WHERE a = 4 -- set the other starting node (node4)
...
WHERE distance = 0 -- limit the recursion to the first step
)
AS immediate_connections
GROUP BY b
HAVING COUNT(b) > 1;
38
100. LinkedIN: Shared Nodes (1)
SELECT b FROM (
[SELECT b FROM transitive_closure]
WHERE a = 1 -- set the starting node (node1)
...
WHERE distance = 0 -- limit the recursion to the first step
UNION ALL
[SELECT b FROM transitive_closure]
WHERE a = 4 -- set the other starting node (node4)
...
WHERE distance = 0 -- limit the recursion to the first step
)
AS immediate_connections
GROUP BY b
HAVING COUNT(b) > 1;
38
101. LinkedIN: Shared Nodes (2)
-- Keeps every b that occurs more than once in the combined lists of
-- direct connections of node1 and node4.
SELECT immediate_connections.b
FROM (
    SELECT b FROM edges2 WHERE a = 1 -- set the starting node (node1)
    UNION ALL
    SELECT b FROM edges2 WHERE a = 4 -- set the other starting node (node4)
) AS immediate_connections
GROUP BY immediate_connections.b
HAVING COUNT(immediate_connections.b) > 1;
39
102. LinkedIN: Shared Nodes (3)
-- Direct connections of node1 that node4 is also directly connected to.
SELECT mine.b
FROM edges2 AS mine
WHERE mine.a = 1 -- node1
  AND EXISTS (
      SELECT *
      FROM edges2 AS theirs
      WHERE theirs.a = 4 -- node4
        AND theirs.b = mine.b
  )
40
103. LinkedIN: Shared Nodes (3)
-- Direct connections of node1 that node4 is also directly connected to.
SELECT mine.b
FROM edges2 AS mine
WHERE mine.a = 1 -- node1
  AND EXISTS (
      SELECT *
      FROM edges2 AS theirs
      WHERE theirs.a = 4 -- node4
        AND theirs.b = mine.b
  )
40
106. LinkedIN: Shared Nodes (4)
-- Shared connections as a set intersection of the two neighbour lists.
SELECT n1.b
FROM edges2 AS n1
WHERE n1.a = 1
INTERSECT
SELECT n4.b
FROM edges2 AS n4
WHERE n4.a = 4
A’s B’s
connections connections
shared
connections
41
111. LinkedIN: How is connected to you
-- Explores the graph outwards from node 8 (never stepping onto node 1),
-- then reports which of node 1's direct connections were reached and at
-- what distance, counting the extra hop from node 1.
WITH RECURSIVE tc (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 8 -- set the target node
    UNION ALL
        SELECT tc.a, hop.b, tc.distance + 1,
               tc.path_string || hop.b || '.' AS path_string
        FROM tc
        JOIN edges2 AS hop ON hop.a = tc.b
        WHERE tc.path_string NOT LIKE '%.' || hop.b || '.%'
          AND hop.b <> 1
          AND tc.distance < 5
)
SELECT b AS immediate_connection,
       MIN(distance) + 1 AS distance_via_node
FROM tc
WHERE EXISTS (
    SELECT *
    FROM edges2 AS mine
    WHERE mine.a = 1
      AND mine.b = tc.b
)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
112. LinkedIN: How is connected to you
-- Explores the graph outwards from node 8 (never stepping onto node 1),
-- then reports which of node 1's direct connections were reached and at
-- what distance, counting the extra hop from node 1.
WITH RECURSIVE tc (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 8 -- set the target node
    UNION ALL
        SELECT tc.a, hop.b, tc.distance + 1,
               tc.path_string || hop.b || '.' AS path_string
        FROM tc
        JOIN edges2 AS hop ON hop.a = tc.b
        WHERE tc.path_string NOT LIKE '%.' || hop.b || '.%'
          AND hop.b <> 1
          AND tc.distance < 5
)
SELECT b AS immediate_connection,
       MIN(distance) + 1 AS distance_via_node
FROM tc
WHERE EXISTS (
    SELECT *
    FROM edges2 AS mine
    WHERE mine.a = 1
      AND mine.b = tc.b
)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
113. LinkedIN: How is connected to you
WITH RECURSIVE tc (a, b, distance, path_string) AS
( SELECT a, b, 1 AS distance,
'.' || a || '.' || b || '.' AS path_string
FROM edges2
WHERE a = 8 -- set the target node
UNION ALL
SELECT tc.a, e.b, tc.distance + 1,
tc.path_string || e.b || '.' AS path_string
FROM edges2 AS e
JOIN tc ON e.a = tc.b
WHERE tc.path_string NOT LIKE '%.' || e.b || '.%'
AND e.b <> 1 you
AND tc.distance < 5
)
SELECT b AS immediate_connection, MIN(distance) + 1 AS
distance_via_node FROM tc
WHERE EXISTS (SELECT * FROM edges2 WHERE a = 1 AND b = tc.b)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
114. LinkedIN: How is connected to you
-- Explores the graph outwards from node 8 (never stepping onto node 1),
-- then reports which of node 1's direct connections were reached and at
-- what distance, counting the extra hop from node 1.
WITH RECURSIVE tc (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 8 -- set the target node
    UNION ALL
        SELECT tc.a, hop.b, tc.distance + 1,
               tc.path_string || hop.b || '.' AS path_string
        FROM tc
        JOIN edges2 AS hop ON hop.a = tc.b
        WHERE tc.path_string NOT LIKE '%.' || hop.b || '.%'
          AND hop.b <> 1
          AND tc.distance < 5
)
SELECT b AS immediate_connection,
       MIN(distance) + 1 AS distance_via_node
FROM tc
WHERE EXISTS (
    SELECT *
    FROM edges2 AS mine
    WHERE mine.a = 1
      AND mine.b = tc.b
)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
115. LinkedIN: How is connected to you
WITH RECURSIVE tc (a, b, distance, path_string) AS
( SELECT a, b, 1 AS distance,
'.' || a || '.' || b || '.' AS path_string
FROM edges2
WHERE a = 8 -- set the target node
UNION ALL
SELECT tc.a, e.b, tc.distance + 1,
tc.path_string || e.b || '.' AS path_string
FROM edges2 AS e
JOIN tc ON e.a = tc.b
WHERE tc.path_string NOT LIKE '%.' || e.b || '.%'
AND e.b <> 1
AND tc.distance < 5
)
SELECT b AS immediate_connection, MIN(distance) + 1 AS
distance_via_node FROM tc
WHERE EXISTS (SELECT * FROM edges2 WHERE a = 1 AND b = tc.b)
GROUP BY b
you
HAVING MIN(distance) > 1
ORDER BY b
44
116. LinkedIN: How is connected to you
-- Explores the graph outwards from node 8 (never stepping onto node 1),
-- then reports which of node 1's direct connections were reached and at
-- what distance, counting the extra hop from node 1.
WITH RECURSIVE tc (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 8 -- set the target node
    UNION ALL
        SELECT tc.a, hop.b, tc.distance + 1,
               tc.path_string || hop.b || '.' AS path_string
        FROM tc
        JOIN edges2 AS hop ON hop.a = tc.b
        WHERE tc.path_string NOT LIKE '%.' || hop.b || '.%'
          AND hop.b <> 1
          AND tc.distance < 5
)
SELECT b AS immediate_connection,
       MIN(distance) + 1 AS distance_via_node
FROM tc
WHERE EXISTS (
    SELECT *
    FROM edges2 AS mine
    WHERE mine.a = 1
      AND mine.b = tc.b
)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
117. LinkedIN: How is connected to you
-- Explores the graph outwards from node 8 (never stepping onto node 1),
-- then reports which of node 1's direct connections were reached and at
-- what distance, counting the extra hop from node 1.
WITH RECURSIVE tc (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 8 -- set the target node
    UNION ALL
        SELECT tc.a, hop.b, tc.distance + 1,
               tc.path_string || hop.b || '.' AS path_string
        FROM tc
        JOIN edges2 AS hop ON hop.a = tc.b
        WHERE tc.path_string NOT LIKE '%.' || hop.b || '.%'
          AND hop.b <> 1
          AND tc.distance < 5
)
SELECT b AS immediate_connection,
       MIN(distance) + 1 AS distance_via_node
FROM tc
WHERE EXISTS (
    SELECT *
    FROM edges2 AS mine
    WHERE mine.a = 1
      AND mine.b = tc.b
)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
118. LinkedIN: How is connected to you
WITH RECURSIVE tc (a, b, distance, path_string) AS
( SELECT a, b, 1 AS distance,
'.' || a || '.' || b || '.' AS path_string
FROM edges2
immediate_connection distance_via_node
WHERE a = 8 -- set the target node
UNION ALL
2
SELECT tc.a, e.b, tc.distance + 1, 4
tc.path_string || e.b || '.' AS path_string
FROM edges2 AS 3 4
you e
JOIN tc ON e.a = tc.b
n2
5 4
WHERE tc.path_string NOT LIKE '%.' || e.b || '.%'
AND e.b <> 1
)
6
AND tc.distance < 5 3
SELECT b AS immediate_connection, MIN(distance) + 1 AS
distance_via_node FROM tc
WHERE EXISTS (SELECT * FROM edges2 WHERE a = 1 AND b = tc.b)
GROUP BY b
HAVING MIN(distance) > 1
ORDER BY b
44
122. Facebook: You might also know (1)
-- Nodes whose shortest path from node 1 is exactly two steps long.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 1 -- set the starting node
    UNION ALL
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges2 AS hop ON hop.a = reached.b
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
          -- stop extending once a path is two steps long
          AND reached.distance < 2
)
SELECT a, b
FROM transitive_closure
GROUP BY a, b
HAVING MIN(distance) = 2 -- select nodes 2 steps away
46
123. Facebook: You might also know (1)
-- Nodes whose shortest path from node 1 is exactly two steps long.
WITH RECURSIVE transitive_closure (a, b, distance, path_string) AS (
        SELECT a, b, 1 AS distance,
               '.' || a || '.' || b || '.' AS path_string
        FROM edges2
        WHERE a = 1 -- set the starting node
    UNION ALL
        SELECT reached.a, hop.b, reached.distance + 1,
               reached.path_string || hop.b || '.' AS path_string
        FROM transitive_closure AS reached
        JOIN edges2 AS hop ON hop.a = reached.b
        WHERE reached.path_string NOT LIKE '%.' || hop.b || '.%'
          -- stop extending once a path is two steps long
          AND reached.distance < 2
)
SELECT a, b
FROM transitive_closure
GROUP BY a, b
HAVING MIN(distance) = 2 -- select nodes 2 steps away
46
124. Facebook: You might also know (2)
-- Suggests nodes two steps away that share at least one feature with you
-- and reports which feature matched (feat1 is checked before feat2).
SELECT youmightknow.a AS you,
       youmightknow.b AS might_know,
       CASE WHEN n1.feat1 = n2.feat1 THEN 'feat1'
            WHEN n1.feat2 = n2.feat2 THEN 'feat2'
            ELSE 'nothing'
       END AS common_feature
FROM (
    -- placeholder: CTE for nodes 2 steps away (previous slide)
) AS youmightknow
-- INNER JOIN states the real semantics: the WHERE clause below rejects any
-- NULL-extended row, so a LEFT JOIN here would return the same rows.
INNER JOIN nodes AS n1 ON youmightknow.a = n1.id
INNER JOIN nodes AS n2 ON youmightknow.b = n2.id
WHERE n1.feat1 = n2.feat1
   OR n1.feat2 = n2.feat2;
47
125. Facebook: You might also know (2)
-- Suggests nodes two steps away that share at least one feature with you
-- and reports which feature matched (feat1 is checked before feat2).
SELECT youmightknow.a AS you,
       youmightknow.b AS might_know,
       CASE WHEN n1.feat1 = n2.feat1 THEN 'feat1'
            WHEN n1.feat2 = n2.feat2 THEN 'feat2'
            ELSE 'nothing'
       END AS common_feature
FROM (
    -- placeholder: CTE for nodes 2 steps away (previous slide)
) AS youmightknow
-- INNER JOIN states the real semantics: the WHERE clause below rejects any
-- NULL-extended row, so a LEFT JOIN here would return the same rows.
INNER JOIN nodes AS n1 ON youmightknow.a = n1.id
INNER JOIN nodes AS n2 ON youmightknow.b = n2.id
WHERE n1.feat1 = n2.feat1
   OR n1.feat2 = n2.feat2;
47
126. Facebook: You might also know (2)
SELECT a AS you,
b AS might_know,
CASE WHEN n1.feat1 = n2.feat1 THEN 'feat1'
WHEN n1.feat2 = n2.feat2 THEN 'feat2'
ELSE 'nothing'
END AS common_feature
FROM (
you might_know common_feature
1 7 feat1
CTE for nodes 2 steps away (previous slide)
1 13
) AS youmightknow feat2
LEFT JOIN nodes AS n1 ON youmightknow.a = n1.id
LEFT JOIN nodes AS n2 ON youmightknow.b = n2.id
WHERE n1.feat1 = n2.feat1
OR n1.feat2 = n2.feat2;
47
128. Sample Table
company
id emp dept salary
1 Robert Management 2500
2 Jane Sales 1800
3 Dave Sales 1700
4 Clare Development 1900
5 Richard Development 1500
6 Simon Development 1850
7 Elizabeth Tech Support 1350
8 Dylan Tech Support 1450
... ... ... ...
49
129. Aggregates
-- Average salary per department: one output row per dept.
SELECT company.dept,
       AVG(company.salary)
FROM company
GROUP BY company.dept
dept AVG(salary)
Management 2500
Sales 1750
Development 1750
Tech Support 1400
50
130. Window Functions (SQL-2003)
-- Every employee row is kept; the department total is repeated on each row.
SELECT id,
       emp,
       dept,
       salary,
       SUM(salary) OVER dept_w
FROM company
WINDOW dept_w AS (PARTITION BY dept)
id emp dept salary SUM(salary)
1 Robert Management 2500 2500
2 Jane Sales 1800 3500
3 Dave Sales 1700 3500
4 Clare Development 1900 5250
5 Richard Development 1500 5250
6 Simon Development 1850 5250
7 Elizabeth Tech Support 1350 2800
8 Dylan Tech Support 1450 2800
51
131. Window Functions (SQL-2003)
-- Difference between each salary and the average of its department,
-- rounded to a whole number.
SELECT id,
       emp,
       dept,
       salary,
       ROUND(salary - AVG(salary) OVER dept_w, 0) AS diff_avg_dept
FROM company
WINDOW dept_w AS (PARTITION BY dept)
id emp dept salary diff_avg_dept
1 Robert Management 2500 0
2 Jane Sales 1800 50
3 Dave Sales 1700 -50
4 Clare Development 1900 150
5 Richard Development 1500 -250
6 Simon Development 1850 100
7 Elizabeth Tech Support 1350 -50
8 Dylan Tech Support 1450 50
52
134. Window Functions Syntax
function (args) OVER (
[partition clause]
[order by clause]
[frame clause]
)
PARTITION BY expr, ...
(if empty, over the entire result set)
54
135. Window Functions Syntax
function (args) OVER (
[partition clause]
[order by clause]
[frame clause]
)
ORDER BY expr [ASC|DESC]
[NULLS FIRST|LAST], ...
(useful for order and ranking functions)
54
136. Window Functions Syntax
function (args) OVER (
[partition clause]
[order by clause]
[frame clause]
)
(ROWS|RANGE) [BETWEEN
(UNBOUNDED|expr)
(PRECEDING|FOLLOWING)
AND] CURRENT ROW
54
138. Named Windows and Ranking
-- Compares the three ranking functions over one shared named window
-- (highest salary first).
SELECT
    emp,
    dept,
    salary,
    row_number() OVER w,
    rank()       OVER w,
    dense_rank() OVER w
FROM company
WINDOW w AS (ORDER BY salary DESC)
ORDER BY salary DESC
emp dept salary row_number rank dense_rank
Robert Management 2500 1 1 1
Clare Development 1900 2 2 2
Simon Development 1800 3 3 3
Jane Sales 1800 4 3 3
Dave Sales 1700 5 5 4
Richard Development 1500 6 6 5
Dylan Tech Support 1450 7 7 6
Elizabeth Tech Support 1350 8 8 7
55
139. Named Windows and Ranking
-- Compares the three ranking functions over one shared named window
-- (highest salary first).
SELECT
    emp,
    dept,
    salary,
    row_number() OVER w,
    rank()       OVER w,
    dense_rank() OVER w
FROM company
WINDOW w AS (ORDER BY salary DESC)
ORDER BY salary DESC
emp dept salary row_number rank dense_rank
Robert Management 2500 1 1 1
Clare Development 1900 2 2 2
Simon Development 1800 3 3 3
Jane Sales 1800 4 3 3
Dave Sales 1700 5 5 4
Richard Development 1500 6 6 5
Dylan Tech Support 1450 7 7 6
Elizabeth Tech Support 1350 8 8 7
55
140. Named Windows and Ranking
-- Compares the three ranking functions over one shared named window
-- (highest salary first).
SELECT
    emp,
    dept,
    salary,
    row_number() OVER w,
    rank()       OVER w,
    dense_rank() OVER w
FROM company
WINDOW w AS (ORDER BY salary DESC)
ORDER BY salary DESC
emp dept salary row_number rank dense_rank
Robert Management 2500 1 1 1
Clare Development 1900 2 2 2
Simon Development 1800 3 3 3
Jane Sales 1800 4 3 3
Dave Sales 1700 5 5 4
Richard Development 1500 6 6 5
Dylan Tech Support 1450 7 7 6
Elizabeth Tech Support 1350 8 8 7
55
141. Cumulative Sum
-- Per-customer running total (in purchase_date order) next to the
-- customer's overall total.
SELECT
    id,
    customer,
    article,
    price,
    SUM(price) OVER (
        PARTITION BY customer
        ORDER BY purchase_date
        -- shorthand for BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ROWS UNBOUNDED PRECEDING
    ) AS cumulative,
    SUM(price) OVER (PARTITION BY customer) AS tot_price
FROM orders
ORDER BY customer
56
142. Cumulative Sum
-- Per-customer running total (in purchase_date order) next to the
-- customer's overall total.
SELECT
    id,
    customer,
    article,
    price,
    SUM(price) OVER (
        PARTITION BY customer
        ORDER BY purchase_date
        -- shorthand for BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ROWS UNBOUNDED PRECEDING
    ) AS cumulative,
    SUM(price) OVER (PARTITION BY customer) AS tot_price
FROM orders
ORDER BY customer
56
146. Multiple Partitions
-- Several window aggregates with different partitions in one query:
-- running total per customer, total per customer and total per article.
SELECT id,
       customer,
       article,
       price,
       SUM(price) OVER (PARTITION BY customer
                        ORDER BY purchase_date
                        ROWS BETWEEN UNBOUNDED PRECEDING
                        AND CURRENT ROW) AS cumulative,
       SUM(price) OVER (PARTITION BY customer)
           AS tot_price,
       SUM(price) OVER (PARTITION BY article)
           AS tot_price_by_article
FROM orders
ORDER BY customer
58
147. Multiple Partitions
-- Several window aggregates with different partitions in one query:
-- running total per customer, total per customer and total per article.
SELECT id,
       customer,
       article,
       price,
       SUM(price) OVER (PARTITION BY customer
                        ORDER BY purchase_date
                        ROWS BETWEEN UNBOUNDED PRECEDING
                        AND CURRENT ROW) AS cumulative,
       SUM(price) OVER (PARTITION BY customer)
           AS tot_price,
       SUM(price) OVER (PARTITION BY article)
           AS tot_price_by_article
FROM orders
ORDER BY customer
58
149. Address Book
-- Splits the alphabetically sorted contacts into chunks of 250 names and
-- reports, per chunk, the first and last name, the size and the grand total.
WITH f_names AS (
    SELECT firstname,
           -- zero-based position in alphabetical order
           row_number() OVER (ORDER BY firstname) - 1 AS rn,
           COUNT(*) OVER () AS tot
    FROM my_contacts
)
SELECT MIN(firstname) || ' - ' || MAX(firstname) AS name_range,
       TRUNC(rn / 250) + 1 AS chunk,
       COUNT(*) AS chunk_size,
       tot
FROM f_names
GROUP BY chunk, tot
ORDER BY name_range
60
150. Address Book
-- Splits the alphabetically sorted contacts into chunks of 250 names and
-- reports, per chunk, the first and last name, the size and the grand total.
WITH f_names AS (
    SELECT firstname,
           -- zero-based position in alphabetical order
           row_number() OVER (ORDER BY firstname) - 1 AS rn,
           COUNT(*) OVER () AS tot
    FROM my_contacts
)
SELECT MIN(firstname) || ' - ' || MAX(firstname) AS name_range,
       TRUNC(rn / 250) + 1 AS chunk,
       COUNT(*) AS chunk_size,
       tot
FROM f_names
GROUP BY chunk, tot
ORDER BY name_range
60
151. Address Book
-- Splits the alphabetically sorted contacts into chunks of 250 names and
-- reports, per chunk, the first and last name, the size and the grand total.
WITH f_names AS (
    SELECT firstname,
           -- zero-based position in alphabetical order
           row_number() OVER (ORDER BY firstname) - 1 AS rn,
           COUNT(*) OVER () AS tot
    FROM my_contacts
)
SELECT MIN(firstname) || ' - ' || MAX(firstname) AS name_range,
       TRUNC(rn / 250) + 1 AS chunk,
       COUNT(*) AS chunk_size,
       tot
FROM f_names
GROUP BY chunk, tot
ORDER BY name_range
60
152. Address Book
-- Splits the alphabetically sorted contacts into chunks of 250 names and
-- reports, per chunk, the first and last name, the size and the grand total.
WITH f_names AS (
    SELECT firstname,
           -- zero-based position in alphabetical order
           row_number() OVER (ORDER BY firstname) - 1 AS rn,
           COUNT(*) OVER () AS tot
    FROM my_contacts
)
SELECT MIN(firstname) || ' - ' || MAX(firstname) AS name_range,
       TRUNC(rn / 250) + 1 AS chunk,
       COUNT(*) AS chunk_size,
       tot
FROM f_names
GROUP BY chunk, tot
ORDER BY name_range
60
154. Address Book
SELECT UPPER(SUBSTR(MIN(firstname), 1, 2)) || ' - ' ||
UPPER(SUBSTR(MAX(firstname), 1, 2))
AS name_range,
...
name_range chunk chunk_size tot
AA - AN 1 250 5494
AN - BE 2 250 5494
BE - CA 3 250 5494
... ... ... ...
VE - ZU 22 244 5494
62