The story of a challenging PoC that proved Postgres can achieve the same performance as Oracle Exadata. The schema that was migrated wasn’t the simplest one you might see; it was quite the opposite. The code was loaded with dynamic queries, BULK COLLECTs, nested loops, CONNECT BY statements, global variables and a lot of dependencies.
1. The story about the migration
Sushant Pandey Engineering Architect
Postgres Build 2020, Dec 9.
Alicja Kucharczyk EMEA Global Black Belt
OSS Data Tech Specialist
4. PoC Scope
2 packages from 1 schema
+ single objects and schemas from dependent packages, e.g. for
logging
5. Success Criteria
• Comparable performance to the existing Oracle Exadata instance
• Based on a test script provided by the customer, which tests the performance
of the 2 packages.
14. SYS_CONNECT_BY_PATH
Valid only in hierarchical
queries. It returns the path
of a column value from root
to node, with column values
separated by char, for each
row returned by the CONNECT
BY condition.
-- Oracle hierarchical query: starts at the employee whose last_name is
-- 'Kochhar' and descends to the rows whose manager_id equals the parent row's
-- employee_id. Each row shows the '/'-separated path of last names from the
-- start row, prefixed by LPAD(' ', 2*level-1) so deeper levels are indented.
SELECT LPAD(' ', 2*level-1)||SYS_CONNECT_BY_PATH(last_name, '/')
"Path"
FROM employees
START WITH last_name = 'Kochhar'
CONNECT BY PRIOR employee_id = manager_id;
Path
---------------------------------------------------------------
/Kochhar
/Kochhar/Greenberg
/Kochhar/Greenberg/Faviet
/Kochhar/Greenberg/Chen
/Kochhar/Greenberg/Sciarra
/Kochhar/Greenberg/Urman
/Kochhar/Greenberg/Popp
/Kochhar/Whalen
/Kochhar/Mavris
/Kochhar/Baer
/Kochhar/Higgins
/Kochhar/Higgins/Gietz
https://docs.oracle.com/cd/B19306_01/server.102/b14200/functions164.htm
15. CREATE OR REPLACE TYPE FOO_TYPE IS
TABLE OF INTEGER;
-- Oracle PL/SQL demo: joins the elements of a FOO_TYPE collection into one
-- comma-separated string, in ascending order, using SYS_CONNECT_BY_PATH, and
-- prints the result with DBMS_OUTPUT.
CREATE OR REPLACE PROCEDURE SysConnectByPath
IS
bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5);
bar_ids_string VARCHAR(1000);
BEGIN
-- SYS_CONNECT_BY_PATH yields ',v1,v2,...'; SUBSTR(..., 2) drops the leading comma.
SELECT SUBSTR(SYS_CONNECT_BY_PATH(column_value, ','), 2) csv
INTO bar_ids_string
-- Inner query: rn numbers the elements in ascending value order, cnt is the element count.
FROM (SELECT column_value,
ROW_NUMBER() OVER (ORDER BY column_value ) rn,
COUNT(*) OVER () cnt
FROM TABLE (bar_ids_tab))
-- Keep only the row whose path reaches the last element, i.e. the complete list.
WHERE rn = cnt
-- Chain the rows 1 -> 2 -> ... -> cnt: a row's parent is the row whose rn is one lower.
START WITH rn = 1
CONNECT BY rn = PRIOR rn + 1;
DBMS_OUTPUT.PUT_LINE('out ' || bar_ids_string);
END;
completed in 45 ms
out 3,4,5,20,111
16. ora2pg -i SysConnectByPath.sql -t PROCEDURE -c config/ora2pg.conf
-- Unedited ora2pg translation of the Oracle SysConnectByPath procedure, kept
-- exactly as the tool produced it.
-- NOTE(review): this does not look like runnable PL/pgSQL. The WITH RECURSIVE
-- header is fused onto a declaration line, BEGIN appears inside both CTE
-- branches, the Oracle-style TABLE(bar_ids_tab) source is retained, the second
-- branch joins on alias7 although its subquery carries no such alias, and the
-- SYS_CONNECT_BY_PATH call has been reduced to SUBSTR(column_value, 2).
CREATE OR REPLACE FUNCTION sysconnectbypath() RETURNS VOID AS
$body$
DECLARE
bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5);
bar_ids_string VARCHAR(1000);WITH RECURSIVE cte AS (
BEGIN
SELECT SUBSTR(column_value, 2) CSV
INTO STRICT bar_ids_string
FROM (SELECT column_value,
ROW_NUMBER() OVER (ORDER BY column_value ) rn,
COUNT(*) OVER () cnt
FROM TABLE(bar_ids_tab) alias6) alias7
WHERE rn = 1
UNION ALL
BEGIN
SELECT C.bar_ids_string || ',' || SUBSTR(column_value, 2) CSV
INTO STRICT bar_ids_string
FROM (SELECT column_value,
ROW_NUMBER() OVER (ORDER BY column_value ) rn,
COUNT(*) OVER () cnt
FROM TABLE(bar_ids_tab) alias6) JOIN cte C ON (C.rn + 1 = alias7.rn)
) SELECT * FROM cte WHERE rn = cnt;
;
RAISE NOTICE 'out %', bar_ids_string;
END;
$body$
LANGUAGE PLPGSQL;
17. Let’s get rid of WITH RECURSIVE
-- Anonymous PL/pgSQL block: joins the elements of a NUMERIC array into one
-- comma-separated string in ascending order and reports it as a NOTICE.
DO
$$
DECLARE
    ids    NUMERIC[] := '{111, 20, 3, 4, 5}';
    joined TEXT;
BEGIN
    -- The ordered aggregate sorts the elements while concatenating them.
    SELECT string_agg(elem.v::TEXT, ',' ORDER BY elem.v)
    INTO joined
    FROM unnest(ids) AS elem(v);

    RAISE NOTICE '%', joined;
END;
$$
[00000] 3,4,5,20,111
completed in 3 ms
18. Postgres WINS!
-- Anonymous PL/pgSQL block: joins the elements of a NUMERIC array into one
-- comma-separated string in ascending order and reports it as a NOTICE.
DO
$$
DECLARE
    ids    NUMERIC[] := '{111, 20, 3, 4, 5}';
    joined TEXT;
BEGIN
    -- The ordered aggregate sorts the elements while concatenating them.
    SELECT string_agg(elem.v::TEXT, ',' ORDER BY elem.v)
    INTO joined
    FROM unnest(ids) AS elem(v);

    RAISE NOTICE '%', joined;
END;
$$
-- Collection type used by the procedure below: a table of integers.
CREATE OR REPLACE TYPE FOO_TYPE IS
TABLE OF INTEGER;
-- Oracle PL/SQL demo: joins the elements of a FOO_TYPE collection into one
-- comma-separated string, in ascending order, using SYS_CONNECT_BY_PATH, and
-- prints the result with DBMS_OUTPUT.
CREATE OR REPLACE PROCEDURE SysConnectByPath
IS
bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5);
bar_ids_string VARCHAR(1000);
BEGIN
-- SYS_CONNECT_BY_PATH yields ',v1,v2,...'; SUBSTR(..., 2) drops the leading comma.
SELECT SUBSTR(SYS_CONNECT_BY_PATH(column_value, ','), 2) csv
INTO bar_ids_string
-- Inner query: rn numbers the elements in ascending value order, cnt is the element count.
FROM (SELECT column_value,
ROW_NUMBER() OVER (ORDER BY column_value ) rn,
COUNT(*) OVER () cnt
FROM TABLE (bar_ids_tab))
-- Keep only the row whose path reaches the last element, i.e. the complete list.
WHERE rn = cnt
-- Chain the rows 1 -> 2 -> ... -> cnt: a row's parent is the row whose rn is one lower.
START WITH rn = 1
CONNECT BY rn = PRIOR rn + 1;
DBMS_OUTPUT.PUT_LINE('out ' || bar_ids_string);
END;
[00000] 3,4,5,20,111
completed in 3 ms
out 3,4,5,20,111
completed in 45 ms
20. BULK COLLECT
With the BULK COLLECT clause, each of the preceding statements retrieves an
entire result set and stores it in one or more collection variables in a single
operation (which is more efficient than using a loop statement to retrieve one
result row at a time).
https://docs.oracle.com/database/121/LNPLS/tuning.htm#LNPLS891
21. Oracle snippet
-- Oracle PL/SQL demo of BULK COLLECT: reads secret_data rows through a cursor
-- in batches of up to 1000 and, for every row of a batch, builds a string from
-- the my_flag values of earlier rows in that batch. some_string is reset on
-- every outer iteration and is neither returned nor printed.
CREATE OR REPLACE PROCEDURE BulkCollect(p_version IN NUMBER)
IS
-- Rows for one foo_id, ordered by bar_id. The cursor parameter has the same
-- name as the procedure parameter.
CURSOR get_foo (p_version number) IS
SELECT * FROM secret_data
WHERE foo_id = p_version
ORDER BY bar_id;
-- Integer-indexed collection holding one fetched batch of cursor rows.
TYPE my_type IS TABLE of get_foo%ROWTYPE INDEX BY pls_integer;
my_tab my_type;
some_string VARCHAR2(5000);
BEGIN
-- NOTE(review): the cursor is opened with the literal 1; the procedure's own
-- p_version argument is not used anywhere in the body - confirm this is intended.
OPEN get_foo(1);
-- Process one level at a time.
LOOP
FETCH get_foo
BULK COLLECT INTO my_tab LIMIT 1000;
-- An empty batch means the cursor is exhausted.
EXIT WHEN my_tab.COUNT = 0;
FOR indx IN 1 .. my_tab.COUNT
LOOP
some_string := '';
-- Walks from row indx - 1 down to row 2 of the current batch.
FOR prnt_indx IN REVERSE 2..indx - 1
LOOP
some_string := some_string || ', tab.' ||
TO_CHAR(my_tab(prnt_indx).my_flag);
END LOOP;
END LOOP;
END LOOP;
CLOSE get_foo;
END;
CALL BulkCollect(1)
completed in 41 ms
22. ora2pg -i BulkCollect.sql -t PROCEDURE -c config/ora2pg.conf
-- Unedited ora2pg translation of the Oracle BulkCollect procedure, kept exactly
-- as the tool produced it.
-- NOTE(review): Oracle-only constructs survive the translation (TYPE ... IS
-- TABLE OF RECORD INDEX BY, FETCH ... BULK COLLECT INTO ... LIMIT,
-- my_tab.COUNT), so this is presumably not runnable PL/pgSQL as is. The inner
-- loop bounds were also rewritten from "2..indx - 1" to "indx..2 - 1", which
-- does not describe the same range.
CREATE OR REPLACE FUNCTION bulkcollect(p_version BIGINT) RETURNS VOID AS
$body$
DECLARE
get_foo CURSOR (p_version BIGINT) FOR
SELECT *
FROM secret_data
WHERE foo_id = p_version
ORDER BY bar_id;
TYPE MY_TYPE IS TABLE OF RECORD INDEX BY INTEGER;
my_tab MY_TYPE;
some_string VARCHAR(5000);
BEGIN
OPEN get_foo(1);
-- Process one level at a time.
LOOP
FETCH get_foo BULK COLLECT INTO my_tab LIMIT 1000;
EXIT WHEN my_tab.COUNT = 0;
FOR indx IN 1 .. my_tab.COUNT
LOOP
some_string := '';
FOR prnt_indx IN REVERSE indx..2 - 1
LOOP
some_string := some_string || ', tab.' ||
my_tab[prnt_indx].my_flag::VARCHAR;
END LOOP;
END LOOP;
END LOOP;
CLOSE get_foo;
END;
$body$
LANGUAGE PLPGSQL
;
23. We don’t like cursors, right?
-- Returns the secret_data rows whose foo_id equals p_version, ordered by
-- bar_id. Plain SQL function, declared STABLE.
CREATE OR REPLACE FUNCTION get_foo(p_version INTEGER)
    RETURNS SETOF secret_data
    LANGUAGE sql
    STABLE
AS
$body$
    SELECT sd.*
    FROM secret_data AS sd
    WHERE sd.foo_id = p_version
    ORDER BY sd.bar_id;
$body$;
-- Oracle cursor declaration shown for comparison with the get_foo function:
-- same filter on foo_id and same ordering by bar_id.
CURSOR get_foo (p_version number) IS
SELECT * FROM secret_data
WHERE foo_id = p_version
ORDER BY bar_id;
24. PostgreSQL snippet
-- PostgreSQL version of the BulkCollect benchmark procedure.
--
-- Materializes get_foo(p_version) into a temporary table with a row number,
-- removes row 1, then for every row returned by get_foo(p_version) aggregates
-- the my_flag values of all remaining earlier rows (highest row number first)
-- into some_string. The string is recomputed per row and is not returned.
--
-- Parameters:
--   p_version  value passed to get_foo() to select the rows to process.
--
-- Side effects: (re)creates the temporary table temp_get_foo, which is dropped
-- at commit.
CREATE PROCEDURE BulkCollect(p_version INT)
LANGUAGE plpgsql AS
$body$
DECLARE
    some_string TEXT;
    get_foo_row RECORD;
BEGIN
    -- An earlier call in the same transaction may have left the table behind.
    DROP TABLE IF EXISTS temp_get_foo;

    -- Number the rows explicitly by bar_id rather than relying on the order in
    -- which the function happens to emit them.
    CREATE TEMPORARY TABLE temp_get_foo ON COMMIT DROP AS
    SELECT row_number() OVER (ORDER BY bar_id) AS rnum, *
    FROM get_foo(p_version);

    -- Row 1 is excluded from every aggregate computed below.
    DELETE FROM temp_get_foo WHERE rnum = 1;
    ANALYZE temp_get_foo;

    -- Process one level at a time.
    FOR get_foo_row IN SELECT * FROM get_foo(p_version)
    LOOP
        -- Rows strictly before the current one can never include the row with
        -- the highest rnum, so no separate max(rnum) exclusion is needed.
        SELECT 'tab.' || string_agg(my_flag, ', tab.' ORDER BY rnum DESC)
        INTO some_string
        FROM temp_get_foo
        WHERE rnum < (
            SELECT rnum
            FROM temp_get_foo
            WHERE bar_id = get_foo_row.bar_id);
    END LOOP;
END;
$body$;
CALL BulkCollect(1)
completed in 14 ms
25. Postgres WINS!
completed in 14 ms completed in 41 ms
-- PostgreSQL version of the BulkCollect benchmark procedure.
--
-- Materializes get_foo(p_version) into a temporary table with a row number,
-- removes row 1, then for every row returned by get_foo(p_version) aggregates
-- the my_flag values of all remaining earlier rows (highest row number first)
-- into some_string. The string is recomputed per row and is not returned.
--
-- Parameters:
--   p_version  value passed to get_foo() to select the rows to process.
--
-- Side effects: (re)creates the temporary table temp_get_foo, which is dropped
-- at commit.
CREATE PROCEDURE BulkCollect(p_version INT)
LANGUAGE plpgsql AS
$body$
DECLARE
    some_string TEXT;
    get_foo_row RECORD;
BEGIN
    -- An earlier call in the same transaction may have left the table behind.
    DROP TABLE IF EXISTS temp_get_foo;

    -- Number the rows explicitly by bar_id rather than relying on the order in
    -- which the function happens to emit them.
    CREATE TEMPORARY TABLE temp_get_foo ON COMMIT DROP AS
    SELECT row_number() OVER (ORDER BY bar_id) AS rnum, *
    FROM get_foo(p_version);

    -- Row 1 is excluded from every aggregate computed below.
    DELETE FROM temp_get_foo WHERE rnum = 1;
    ANALYZE temp_get_foo;

    -- Process one level at a time.
    FOR get_foo_row IN SELECT * FROM get_foo(p_version)
    LOOP
        -- Rows strictly before the current one can never include the row with
        -- the highest rnum, so no separate max(rnum) exclusion is needed.
        SELECT 'tab.' || string_agg(my_flag, ', tab.' ORDER BY rnum DESC)
        INTO some_string
        FROM temp_get_foo
        WHERE rnum < (
            SELECT rnum
            FROM temp_get_foo
            WHERE bar_id = get_foo_row.bar_id);
    END LOOP;
END;
$body$;
-- Oracle PL/SQL demo of BULK COLLECT: reads secret_data rows through a cursor
-- in batches of up to 1000 and, for every row of a batch, builds a string from
-- the my_flag values of earlier rows in that batch. some_string is reset on
-- every outer iteration and is neither returned nor printed.
CREATE OR REPLACE PROCEDURE BulkCollect(p_version IN NUMBER)
IS
-- Rows for one foo_id, ordered by bar_id. The cursor parameter has the same
-- name as the procedure parameter.
CURSOR get_foo (p_version number) IS
SELECT * FROM secret_data
WHERE foo_id = p_version
ORDER BY bar_id;
-- Integer-indexed collection holding one fetched batch of cursor rows.
TYPE my_type IS TABLE of get_foo%ROWTYPE INDEX BY pls_integer;
my_tab my_type;
some_string VARCHAR2(5000);
BEGIN
-- NOTE(review): the cursor is opened with the literal 1; the procedure's own
-- p_version argument is not used anywhere in the body - confirm this is intended.
OPEN get_foo(1);
-- Process one level at a time.
LOOP
FETCH get_foo
BULK COLLECT INTO my_tab LIMIT 1000;
-- An empty batch means the cursor is exhausted.
EXIT WHEN my_tab.COUNT = 0;
FOR indx IN 1 .. my_tab.COUNT
LOOP
some_string := '';
-- Walks from row indx - 1 down to row 2 of the current batch.
FOR prnt_indx IN REVERSE 2..indx - 1
LOOP
some_string := some_string || ', tab.' ||
TO_CHAR(my_tab(prnt_indx).my_flag);
END LOOP;
END LOOP;
END LOOP;
CLOSE get_foo;
END;
29. General Rules
Existing code
Nested Loops
Dynamic Queries
Refcursor as output along with other output params
Replacements
Reduced loop nesting with improvements
SETOF Record as output
Improved logic for dynamic queries to make them performant
30. Our Azure Postgres service page—and our blog!
Azure Database for PostgreSQL
https://aka.ms/azure-postgres
Azure Postgres Blog
https://aka.ms/azure-postgres-blog