Refactoring SQL for Performance

867 views
771 views

Published on

Refactoring SQL code for performance

Published in: Technology
0 Comments
0 Likes
Statistics
Notes
  • Be the first to comment

  • Be the first to like this

No Downloads
Views
Total views
867
On SlideShare
0
From Embeds
0
Number of Embeds
9
Actions
Shares
0
Downloads
14
Comments
0
Likes
0
Embeds 0
No embeds

No notes for slide

Refactoring SQL for Performance

  1. 1.
      -- Sample rows in the entity-attribute-value (EAV) layout:
      --
      -- loan_nbr  customer_nbr  code    value
      -- --------  ------------  ------  --------
      --        1             1  amount  1500.00
      --        1             1  date    20080110
      --        1             1  type    personal
      --        2             2  amount  3500.00
      --        2             2  date    20080215
      --        2             2  type    personal

      -- One row per (loan, customer, attribute code). Every attribute value,
      -- whatever its real type, is stored as text in the "value" column.
      CREATE TABLE EAV_Loans (
          loan_nbr     INT          NOT NULL,
          customer_nbr INT          NOT NULL,
          code         VARCHAR(30)  NOT NULL,
          value        VARCHAR(200),
          CONSTRAINT pk_eav_loans
              PRIMARY KEY (loan_nbr, customer_nbr, code)
      );
  2. 2.
      -- Customers with personal loans over 1000.00 for the period
      -- Jan 1, 2008 through Jan 31, 2008.
      -- EAV_Loans is read three times, once per attribute (date, amount, type),
      -- and the rows are stitched together on (loan_nbr, customer_nbr).
      SELECT
          dt.loan_nbr,
          dt.customer_nbr,
          CAST(dt.value AS DATETIME)        AS loan_date,
          CAST(amt.value AS DECIMAL(15, 2)) AS loan_amount
      FROM EAV_Loans AS dt
      INNER JOIN EAV_Loans AS amt
          ON  amt.loan_nbr     = dt.loan_nbr
          AND amt.customer_nbr = dt.customer_nbr
      INNER JOIN EAV_Loans AS tp
          ON  tp.loan_nbr     = dt.loan_nbr
          AND tp.customer_nbr = dt.customer_nbr
      WHERE dt.code = 'date'
        AND amt.code = 'amount'
        AND tp.code = 'type'
        -- Half-open range: from Jan 1 inclusive up to, not including, Feb 1.
        AND CAST(dt.value AS DATETIME) >= '20080101'
        AND CAST(dt.value AS DATETIME) <  '20080201'
        AND CAST(amt.value AS DECIMAL(15, 2)) > 1000.00
        AND tp.value = 'personal';
  3. 3.
      -- Same report as the self-join version, but each attribute is first
      -- projected into its own typed row set and the filters are applied to
      -- the typed columns.
      WITH loan_dates AS (
          SELECT loan_nbr,
                 customer_nbr,
                 CAST(value AS DATETIME) AS loan_date
          FROM EAV_Loans
          WHERE code = 'date'
      ),
      loan_amounts AS (
          SELECT loan_nbr,
                 customer_nbr,
                 CAST(value AS DECIMAL(15, 2)) AS loan_amount
          FROM EAV_Loans
          WHERE code = 'amount'
      ),
      loan_types AS (
          SELECT loan_nbr,
                 customer_nbr,
                 value AS loan_type
          FROM EAV_Loans
          WHERE code = 'type'
      )
      SELECT
          d.loan_nbr,
          d.customer_nbr,
          d.loan_date,
          a.loan_amount
      FROM loan_dates AS d
      INNER JOIN loan_amounts AS a
          ON  d.loan_nbr     = a.loan_nbr
          AND d.customer_nbr = a.customer_nbr
      INNER JOIN loan_types AS t
          ON  d.loan_nbr     = t.loan_nbr
          AND d.customer_nbr = t.customer_nbr
      WHERE d.loan_date >= '20080101'
        AND d.loan_date <  '20080201'
        AND a.loan_amount > 1000.00
        AND t.loan_type = 'personal';
  4. 4.
      -- Same report with a single pass over EAV_Loans: the attribute rows of
      -- each (loan_nbr, customer_nbr) group are pivoted into typed columns.
      -- Each CASE yields NULL for rows of other attribute codes, and MAX
      -- ignores those NULLs.
      SELECT
          loan_nbr,
          customer_nbr,
          loan_date,
          loan_amount
      FROM (
          SELECT
              loan_nbr,
              customer_nbr,
              MAX(CASE WHEN code = 'date'
                       THEN CAST(value AS DATETIME) END)       AS loan_date,
              MAX(CASE WHEN code = 'amount'
                       THEN CAST(value AS DECIMAL(15, 2)) END) AS loan_amount,
              MAX(CASE WHEN code = 'type'
                       THEN value END)                         AS loan_type
          FROM EAV_Loans
          GROUP BY loan_nbr, customer_nbr
      ) AS L
      WHERE loan_date >= '20080101'
        AND loan_date <  '20080201'
        AND loan_amount > 1000.00
        AND loan_type = 'personal';
  5. 5.
      -- The same sample loans in the normalized layout:
      --
      -- loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
      -- --------  ------------  -----------------------  -----------  ---------
      --        1             1  2008-01-10 00:00:00.000      1500.00  personal
      --        2             2  2008-02-15 00:00:00.000      3500.00  personal

      -- One row per loan, with a typed column per attribute.
      CREATE TABLE Loans (
          loan_nbr     INT            NOT NULL,
          customer_nbr INT            NOT NULL,
          loan_date    DATETIME       NOT NULL,
          loan_amount  DECIMAL(15, 2) NOT NULL,
          loan_type    VARCHAR(10)    NOT NULL,
          -- Only these two loan types are accepted.
          CONSTRAINT ck_loan_type
              CHECK (loan_type IN ('personal', 'business')),
          CONSTRAINT pk_loans
              PRIMARY KEY (loan_nbr)
      );
  6. 6.
      -- Convert EAV table to normalized.
      -- Each (loan_nbr, customer_nbr) group of attribute rows is collapsed
      -- into one Loans row; the CASE expressions pick out the value belonging
      -- to each attribute code and cast it to the target column type.
      INSERT INTO Loans (
          loan_nbr,
          customer_nbr,
          loan_date,
          loan_amount,
          loan_type
      )
      SELECT
          loan_nbr,
          customer_nbr,
          MAX(CASE WHEN code = 'date'   THEN CAST(value AS DATETIME) END),
          MAX(CASE WHEN code = 'amount' THEN CAST(value AS DECIMAL(15, 2)) END),
          MAX(CASE WHEN code = 'type'   THEN value END)
      FROM EAV_Loans
      GROUP BY
          loan_nbr,
          customer_nbr;
  7. 7.
      -- Customers with personal loans over 1000.00
      -- for period Jan 1, 2008 through Jan 31, 2008.
      -- Against the normalized table the filters apply directly to typed
      -- columns; no casts or self-joins are needed.
      SELECT
          loan_nbr,
          customer_nbr,
          loan_date,
          loan_amount
      FROM Loans
      WHERE loan_type = 'personal'
        AND loan_amount > 1000.00
        -- Half-open range: from Jan 1 inclusive up to, not including, Feb 1.
        AND loan_date >= '20080101'
        AND loan_date <  '20080201';
  8. 8.
      -- Replacement view for legacy code.
      -- Presents the normalized Loans table in the old EAV shape
      -- (loan_nbr, customer_nbr, code, value): every Loans row is unpivoted
      -- into three rows, one per attribute code.
      --
      -- UNION ALL is used instead of UNION. Each branch emits a different
      -- code literal and loan_nbr is the primary key of Loans, so no two rows
      -- can be equal and duplicate elimination would be wasted work.
      CREATE VIEW EAV_Loans (loan_nbr, customer_nbr, code, value)
      AS
      SELECT
          loan_nbr,
          customer_nbr,
          CAST('date' AS VARCHAR(30)),
          -- Style 112 formats the date as yyyymmdd.
          CONVERT(VARCHAR(200), loan_date, 112)
      FROM Loans
      UNION ALL
      SELECT
          loan_nbr,
          customer_nbr,
          CAST('amount' AS VARCHAR(30)),
          CAST(loan_amount AS VARCHAR(200))
      FROM Loans
      UNION ALL
      SELECT
          loan_nbr,
          customer_nbr,
          CAST('type' AS VARCHAR(30)),
          CAST(loan_type AS VARCHAR(200))
      FROM Loans;
  9. 9.
      loan_nbr  loan_date                loan_amount  loan_type  rk
      --------  -----------------------  -----------  ---------  --
             3  2008-03-11 00:00:00.000      5000.00  business    1
             6  2008-03-27 00:00:00.000      4000.00  business    2
             7  2008-04-10 00:00:00.000      3500.00  business    3
             4  2008-03-12 00:00:00.000      2000.00  personal    1
             8  2008-04-12 00:00:00.000      2000.00  personal    2
             1  2008-01-01 00:00:00.000      1500.00  personal    3
             5  2008-03-25 00:00:00.000      1200.00  personal    4
             2  2008-02-15 00:00:00.000      1000.00  personal    5
  10. 10.
      -- Rank loans within each loan type by amount, largest first, using a
      -- correlated subquery. A loan's rank is the number of same-type loans
      -- that sort at or before it: those with a larger amount, plus those with
      -- an equal amount and a loan_nbr less than or equal to its own (the
      -- loan itself is counted, so ranks start at 1).
      SELECT
          cur.loan_nbr,
          cur.loan_date,
          cur.loan_amount,
          cur.loan_type,
          (SELECT COUNT(*)
           FROM Loans AS other
           WHERE other.loan_type = cur.loan_type
             AND (   other.loan_amount > cur.loan_amount
                  OR (    other.loan_amount = cur.loan_amount
                      AND other.loan_nbr <= cur.loan_nbr))
          ) AS rk
      FROM Loans AS cur
      ORDER BY
          cur.loan_type,
          rk;
  11. 11.
      -- Rank loans within each loan type by amount, largest first, with
      -- ROW_NUMBER. loan_nbr breaks ties between equal amounts.
      SELECT
          loan_nbr,
          loan_date,
          loan_amount,
          loan_type,
          ROW_NUMBER() OVER (
              PARTITION BY loan_type
              ORDER BY loan_amount DESC,
                       loan_nbr
          ) AS rk
      FROM Loans
      ORDER BY
          loan_type,
          rk;
  12. 12.
      loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
      --------  ------------  -----------------------  -----------  ---------
             2             2  2008-02-15 00:00:00.000      1000.00  personal
             3             1  2008-03-11 00:00:00.000      4500.00  business
             4             3  2008-03-12 00:00:00.000      2000.00  personal

      loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
      --------  ------------  -----------------------  -----------  ---------
             1             1  2008-01-01 00:00:00.000      1500.00  personal
             2             2  2008-02-15 00:00:00.000      1000.00  personal
             3             1  2008-03-11 00:00:00.000      5000.00  business

      loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
      --------  ------------  -----------------------  -----------  ---------
             2             2  2008-02-15 00:00:00.000      1000.00  personal
             3             1  2008-03-11 00:00:00.000      4500.00  business
             4             3  2008-03-12 00:00:00.000      2000.00  personal
  13. 13.
      -- Bring Loans in line with DailyChangedLoans using three statements.

      -- Update changed: copy the new amount onto loans whose amount differs
      -- in DailyChangedLoans. The WHERE EXISTS guard restricts the update to
      -- those loans, so rows with no differing match are left untouched.
      UPDATE Loans
      SET loan_amount = (SELECT chg.loan_amount
                         FROM DailyChangedLoans AS chg
                         WHERE chg.loan_nbr = Loans.loan_nbr
                           AND chg.loan_amount <> Loans.loan_amount)
      WHERE EXISTS (SELECT *
                    FROM DailyChangedLoans AS chg
                    WHERE chg.loan_nbr = Loans.loan_nbr
                      AND chg.loan_amount <> Loans.loan_amount);

      -- Insert new loans: rows in DailyChangedLoans with no Loans row yet.
      INSERT INTO Loans (
          loan_nbr,
          customer_nbr,
          loan_date,
          loan_amount,
          loan_type
      )
      SELECT
          loan_nbr,
          customer_nbr,
          loan_date,
          loan_amount,
          loan_type
      FROM DailyChangedLoans AS chg
      WHERE NOT EXISTS (SELECT *
                        FROM Loans AS existing
                        WHERE chg.loan_nbr = existing.loan_nbr);

      -- Remove deleted: Loans rows that no longer appear in DailyChangedLoans.
      DELETE FROM Loans
      WHERE NOT EXISTS (SELECT *
                        FROM DailyChangedLoans AS chg
                        WHERE chg.loan_nbr = Loans.loan_nbr);
  14. 14.
      -- Using a single MERGE statement.
      -- Brings Loans in line with DailyChangedLoans, matching rows on loan_nbr:
      --   * loan in both, amount differs  -> update the amount
      --   * loan only in DailyChangedLoans -> insert it
      --   * loan only in Loans             -> delete it
      MERGE INTO Loans AS L
      USING DailyChangedLoans AS D
          ON D.loan_nbr = L.loan_nbr
      WHEN MATCHED AND L.loan_amount <> D.loan_amount THEN
          UPDATE SET loan_amount = D.loan_amount
      WHEN NOT MATCHED BY TARGET THEN
          -- The target columns are listed explicitly so the insert does not
          -- depend on the physical column order of Loans.
          INSERT (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
          VALUES (D.loan_nbr, D.customer_nbr, D.loan_date, D.loan_amount, D.loan_type)
      WHEN NOT MATCHED BY SOURCE THEN
          DELETE;
  15. 15.
      loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
      --------  ------------  -----------------------  -----------  ---------
             1             1  2008-01-01 00:00:00.000      1500.00  personal
             2             2  2008-01-02 00:00:00.000      1000.00  personal
             3             1  2008-01-03 00:00:00.000      5000.00  business
             4             3  2008-01-12 00:00:00.000      2000.00  personal
             5             4  2008-01-13 00:00:00.000      1200.00  personal
             6             3  2008-01-29 00:00:00.000      4000.00  business
             7             5  2008-01-30 00:00:00.000      3500.00  business
             8             2  2008-01-31 00:00:00.000      2000.00  personal

      start_date               end_date
      -----------------------  -----------------------
      2008-01-01 00:00:00.000  2008-01-03 00:00:00.000
      2008-01-12 00:00:00.000  2008-01-13 00:00:00.000
      2008-01-29 00:00:00.000  2008-01-31 00:00:00.000
  16. 16.
      -- Find last date for date range
      -- and use as grouping factor.
      -- For every loan date, "base" is the earliest date on or after it that
      -- has no loan exactly one day later, i.e. the final date of the
      -- consecutive run the loan date belongs to. Grouping by base then gives
      -- one row per run.
      SELECT
          MIN(loan_date) AS start_date,
          MAX(loan_date) AS end_date
      FROM (
          SELECT
              cur.loan_date,
              (SELECT MIN(run_end.loan_date)
               FROM Loans AS run_end
               WHERE run_end.loan_date >= cur.loan_date
                 AND NOT EXISTS (SELECT *
                                 FROM Loans AS next_day
                                 WHERE next_day.loan_date =
                                       DATEADD(DAY, 1, run_end.loan_date))
              ) AS base
          FROM Loans AS cur
      ) AS L
      GROUP BY base;
  17. 17.
      -- Preparation for solution.
      -- Shows, side by side, each loan date as a day count from 1900-01-01
      -- and its position when the dates are numbered in ascending order.
      SELECT
          loan_date,
          DATEDIFF(DAY, '19000101', loan_date)   AS days_since_base_date,
          ROW_NUMBER() OVER (ORDER BY loan_date) AS rn
      FROM Loans;

      -- Result:
      --
      -- loan_date                days_since_base_date  rn
      -- -----------------------  --------------------  --
      -- 2008-01-01 00:00:00.000                 39446   1
      -- 2008-01-02 00:00:00.000                 39447   2
      -- 2008-01-03 00:00:00.000                 39448   3
      -- 2008-01-12 00:00:00.000                 39457   4
      -- 2008-01-13 00:00:00.000                 39458   5
      -- 2008-01-29 00:00:00.000                 39474   6
      -- 2008-01-30 00:00:00.000                 39475   7
      -- 2008-01-31 00:00:00.000                 39476   8
  18. 18.
      -- Solution with a ranking function.
      -- Within a run of consecutive days, the day number and the rank both
      -- grow by one per day, so (day number - rank) is constant for the run
      -- and changes whenever a gap occurs. Grouping by that difference gives
      -- one row per run of consecutive loan dates.
      --
      -- DENSE_RANK over the day number is used instead of ROW_NUMBER over
      -- loan_date. loan_date is not unique in Loans (the key is loan_nbr),
      -- and ROW_NUMBER would give two loans on the same day different
      -- numbers, shifting the difference and splitting the run. With
      -- DENSE_RANK, rows that share a day share a rank. When every loan date
      -- is distinct the result is unchanged.
      SELECT
          MIN(loan_date) AS start_date,
          MAX(loan_date) AS end_date
      FROM (
          SELECT
              loan_date,
              DATEDIFF(DAY, '19000101', loan_date)
                  - DENSE_RANK() OVER (
                        ORDER BY DATEDIFF(DAY, '19000101', loan_date)
                    ) AS base
          FROM Loans
      ) AS L
      GROUP BY base;

×