Refactoring SQL for Performance
Upcoming SlideShare
Loading in...5
×
 

Like this? Share it with your network

Share

Refactoring SQL for Performance

on

  • 973 views

Refactoring SQL code for performance

Refactoring SQL code for performance

Statistics

Views

Total Views
973
Views on SlideShare
966
Embed Views
7

Actions

Likes
0
Downloads
9
Comments
0

3 Embeds 7

http://www.linkedin.com 5
http://www.slideshare.net 1
http://www.docshut.com 1

Accessibility

Categories

Upload Details

Uploaded as Adobe PDF

Usage Rights

© All Rights Reserved

Report content

Flagged as inappropriate Flag as inappropriate
Flag as inappropriate

Select your reason for flagging this presentation as inappropriate.

Cancel
  • Full Name Full Name Comment goes here.
    Are you sure you want to
    Your message goes here
    Processing…
Post Comment
Edit your comment

Refactoring SQL for Performance Presentation Transcript

  • 1. -- Sample EAV_Loans rows (one row per loan attribute):
    --   loan_nbr  customer_nbr  code    value
    --   1         1             amount  1500.00
    --   1         1             date    20080110
    --   1         1             type    personal
    --   2         2             amount  3500.00
    --   2         2             date    20080215
    --   2         2             type    personal
    --
    -- EAV-style table: "code" names the attribute and "value" holds its
    -- content as text, whatever the attribute's real type is.
    CREATE TABLE EAV_Loans (
        loan_nbr     INT          NOT NULL,
        customer_nbr INT          NOT NULL,
        code         VARCHAR(30)  NOT NULL,  -- attribute name: 'amount', 'date', 'type'
        value        VARCHAR(200),           -- attribute content, stored as a string
        -- one row per (loan, customer, attribute)
        CONSTRAINT pk_eav_loans
            PRIMARY KEY (loan_nbr, customer_nbr, code)
    );
  • 2. -- Customers with personal loans over 1000.00 for the period
    -- Jan 1, 2008 through Jan 31, 2008.
    -- Every attribute is a separate EAV row, so the table is joined to itself
    -- once per attribute: A = date row, B = amount row, C = type row.
    -- Each CAST is wrapped in a CASE on the matching code: the optimizer is
    -- free to evaluate a conversion before the code filter is applied, and an
    -- unguarded CAST would then fail on rows that hold a different attribute
    -- (e.g. casting 'personal' to DATETIME).
    SELECT A.loan_nbr,
           A.customer_nbr,
           CASE WHEN A.code = 'date'
                THEN CAST(A.value AS DATETIME) END AS loan_date,
           CASE WHEN B.code = 'amount'
                THEN CAST(B.value AS DECIMAL(15, 2)) END AS loan_amount
    FROM EAV_Loans AS A
    INNER JOIN EAV_Loans AS B
       ON A.loan_nbr = B.loan_nbr
      AND A.customer_nbr = B.customer_nbr
    INNER JOIN EAV_Loans AS C
       ON A.loan_nbr = C.loan_nbr
      AND A.customer_nbr = C.customer_nbr
    WHERE A.code = 'date'
      -- half-open range: covers all of January 2008
      AND CASE WHEN A.code = 'date'
               THEN CAST(A.value AS DATETIME) END >= '20080101'
      AND CASE WHEN A.code = 'date'
               THEN CAST(A.value AS DATETIME) END < '20080201'
      AND B.code = 'amount'
      AND CASE WHEN B.code = 'amount'
               THEN CAST(B.value AS DECIMAL(15, 2)) END > 1000.00
      AND C.code = 'type'
      AND C.value = 'personal';
  • 3. -- Same report as the self-join version, with one derived table per
    -- attribute (A = date, B = amount, C = type) so the outer query can
    -- filter on typed, named columns.
    -- A derived table is not an optimization fence: the CAST may still be
    -- evaluated on rows that the inner WHERE would discard, so each
    -- conversion is guarded by a CASE on its own code.
    SELECT A.loan_nbr,
           A.customer_nbr,
           A.loan_date,
           B.loan_amount
    FROM (SELECT loan_nbr,
                 customer_nbr,
                 CASE WHEN code = 'date'
                      THEN CAST(value AS DATETIME) END AS loan_date
          FROM EAV_Loans
          WHERE code = 'date') AS A
    INNER JOIN
         (SELECT loan_nbr,
                 customer_nbr,
                 CASE WHEN code = 'amount'
                      THEN CAST(value AS DECIMAL(15, 2)) END AS loan_amount
          FROM EAV_Loans
          WHERE code = 'amount') AS B
       ON A.loan_nbr = B.loan_nbr
      AND A.customer_nbr = B.customer_nbr
    INNER JOIN
         (SELECT loan_nbr,
                 customer_nbr,
                 value AS loan_type
          FROM EAV_Loans
          WHERE code = 'type') AS C
       ON A.loan_nbr = C.loan_nbr
      AND A.customer_nbr = C.customer_nbr
    WHERE A.loan_date >= '20080101'
      AND A.loan_date < '20080201'
      AND B.loan_amount > 1000.00
      AND C.loan_type = 'personal';
  • 4. -- Pivot the EAV rows into one row per (loan_nbr, customer_nbr) in a
    -- single pass, then filter the pivoted columns. Each MAX(CASE ...) picks
    -- the value of one attribute; rows of other attributes contribute NULL,
    -- which MAX ignores.
    WITH L AS (
        SELECT loan_nbr,
               customer_nbr,
               MAX(CASE WHEN code = 'date'
                        THEN CAST(value AS DATETIME) END)       AS loan_date,
               MAX(CASE WHEN code = 'amount'
                        THEN CAST(value AS DECIMAL(15, 2)) END) AS loan_amount,
               MAX(CASE WHEN code = 'type'
                        THEN value END)                         AS loan_type
        FROM EAV_Loans
        GROUP BY loan_nbr, customer_nbr
    )
    SELECT loan_nbr,
           customer_nbr,
           loan_date,
           loan_amount
    FROM L
    WHERE loan_date >= '20080101'
      AND loan_date < '20080201'
      AND loan_amount > 1000.00
      AND loan_type = 'personal';
  • 5. -- Sample Loans rows (one row per loan, typed columns):
    --   loan_nbr  customer_nbr  loan_date                loan_amount  loan_type
    --   1         1             2008-01-10 00:00:00.000  1500.00      personal
    --   2         2             2008-02-15 00:00:00.000  3500.00      personal
    --
    -- Normalized replacement for the EAV layout: each attribute becomes a
    -- properly typed, mandatory column.
    CREATE TABLE Loans (
        loan_nbr     INT            NOT NULL,
        customer_nbr INT            NOT NULL,
        loan_date    DATETIME       NOT NULL,
        loan_amount  DECIMAL(15, 2) NOT NULL,
        loan_type    VARCHAR(10)    NOT NULL,
        -- only the two known loan types are accepted
        CONSTRAINT ck_loan_type
            CHECK (loan_type IN ('personal', 'business')),
        -- a loan is identified by its number alone
        CONSTRAINT pk_loans
            PRIMARY KEY (loan_nbr)
    );
  • 6. -- Load the normalized Loans table from the EAV table: collapse the
    -- attribute rows of each (loan_nbr, customer_nbr) into one typed row.
    INSERT INTO Loans
        (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
    SELECT loan_nbr,
           customer_nbr,
           -- each MAX(CASE ...) extracts a single attribute; other rows yield NULL
           MAX(CASE WHEN code = 'date'
                    THEN CAST(value AS DATETIME) END)       AS loan_date,
           MAX(CASE WHEN code = 'amount'
                    THEN CAST(value AS DECIMAL(15, 2)) END) AS loan_amount,
           MAX(CASE WHEN code = 'type'
                    THEN value END)                         AS loan_type
    FROM EAV_Loans
    GROUP BY loan_nbr, customer_nbr;
  • 7. -- Personal loans above 1000.00 dated in January 2008
    -- (Jan 1, 2008 through Jan 31, 2008), read from the normalized table.
    SELECT loan_nbr,
           customer_nbr,
           loan_date,
           loan_amount
    FROM Loans
    WHERE loan_type = 'personal'
      AND loan_amount > 1000.00
      -- half-open date range: includes Jan 1, excludes Feb 1
      AND loan_date >= '20080101'
      AND loan_date < '20080201';
  • 8. -- Replacement view for legacy code: exposes the normalized Loans table
    -- in the old EAV shape (loan_nbr, customer_nbr, code, value), emitting
    -- one row per loan attribute.
    -- UNION ALL instead of UNION: the three branches can never produce the
    -- same row (each carries a different code literal, and Loans is keyed on
    -- loan_nbr), so the sort/duplicate-removal step of UNION is pure overhead.
    CREATE VIEW EAV_Loans (loan_nbr, customer_nbr, code, value)
    AS
    SELECT loan_nbr,
           customer_nbr,
           CAST('date' AS VARCHAR(30)),
           CONVERT(VARCHAR(200), loan_date, 112)  -- style 112 = yyyymmdd
    FROM Loans
    UNION ALL
    SELECT loan_nbr,
           customer_nbr,
           CAST('amount' AS VARCHAR(30)),
           CAST(loan_amount AS VARCHAR(200))
    FROM Loans
    UNION ALL
    SELECT loan_nbr,
           customer_nbr,
           CAST('type' AS VARCHAR(30)),
           CAST(loan_type AS VARCHAR(200))
    FROM Loans;
  • 9. loan_nbr loan_date loan_amount loan_type rk 3 2008-03-11 00:00:00.000 5000.00 business 1 6 2008-03-27 00:00:00.000 4000.00 business 2 7 2008-04-10 00:00:00.000 3500.00 business 3 4 2008-03-12 00:00:00.000 2000.00 personal 1 8 2008-04-12 00:00:00.000 2000.00 personal 2 1 2008-01-01 00:00:00.000 1500.00 personal 3 5 2008-03-25 00:00:00.000 1200.00 personal 4 2 2008-02-15 00:00:00.000 1000.00 personal 5
  • 10. -- Rank loans within each loan type, largest amount first, by counting
    -- for every loan the loans of the same type that sort at or before it.
    -- Ties on amount are broken by the lower loan_nbr.
    SELECT cur.loan_nbr,
           cur.loan_date,
           cur.loan_amount,
           cur.loan_type,
           (SELECT COUNT(*)
            FROM Loans AS other
            WHERE other.loan_type = cur.loan_type
              AND (other.loan_amount > cur.loan_amount
                   OR (other.loan_amount = cur.loan_amount
                       AND other.loan_nbr <= cur.loan_nbr))) AS rk
    FROM Loans AS cur
    ORDER BY cur.loan_type, rk;
  • 11. -- Rank loans within each loan type, largest amount first, using a
    -- window function; loan_nbr breaks ties on amount.
    SELECT loan_nbr,
           loan_date,
           loan_amount,
           loan_type,
           ROW_NUMBER() OVER (
               PARTITION BY loan_type
               ORDER BY loan_amount DESC,
                        loan_nbr ASC
           ) AS rk
    FROM Loans
    ORDER BY loan_type ASC,
             rk ASC;
  • 12. loan_nbr customer_nbr loan_date loan_amount loan_type 2 2 2008-02-15 00:00:00.000 1000.00 personal 3 1 2008-03-11 00:00:00.000 4500.00 business 4 3 2008-03-12 00:00:00.000 2000.00 personal loan_nbr customer_nbr loan_date loan_amount loan_type 1 1 2008-01-01 00:00:00.000 1500.00 personal 2 2 2008-02-15 00:00:00.000 1000.00 personal 3 1 2008-03-11 00:00:00.000 5000.00 business loan_nbr customer_nbr loan_date loan_amount loan_type 2 2 2008-02-15 00:00:00.000 1000.00 personal 3 1 2008-03-11 00:00:00.000 4500.00 business 4 3 2008-03-12 00:00:00.000 2000.00 personal
  • 13. -- Synchronize Loans with DailyChangedLoans in three separate statements.
    --
    -- Step 1: loans present in both tables whose amount differs take the
    -- amount from DailyChangedLoans.
    UPDATE Loans
    SET loan_amount =
            (SELECT chg.loan_amount
             FROM DailyChangedLoans AS chg
             WHERE chg.loan_nbr = Loans.loan_nbr
               AND chg.loan_amount <> Loans.loan_amount)
    WHERE EXISTS (SELECT 1
                  FROM DailyChangedLoans AS chg
                  WHERE chg.loan_nbr = Loans.loan_nbr
                    AND chg.loan_amount <> Loans.loan_amount);
    --
    -- Step 2: loans that exist only in DailyChangedLoans are added.
    INSERT INTO Loans
        (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
    SELECT chg.loan_nbr,
           chg.customer_nbr,
           chg.loan_date,
           chg.loan_amount,
           chg.loan_type
    FROM DailyChangedLoans AS chg
    WHERE NOT EXISTS (SELECT 1
                      FROM Loans AS cur
                      WHERE chg.loan_nbr = cur.loan_nbr);
    --
    -- Step 3: loans that no longer appear in DailyChangedLoans are removed.
    DELETE FROM Loans
    WHERE NOT EXISTS (SELECT 1
                      FROM DailyChangedLoans AS chg
                      WHERE chg.loan_nbr = Loans.loan_nbr);
  • 14. -- Using a single MERGE statement: update changed amounts, insert new
    -- loans and delete loans missing from DailyChangedLoans in one pass.
    MERGE INTO Loans AS L
    USING DailyChangedLoans AS D
       ON D.loan_nbr = L.loan_nbr
    -- loan exists on both sides: touch the row only when the amount changed
    WHEN MATCHED AND L.loan_amount <> D.loan_amount THEN
        UPDATE SET loan_amount = D.loan_amount
    -- loan exists only in the source: add it. The explicit column list keeps
    -- the insert correct even if the column order of Loans ever changes.
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
        VALUES (D.loan_nbr, D.customer_nbr, D.loan_date, D.loan_amount, D.loan_type)
    -- loan exists only in the target: remove it
    WHEN NOT MATCHED BY SOURCE THEN
        DELETE;
  • 15. loan_nbr customer_nbr loan_date loan_amount loan_type 1 1 2008-01-01 00:00:00.000 1500.00 personal 2 2 2008-01-02 00:00:00.000 1000.00 personal 3 1 2008-01-03 00:00:00.000 5000.00 business 4 3 2008-01-12 00:00:00.000 2000.00 personal 5 4 2008-01-13 00:00:00.000 1200.00 personal 6 3 2008-01-29 00:00:00.000 4000.00 business 7 5 2008-01-30 00:00:00.000 3500.00 business 8 2 2008-01-31 00:00:00.000 2000.00 personal start_date end_date 2008-01-01 00:00:00.000 2008-01-03 00:00:00.000 2008-01-12 00:00:00.000 2008-01-13 00:00:00.000 2008-01-29 00:00:00.000 2008-01-31 00:00:00.000
  • 16. -- Collapse loan dates into ranges of consecutive days.
    -- For every loan date, find the last date of the run it belongs to
    -- (the earliest date on or after it that has no loan on the following
    -- day) and use that date as the grouping factor.
    SELECT MIN(loan_date) AS start_date,
           MAX(loan_date) AS end_date
    FROM (SELECT cur.loan_date,
                 (SELECT MIN(run_end.loan_date)
                  FROM Loans AS run_end
                  WHERE run_end.loan_date >= cur.loan_date
                    AND NOT EXISTS
                        (SELECT 1
                         FROM Loans AS next_day
                         WHERE next_day.loan_date =
                               DATEADD(DAY, 1, run_end.loan_date))
                 ) AS base
          FROM Loans AS cur) AS runs
    GROUP BY base;
  • 17. -- Preparation for the solution: show, for each loan date, its day
    -- number relative to a fixed base date next to its sequential row number.
    SELECT loan_date,
           DATEDIFF(DAY, '19000101', loan_date)  AS days_since_base_date,
           ROW_NUMBER() OVER (ORDER BY loan_date) AS rn
    FROM Loans;
    --
    -- Result:
    --   loan_date                days_since_base_date  rn
    --   2008-01-01 00:00:00.000  39446                 1
    --   2008-01-02 00:00:00.000  39447                 2
    --   2008-01-03 00:00:00.000  39448                 3
    --   2008-01-12 00:00:00.000  39457                 4
    --   2008-01-13 00:00:00.000  39458                 5
    --   2008-01-29 00:00:00.000  39474                 6
    --   2008-01-30 00:00:00.000  39475                 7
    --   2008-01-31 00:00:00.000  39476                 8
  • 18. -- Solution with a ranking window function: within a run of consecutive
    -- days the day number and the rank both grow by one per date, so their
    -- difference stays constant and serves as the grouping factor.
    -- DENSE_RANK is used rather than ROW_NUMBER so that several loans on the
    -- same date share one rank; with ROW_NUMBER a duplicate date shifts the
    -- difference and splits the run into two ranges.
    -- NOTE(review): assumes loan_date carries no time-of-day part, as in the
    -- sample rows - confirm.
    SELECT MIN(loan_date) AS start_date,
           MAX(loan_date) AS end_date
    FROM (SELECT loan_date,
                 DATEDIFF(DAY, '19000101', loan_date)
                   - DENSE_RANK() OVER (ORDER BY loan_date) AS base
          FROM Loans) AS L
    GROUP BY base;