Pro ORM
Alex Gaynor
@alex_gaynor




               Django NYC
What is this talk?
  ORM Architecture   Practical Things




        50%          50%
ORM Architecture
QuerySet-Refactor
• A big refactor of the ORM internals
• Happened a bit before Django 1.0

   Old                      New
Multi DB
• GSOC project this past summer
• Added multiple database support to
  Django

• Ripped up a lot of internals, and then
  put them back together

   Old                      New
from django.db import models

# Bare-bones model: no fields are declared here. The slides use it only as a
# starting point for following MyModel.objects through the ORM layers.
class MyModel(models.Model):
    pass



                               MyModel.objects




                 Managers
class Manager(object):
    """Simplified sketch of a manager.

    Each public query method builds a fresh QuerySet through
    ``get_query_set()`` and forwards the call to it unchanged.
    """

    def get_query_set(self):
        """Return a new QuerySet for ``self.model``.

        Every proxied method below starts from this object.
        """
        return QuerySet(self.model)

    def get(self, *args, **kwargs):
        """Forward ``get()`` to a fresh QuerySet and return its result."""
        queryset = self.get_query_set()
        return queryset.get(*args, **kwargs)

    def filter(self, *args, **kwargs):
        """Forward ``filter()`` to a fresh QuerySet and return its result."""
        queryset = self.get_query_set()
        return queryset.filter(*args, **kwargs)

    # ETC...




    get_query_set(), your
    gateway to the rabbit hole
QuerySet
• Backend agnostic
• Basically it handles turning the public
  API into calls to methods on Query

• Surprisingly little meat for a 1400
  line file

• Also, a few subclasses for values(),
  values_list(), and dates()
Query
• This part is backend specific, SQL vs.
  GAE here, not Postgres vs. Oracle
• It carries all the info around, ordering,
  joins, where, aggregates, etc...
• It used to generate SQL, not anymore
  (more on this later)
self.model = model
self.alias_refcount = {}
self.alias_map = {}
self.table_map = {}
self.join_map = {}
self.rev_join_map = {}
self.quote_cache = {}
self.default_cols = True
self.default_ordering = True
self.standard_ordering = True
self.ordering_aliases = []
self.select_fields = []
self.related_select_fields = []
self.dupe_avoidance = {}
self.used_aliases = set()
self.filter_is_sticky = False
self.included_inherited_models = {}
self.select = []
self.tables = []
self.where = where()
self.where_class = where
self.group_by = None
self.having = where()
self.order_by = []
self.low_mark, self.high_mark = 0, None
self.distinct = False
self.select_related = False
self.related_select_cols = []
self.aggregates = SortedDict()
self.aggregate_select_mask = None
self._aggregate_select_cache = None
self.max_depth = 5
self.extra = SortedDict()
self.extra_select_mask = None
self._extra_select_cache = None
self.extra_tables = ()
self.extra_order_by = ()
self.deferred_loading = (set(), True)
I told you it was a lot of
          stuff
The Major Players
   Attributes            Methods
   self.where       self.get_compiler()
  self.having          self.clone()
self.aggregates         self.join()
 self.alias_map      self.add_filter()
self.select_fields   self.add_ordering()
Dude, where’s my
         SQL?
There are no relevant images for SQL, so
you get to settle for me trying to be funny
SQLCompiler

• Takes Query objects and turns ’em into SQL.
• Also executes SQL.
• This handles the backend specific
  stuff.

• We ship 2 sets of them. 1 for Oracle,
  and one for pretty much everything
  else.
31 Flavors!

  SQLCompiler        SQLInsertCompiler

SQLDeleteCompiler     SQLDateCompiler

SQLUpdateCompiler   SQLAggregateCompiler
django.db.backends.*
• Interoperate with database drivers
• Handle most of the nuances of the various databases’ SQL dialects
• DDL for creation
• Introspection
• Running the cmd line shell
• django.contrib.gis.db.backends
Putting this Stuff to
     Good Use
aka the part you might use at your day
                 job
Custom Aggregates
• Django comes with a few aggregates, in django.db.models.aggregates:
  Avg, Count, Max, Min, StdDev, Sum, Variance
• But databases have other aggregates
• Some of them even let you create your own!
Aggregates Come in Two
        Parts
• One part is a data carrier, it knows
  what field you’re aggregating over.
• The other part turns that into SQL.
• Starting to sound familiar?
Part I
from django.db.models import Aggregate


class MyAggregate(Aggregate):
    """Public, data-carrying half of the custom aggregate.

    It is the object used in ``aggregate()`` / ``annotate()`` calls; the SQL
    itself is produced by the companion ``SQLMyAggregate`` class.
    """

    def add_to_query(self, query, alias, col, source, is_summary):
        """Attach the SQL half of this aggregate to ``query`` under ``alias``.

        ``col``, ``source`` and ``is_summary`` are passed straight through to
        ``SQLMyAggregate``; this method does not interpret them.
        """
        # The parameter must be called ``source``: the body reads it by that
        # name, so the original spelling ``course`` raised NameError.
        # ``self.extra`` replaces the undefined bare name ``extra``.
        # NOTE(review): this relies on Django's Aggregate.__init__ storing
        # its extra keyword arguments on ``self.extra`` -- confirm for the
        # Django version in use.
        aggregate = SQLMyAggregate(
            col, source=source, is_summary=is_summary, **self.extra
        )
        query.aggregates[alias] = aggregate
Part II
from django.db.models.sql.aggregates import Aggregate as SQLAggregate


class SQLMyAggregate(SQLAggregate):
    """SQL-generating half of the custom aggregate.

    Renders as ``MYFUNCTION(cast(<field> as numeric) / 100)``.
    """

    # Both placeholders need an explicit ``s`` conversion. The original
    # ``%(function)(`` made "(" the conversion character, so %-formatting the
    # template raised ValueError.
    sql_template = "%(function)s(cast(%(field)s as numeric) / 100)"
    sql_function = "MYFUNCTION"
    # NOTE(review): in Django's SQL aggregate base class ``is_ordinal`` is
    # documented as marking an integer-valued result -- confirm that is
    # intended for a value divided by 100.
    is_ordinal = True
Automatic Caching
Single Object Caching
• Basically we want to automatically
  cache any get() lookups on unique
  fields (primary key, slugs, etc.)

• These types of queries seem to pop up
  a lot (think every single foreign key
  traversal)
• Also automatically do invalidation for
  us.
from django.core.cache import cache
from django.db.models.query import QuerySet


class CachingQuerySet(QuerySet):
    """QuerySet whose plain primary-key ``get()`` lookups go through the cache."""

    def get(self, *args, **kwargs):
        """Return the matching object, using the cache for simple pk lookups.

        A lookup is served from the cache only when it is a single keyword
        lookup on ``pk`` (or the primary-key field's name, optionally with
        ``__exact``) on a queryset that has no filters yet. Every other call
        is delegated unchanged to ``QuerySet.get()``.

        The cache key is ``"<app_label>:<object_name>:<value>"``.
        """
        def uncached():
            return super(CachingQuerySet, self).get(*args, **kwargs)

        # Bail out on positional arguments, on anything other than exactly
        # one keyword lookup, and on querysets that already carry filters.
        # The original test ``len(args) != 1 or not kwargs`` was true for
        # every ordinary ``get(pk=...)`` call, so the cache was never used.
        if args or len(kwargs) != 1 or self.query.where:
            return uncached()

        # next(iter(...)) works on both Python 2.6+ and Python 3, unlike
        # ``kwargs.iteritems().next()``.
        key, value = next(iter(kwargs.items()))
        if key.endswith("__exact"):
            key = key[:-len("__exact")]
        if key not in ("pk", self.model._meta.pk.name):
            return uncached()

        cache_key = "%s:%s:%s" % (
            self.model._meta.app_label,
            self.model._meta.object_name,
            value,
        )
        obj = cache.get(cache_key)
        if obj is not None:
            return obj
        # Cache miss: hit the database, then remember the result.
        obj = uncached()
        cache.set(cache_key, obj)
        return obj
from django.db import models
from django.db.models.signals import pre_save, pre_delete


class CachingManager(models.Manager):
    """Manager that returns CachingQuerySet and wires up cache invalidation.

    The base class must be a Manager, not a QuerySet: ``get_query_set()``,
    ``contribute_to_class()`` and ``use_for_related_fields`` are all part of
    the manager API.
    """

    # Ask Django to use this manager for related-object access as well.
    use_for_related_fields = True

    def get_query_set(self):
        """Return a CachingQuerySet bound to this manager's model."""
        return CachingQuerySet(self.model)

    def contribute_to_class(self, *args, **kwargs):
        """Attach to the model class, then connect the invalidation signals.

        The signals are connected after the ``super()`` call because
        ``self.model`` is used as the sender.
        """
        super(CachingManager, self).contribute_to_class(*args, **kwargs)
        pre_save.connect(invalidate_cache, sender=self.model)
        pre_delete.connect(invalidate_cache, sender=self.model)
def invalidate_cache(instance, sender, **kwargs):
    """Signal receiver that deletes the cached copy of ``instance``.

    The key is built as ``"<app_label>:<object_name>:<pk>"`` from
    ``instance._meta`` and ``instance.pk``. ``sender`` and any further
    keyword arguments are accepted but not used.
    """
    cache_key = "%s:%s:%s" % (
        instance._meta.app_label,
        instance._meta.object_name,
        instance.pk,
    )
    cache.delete(cache_key)


# The function was originally spelled ``invalide_cache`` while the signal
# connections refer to ``invalidate_cache``; keep the old name as an alias.
invalide_cache = invalidate_cache




       Aaand, Done!
Questions?
Extra
Custom Field Review
Methods

   to_python           Converts value from serialized form to Python

   validate            Performs validation

   db_type             Returns the database type

   get_prep_value      Performs DB agnostic coercion and validation

   get_db_prep_value   DB specific coercion/validation

   formfield           Provides a form field

Django Pro ORM

  • 1.
  • 3.
    What is this talk? ORM Architecture Practical Things 50% 50%
  • 4.
  • 5.
  • 6.
    • A big refactor of the ORM internals • Happened a bit before Django 1.0 Old New
  • 7.
  • 8.
    • GSOC project this past summer • Added multiple database support to Django • Ripped up a lot of internals, and then put them back together Old New
  • 9.
    from django.db import models class MyModel(models.Model): pass MyModel.objects Managers
  • 10.
    class Manager(object): def get_query_set(self): return QuerySet(self.model) def get(self, *args, **kwargs): return self.get_query_set().get(*args, **kwargs) def filter(self, *args, **kwargs): return self.get_query_set().filter(*args, **kwargs) # ETC... get_query_set(), your gateway to the rabbit
  • 11.
    QuerySet • Backend agnostic • Basically it handles turning the public API into calls to methods on Query • Surprisingly little meat for a 1400 line file • Also, a few subclasses for values(), values_list(), and dates()
  • 12.
  • 14.
    • This part is backend specific, SQL vs. GAE here, not Postgres vs. Oracle • It carries all the info around, ordering, joins, where, aggregates, etc... • It used to generate SQL, not anymore (more on this later)
  • 15.
    self.model = model self.alias_refcount= {} self.alias_map = {} self.table_map = {} self.join_map = {} self.rev_join_map = {} self.quote_cache = {} self.default_cols = True self.default_ordering = True self.standard_ordering = True self.ordering_aliases = [] self.select_fields = [] self.related_select_fields = [] self.dupe_avoidance = {} self.used_aliases = set() self.filter_is_sticky = False self.included_inherited_models = {} self.select = [] self.tables = [] self.where = where() self.where_class = where self.group_by = None self.having = where()
  • 16.
    self.order_by = [] self.low_mark,self.high_mark = 0, None self.distinct = False self.select_related = False self.related_select_cols = [] self.aggregates = SortedDict() self.aggregate_select_mask = None self._aggregate_select_cache = None self.max_depth = 5 self.extra = SortedDict() self.extra_select_mask = None self._extra_select_cache = None self.extra_tables = () self.extra_order_by = () self.deferred_loading = (set(), True)
  • 17.
    I told you it was a lot of stuff
  • 18.
    The Major Players Attributes Methods self.where self.get_compiler() self.having self.clone() self.aggregates self.join() self.alias_map self.add_filter() self.select_fields self.add_ordering()
  • 19.
    Dude, where’s my SQL? There are no relevant images for SQL, so you get to settle for me trying to be funny
  • 20.
    SQLCompiler • Takes Query objects and turns ’em into SQL. • Also executes SQL. • This handles the backend specific stuff. • We ship 2 sets of them. 1 for Oracle, and one for pretty much everything else.
  • 21.
    31 Flavors! SQLCompiler SQLInsertCompiler SQLDeleteCompiler SQLDateCompiler SQLUpdateCompiler SQLAggregateCompiler
  • 22.
    django.db.backends.* • Interoperate with • Running the cmd line database drivers shell • Handle most of the • django.contrib.gis.db. nuances of the backends various databases’ SQL dialects • DDL for creation • Introspection
  • 23.
    Putting this Stuff to Good Use aka the part you might use at your day job
  • 24.
  • 25.
    • Django comes with a few aggregates django.db.models.aggregates Avg • But databases have other Count aggregates Max • Some of them Min even let you StdDev create your own! Sum Variance
  • 26.
    Aggregates Come in Two Parts • One part is a data carrier, it knows what field you’re aggregating over. • The other part turns that into SQL. • Starting to sound familiar?
  • 27.
    Part I from django.db.modelsimport Aggregate class MyAggregate(Aggregate): def add_to_query(self, query, alias, col, course, is_summary): aggregate = SQLMyAggregate(col, source=source, is_summary=is_summary, **extra) query.aggregates[alias] = aggregate
  • 28.
    Part II from django.db.models.sql.aggregatesimport Aggregate as SQLAggregate class SQLMyAggregate(SQLAggregate): sql_template = "%(function)(cast(%(field)s as numeric) / 100)" sql_function = "MYFUNCTION" is_ordinal = True
  • 29.
  • 31.
    Single Object Caching • Basically we want to automatically cache any get() lookups on unique fields (primary key, slugs, etc.) • These types of queries seem to pop up a lot (think every single foreign key traversal) • Also automatically do invalidation for us.
  • 33.
    from django.core.cache importcache from django.db.models.query import QuerySet class CachingQuerySet(QuerySet): def get(self, *args, **kwargs): sup = lambda: super(CachingQuerySet, self). get(*args, **kwargs) if len(args) != 1 or not kwargs or self.query.where: return sup() key, value = kwargs.iteritems().next() if key.endswith("__exact"): key = key[:-len("__exact")] if key not in ["pk", self.model._meta.pk.name]: return sup() cache_key = "%s:%s:%s" % ( self.model._meta.app_label, self.model._meta.object_name, value ) obj = cache.get(cache_key) if obj is not None: return obj obj = sup() cache.set(cache_key, obj) return obj
  • 34.
    from django.db.models.signals importpre_save, pre_delete class CachingManager(QuerySet): use_for_related_fields = True def get_query_set(self): return CachingQuerySet(self.model) def contribute_to_class(self, *args, **kwargs): super(CachingManager, self). contribute_to_class(*args, **kwargs) pre_save.connect(invalidate_cache, self.model) pre_delete.connect(invalidate_cache, self.model)
  • 35.
    def invalide_cache(instance, sender,**kwargs): cache_key = "%s:%s:%s" % ( instance._meta.app_label, instance._meta.object_name, instance.pk ) cache.delete(cache_key) Aaand, Done!
  • 36.
  • 37.
  • 38.
    Methods Converts value from to_python serialized form to Python validate Performs validation db_type Returns the database type Performs DB agnostic get_prep_value coercion and validation DB specific coercion/ get_db_prep_value validation formfield Provides a form field