Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Hacking PostgreSQL. Физическое представление данных

1,758 views

Published on

-

Published in: Data & Analytics
  • Be the first to comment

  • Be the first to like this

Hacking PostgreSQL. Физическое представление данных

  1. 1. www.postgrespro.ru Физическое Представление данных Hacking PostgreSQL 24.03.2016
  2. 2. 2 Содержание 1.OID 2.PGDATA 3.Storage manager 4.Heap 5.Forks 6.Page
  3. 3. 3 Ответ на вопрос Вопрос: Где найти номера стратегий для написания класса операторов? Ответ: src/include/access/stratnum.h
  4. 4. 4 Relation – объект базы данных src/include/catalog/pg_class.h #define RELKIND_RELATION 'r' /* ordinary table */ #define RELKIND_INDEX 'i' /* secondary index */ #define RELKIND_SEQUENCE 'S' /* sequence object */ #define RELKIND_TOASTVALUE 't' /* for out-of-line values */ #define RELKIND_VIEW 'v' /* view */ #define RELKIND_COMPOSITE_TYPE 'c' /* composite type */ #define RELKIND_FOREIGN_TABLE 'f' /* foreign table */ #define RELKIND_MATVIEW 'm' /* materialized view */
  5. 5. 5 OID src/include/postgres_ext.h /* Object ID is a fundamental type in Postgres. */ typedef unsigned int Oid; #define InvalidOid ((Oid) 0) src/include/access/transam.h #define FirstBootstrapObjectId 10000 #define FirstNormalObjectId 16384
  6. 6. 6 Генератор OID src/backend/catalog/catalog.c Oid GetNewOid (Relation relation) Oid GetNewRelFileNode (Oid reltablespace, Relation pg_class, char relpersistence) src/backend/access/transam/varsup.c Oid GetNewObjectId()
  7. 7. 7 Выбор OID src/include/catalog/unused_oids src/include/catalog/duplicate_oids
  8. 8. 8 Приведение типов SELECT oid, relname FROM pg_class LIMIT 1; oid | relname ------+-------------- 2619 | pg_statistic SELECT * FROM pg_attribute WHERE attrelid = 'pg_statistic'::regclass; SELECT * FROM pg_attribute WHERE attrelid = (SELECT oid FROM pg_class WHERE relname = 'pg_statistic'); SELECT 'pg_statistic'::regclass::oid; oid ------ 2619 SELECT 2619::regclass; regclass -------------- pg_statistic
  9. 9. 9 Вопросы и источники ● Что происходит при OID wraparound? ● Сказывается ли это на производительности? ● Могут ли закончиться OID? ● В каком случае, и что тогда произойдет? ● Документация про OID ● [HACKERS] 32bit OID wrap around concerns
  10. 10. 10 $PGDATA PGDATA="/home/anastasia/projects/postgresql_data" ls -CF base/ pg_multixact/ pg_tblspc/ global/ pg_notify/ pg_twophase/ pg_clog/ pg_replslot/ PG_VERSION pg_commit_ts/ pg_serial/ pg_xlog/ pg_dynshmem/ pg_snapshots/ postgresql.auto.conf pg_hba.conf pg_stat/ postgresql.conf pg_ident.conf pg_stat_tmp/ postmaster.opts pg_logical/ pg_subtrans/
  11. 11. 11 $PGDATA/global ls -CF 1136 1233 2396_vm 2846_vm 4060 1136_fsm 1260 2397 2847 4060_vm 1136_vm 1260_fsm 2671 2964 4061 1137 1260_vm 2672 2964_vm 6000 1213 1261 2676 2965 6000_vm 1213_fsm 1261_vm 2677 2966 6001 1213_vm 1262 2694 2966_vm 6002 1214 1262_fsm 2695 2967 pg_control 1214_fsm 1262_vm 2697 3592 pg_filenode.map 1214_vm 2396 2698 3592_vm pg_internal.init 1232 2396_fsm 2846 3593
  12. 12. 12 Файлы в pg_global postgres=# SELECT oid, spcname FROM pg_tablespace ; oid | spcname ------+------------ 1663 | pg_default 1664 | pg_global postgres=# SELECT oid, relfilenode, relname FROM pg_class WHERE reltablespace = 1664 ORDER BY oid; oid | relfilenode | relname ------+-------------+----------------------------------------- 1136 | 0 | pg_pltemplate 1137 | 0 | pg_pltemplate_name_index 1213 | 0 | pg_tablespace 1214 | 0 | pg_shdepend 1232 | 0 | pg_shdepend_depender_index 1233 | 0 | pg_shdepend_reference_index 1260 | 0 | pg_authid 1261 | 0 | pg_auth_members 1262 | 0 | pg_database 2396 | 0 | pg_shdescription 2397 | 0 | pg_shdescription_o_c_index 2671 | 0 | pg_database_datname_index
  13. 13. 13 Файлы в pg_global (2) postgres=# SELECT oid, relfilenode, relname FROM pg_class WHERE reltablespace = 1664 ORDER BY oid; oid | relfilenode | relname ------+-------------+----------------------------------------- 2676 | 0 | pg_authid_rolname_index 2677 | 0 | pg_authid_oid_index 2694 | 0 | pg_auth_members_role_member_index 2695 | 0 | pg_auth_members_member_role_index 2697 | 0 | pg_tablespace_oid_index 2698 | 0 | pg_tablespace_spcname_index 2846 | 0 | pg_toast_2396 2847 | 0 | pg_toast_2396_index 2964 | 0 | pg_db_role_setting 2965 | 0 | pg_db_role_setting_databaseid_rol_index 2966 | 0 | pg_toast_2964 2967 | 0 | pg_toast_2964_index 3592 | 0 | pg_shseclabel 3593 | 0 | pg_shseclabel_object_index 4060 | 0 | pg_toast_3592 4061 | 0 | pg_toast_3592_index 6000 | 0 | pg_replication_origin 6001 | 0 | pg_replication_origin_roiident_index
  14. 14. 14 Tablespaces src/include/catalog/pg_tablespace.h Документация pg_tblspc/ ● содержит ссылки на директории ● view pg_tablespace содержит имена объектов и их идентификаторы ● НЕ является независимой частью кластера!
  15. 15. 15 $PGDATA/base ls -CF 1/ 12423/ 12424/ 16501/ pgsql_tmp/ contrib/oid2name $oid2name All databases: Oid Database Name Tablespace ---------------------------------- 16501 db pg_default 12424 postgres pg_default 12423 template0 pg_default 1 template1 pg_default
  16. 16. 16 pg_relation_filepath src/backend/utils/adt/dbsize.c db=# \df pg*relation* List of functions Schema | Name | Result data type | Argument data types | Type ------------+--------------------------+------------------+---------------------+-------- pg_catalog | pg_filenode_relation | regclass | oid, oid | normal pg_catalog | pg_relation_filenode | oid | regclass | normal pg_catalog | pg_relation_filepath | text | regclass | normal pg_catalog | pg_relation_is_updatable | integer | regclass, boolean | normal pg_catalog | pg_relation_size | bigint | regclass | normal pg_catalog | pg_relation_size | bigint | regclass, text | normal pg_catalog | pg_total_relation_size | bigint | regclass | normal db=# SELECT pg_relation_filepath('tbl'); pg_relation_filepath ---------------------- base/16497/16498 db=# SELECT pg_filenode_relation(0, 16498); pg_filenode_relation ---------------------- tbl
  17. 17. 17 $PGDATA/base/16501 db=# CREATE TABLE tbl (a int, b int); db=# SELECT oid, relname, relfilenode FROM pg_class WHERE relname='tbl'; oid | relname | relfilenode -------+---------+------------- 16502 | tbl | 16502 ~/projects/postgresql_data/base/16501$ ls 16502* 16502 ~/projects/postgresql_data/base/16501$ wc -c 16502 /*абсолютно пустой файл*/ db=# INSERT INTO tbl VALUES (1,1); ~/projects/postgresql_data/base/16501$ wc -c 16502 /*снова абсолютно пустой файл*/
  18. 18. 18 pg_buffercache db=# CREATE EXTENSION pg_buffercache; db=# SELECT * FROM pg_buffercache WHERE relfilenode ='tbl'::regclass ; -[ RECORD 1 ]----+------ bufferid | 454 relfilenode | 16502 reltablespace | 1663 reldatabase | 16501 relforknumber | 0 relblocknumber | 0 isdirty | t usagecount | 2 pinning_backends | 0 db=# CHECKPOINT; db=# SELECT * FROM pg_buffercache WHERE relfilenode ='tbl'::regclass ; -[ RECORD 1 ]----+------ bufferid | 454 relfilenode | 16502 reltablespace | 1663 reldatabase | 16501 relforknumber | 0 relblocknumber | 0 isdirty | f usagecount | 2 pinning_backends | 0
  19. 19. 19 pageinspect ~/projects/postgresql_data/base/16501$ wc -c 16502 /*В файле появились данные!*/ db=# CREATE EXTENSION pageinspect; db=# select * from heap_page_items(get_raw_page('tbl',0)); -[ RECORD 1 ]------------------- lp | 1 lp_off | 8160 lp_flags | 1 lp_len | 32 t_xmin | 720 t_xmax | 0 t_field3 | 0 t_ctid | (0,1) t_infomask2 | 2 t_infomask | 2048 t_hoff | 24 t_bits | t_oid | t_data | \x0100000001000000
  20. 20. 20 oid vs relfilenode db=# SELECT oid, relname,relfilenode FROM pg_class WHERE relname='tbl'; oid | relname | relfilenode -------+---------+------------- 16502 | tbl | 16502 db=# TRUNCATE tbl ; db=# SELECT oid, relname,relfilenode FROM pg_class WHERE relname='tbl'; oid | relname | relfilenode -------+---------+------------- 16502 | tbl | 16531
  21. 21. 21 RelFileNode src/include/storage/relfilenode.h typedef struct RelFileNode { Oid spcNode; /* tablespace */ Oid dbNode; /* database */ Oid relNode; /* relation */ } RelFileNode; typedef struct RelFileNodeBackend { RelFileNode node; BackendId backend; } RelFileNodeBackend;
  22. 22. 22 Storage manager
  23. 23. 23 Storage manager src/backend/storage/smgr/README src/backend/storage/smgr/smgr.c src/backend/storage/smgr/md.c
  24. 24. 24 Страница (блок) – единица I/O src/include/storage/bufpage.h src/backend/access/nbtree/nbtpage.c Разные методы доступа отличаются структурой данных на странице
  25. 25. 25 Сегмент (файл) - набор блоков src/include/storage/block.h typedef uint32 BlockNumber; #define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF) #define MaxBlockNumber ((BlockNumber) 0xFFFFFFFE) typedef struct BlockIdData { uint16 bi_hi; uint16 bi_lo; } BlockIdData; /* block identifier */ typedef BlockIdData *BlockId;
  26. 26. 26 Relation – набор сегментов + системные данные (forks)
  27. 27. 27 Forks src/include/common/relpath.h typedef enum ForkNumber { InvalidForkNumber = -1, MAIN_FORKNUM = 0, FSM_FORKNUM, VISIBILITYMAP_FORKNUM, INIT_FORKNUM } ForkNumber; src/common/relpath.c const char *const forkNames[] = { "main", /* MAIN_FORKNUM */ "fsm", /* FSM_FORKNUM */ "vm", /* VISIBILITYMAP_FORKNUM */ "init" /* INIT_FORKNUM */ };
  28. 28. 28 MAIN src/include/catalog/storage.h src/backend/catalog/storage.c src/include/storage/buf.h src/include/storage/bufmgr.h void RelationCreateStorage(RelFileNode rnode, char relpersistence) Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
  29. 29. 29 FreeSpaceMap
  30. 30. 30 FreeSpaceMap src/backend/storage/freespace/README src/backend/storage/freespace/freespace.c BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
  31. 31. 31 VisibilityMap src/backend/access/heap/visibilitymap.c 2 бита на страницу: ● All visible ● All frozen
  32. 32. 32 _init http://www.postgresql.org/docs/devel/static/storage-init.html Для unlogged Relations
  33. 33. 33 Страница src/include/storage/bufpage.h * +----------------+---------------------------------+ * | PageHeaderData | linp1 linp2 linp3 ... | * +-----------+----+---------------------------------+ * | ... linpN | | * +-----------+--------------------------------------+ * | ^ pd_lower | * | | * | v pd_upper | * +-------------+------------------------------------+ * | | tupleN ... | * +-------------+------------------+-----------------+ * | ... tuple3 tuple2 tuple1 | "special space" | * +--------------------------------+-----------------+
  34. 34. 34 PageHeaderData src/include/storage/bufpage.h typedef struct PageHeaderData { /* XXX LSN is member of *any* block, not only page-organized ones */ PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog * record for last change to this page */ uint16 pd_checksum; /* checksum */ uint16 pd_flags; /* flag bits, see below */ LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData;
  35. 35. 35 PageAddItem src/include/storage/bufpage.h src/backend/storage/page/bufpage.c OffsetNumber PageAddItem(Page page, Item item, Size size, OffsetNumber offsetNumber, bool overwrite, bool is_heap)
  36. 36. 36 Домашнее задание. Подумать. ● Что происходит при OID wraparound? ● Могут ли закончиться OID?
  37. 37. 37 Домашнее задание. ● Простое ● Регрессионные тесты для pageinspect ● Перенести функции gevel в pageinspect ● Контриб, который воспроизводимо ломает данные в файле таблицы/индекса (чисто в академических целях). Может быть функция в pageinspect. ● Сложное ● Обновление default значений ● Чтение данных из битых файлов
  38. 38. 38 Источники ● http://www.interdb.jp/pg/pgsql01.html ● http://www.slideshare.net/FedericoCampoli/10-things-pos ● https://momjian.us/main/presentations/internals.html ● https://wiki.postgresql.org/images/8/81/FSM_and_Visibilit
  39. 39. 39 Ребята, помогите Насте успеть к Feature Freeze ● https://commitfest.postgresql.org/9/433/ ● https://commitfest.postgresql.org/9/494/
  40. 40. 40 Отмена лекций Занятий 31.03 и 07.04 не будет
  41. 41. www.postgrespro.ru Спасибо за внимание! Вопросы? Hacking PostgreSQL 24.03.2016 hacking@postgrespro.ru

×