SlideShare a Scribd company logo
1 of 37
Download to read offline
PHP7 HASHTABLE
wangtong@panda.tv
2017-07
 PHP7 Array Feature
 PHP7 HashTable Struct
 HashTable Collisions
 Translation Table
 PHP7 HashTable Operation
 Example, Run php-src By GDB
 Packed HashTables
 Empty HashTable
 Application Scenarios
 PHP5 Hashtable
 Redis Hashtable
Agenda
We already know about php array
• Array 为 HashTable实现
• map,可用字符串、数字做索引键
• Foreach 快于 for
• Foreach 顺序为插入顺序
• Count($arr) 快
• In_array 慢
• 理想情况Find 时间复杂度为 O(1)
• PHP有current,prev,next,each,end,reset等指针函数
PHP7 HashTable Struct
https://github.com/php/php-src/blob/PHP-7.0.11/Zend/zend_types.h#L176
/* ht struct */
/*
 * One slot of the hash table's arData array.
 *
 * Declared with typedef so that `Bucket` names a type (as `Bucket *arData`
 * and `sizeof(Bucket)` elsewhere require) instead of defining a global
 * variable called Bucket.
 */
typedef struct Bucket {
	zval val;          /* stored value; val.u2.next links the collision chain */
	zend_ulong h;      /* hash of the key (the integer key itself for numeric keys) */
	zend_string *key;  /* string key; NULL when the element has an integer key */
} Bucket;
/*
 * PHP 7 hash table, the structure behind every PHP array.
 * Elements live in the arData bucket array in insertion order; a table of
 * uint32_t indexes (the "translation table") is addressed with negative
 * offsets from arData and maps a hash to the first bucket of its chain.
 */
struct HashTable {
zend_refcounted_h gc; // reference-count / type-info header
union {
struct { ZEND_ENDIAN_LOHI_4( /*... ellipsis ...*/) } v;
uint32_t flags; // HASH_FLAG_* bits viewed as one word
} u;
uint32_t nTableMask; // equals -nTableSize, e.g. -16, which is 4294967280 as uint32_t; OR-ed with a hash to get the (negative) translation-table index
Bucket *arData; // bucket array holding the elements
uint32_t nNumUsed; // next slot available in arData (slots of deleted elements still count)
uint32_t nNumOfElements; // number of live elements in arData
uint32_t nTableSize; // table size, always a power of two, minimum 8
uint32_t nInternalPointer; // position used for iteration
zend_long nNextFreeElement; // next integer key to hand out for an append
dtor_func_t pDestructor; // destructor applied to stored data
};
/*
 * PHP 7 value container: the payload (`value`) followed by two 32-bit words.
 * u1 carries the type information; u2 is a spare word whose meaning depends
 * on where the zval is used. Inside a hash table bucket, u2.next holds the
 * next entry of the collision chain.
 */
struct zval {
zend_value value; /* value */
union {
struct {
ZEND_ENDIAN_LOHI_4(
zend_uchar type, /* active type */
zend_uchar type_flags,
zend_uchar const_flags,
zend_uchar reserved) /* call info for EX(This) */
} v;
uint32_t type_info; /* the four bytes above viewed as one word */
} u1;
union {
uint32_t var_flags;
uint32_t next; /* hash collision chain */
uint32_t cache_slot; /* literal cache slot */
uint32_t lineno; /* line number (for ast nodes) */
uint32_t num_args; /* arguments number for EX(This) */
uint32_t fe_pos; /* foreach position */
uint32_t fe_iter_idx; /* foreach iterator index */
} u2;
};
/*
 * Payload of a zval. Only one member is meaningful at a time; which one is
 * presumably selected by the owning zval's "active type" field (u1.v.type).
 */
union zend_value {
zend_long lval; /* integer payload */
double dval; /* floating-point payload */
zend_refcounted *counted; /* pointer to a zend_refcounted */
zend_string *str; /* pointer to a zend_string */
zend_array *arr; /* pointer to a zend_array */
zend_object *obj; /* pointer to a zend_object */
zend_resource *res; /* pointer to a zend_resource */
zend_reference *ref; /* pointer to a zend_reference */
zend_ast_ref *ast; /* pointer to a zend_ast_ref */
zval *zv; /* pointer to another zval */
void *ptr; /* untyped pointer */
zend_class_entry *ce; /* pointer to a zend_class_entry */
zend_function *func; /* pointer to a zend_function */
struct {
uint32_t w1;
uint32_t w2;
} ww; /* the payload viewed as two 32-bit words */
};
Hashtable collisions
链接法:桶是一个可容纳多个数据的数据结构(例如链表或红黑树)
开放寻址法:所有元素都存放在槽中(装载因子<0.5可考虑)
// Hash-collision demonstration: insert 2^16 integer keys spaced 2^16 apart
// (0, 65536, 131072, ...) and print how long the inserts took.
$startedAt = microtime(true);
$size = pow(2, 16);
for ($i = 0, $k = 0; $i < $size; $i++, $k += $size) {
    $arr[$k] = 0;
}
echo microtime(true) - $startedAt, PHP_EOL;
// Timings quoted on the slide: PHP 5.6 about 28 s, PHP 7 about 7 s.
Translation Table
Hash layout
/*
 * Size in bytes of the translation table. nTableMask holds the negated slot
 * count as a uint32_t, so negating it again recovers the number of slots.
 */
#define HT_HASH_SIZE(nTableMask) \
	(((size_t)(uint32_t)-(int32_t)(nTableMask)) * sizeof(uint32_t))
/* Size in bytes of a Bucket array with nTableSize slots. */
#define HT_DATA_SIZE(nTableSize) \
	((size_t)(nTableSize) * sizeof(Bucket))
/* Bytes needed for the Bucket array plus the translation table. */
#define HT_SIZE_EX(nTableSize, nTableMask) \
	(HT_DATA_SIZE((nTableSize)) + HT_HASH_SIZE((nTableMask)))
/* Same as HT_SIZE_EX, reading both values from an existing table. */
#define HT_SIZE(ht) \
	HT_SIZE_EX((ht)->nTableSize, (ht)->nTableMask)
/* Pointer to the bucket array of the table. */
Bucket *arData;
arData = emalloc(HT_SIZE(ht)); /* a single allocation sized for the Bucket array plus the translation table (HT_SIZE adds both) */
panda.tv tone shop … Bucket
…
val value …
… … … u1
-1 -1 1 u2.next
92236014990
29192316
0 65536 … h h …
0x… 0x… 0x… … *key *key nTableSize-1
0 1 2 3 4i idx=5 idx=6 7
gc …
u …
nTableMask -8
*arData 0x…
nNumUsed 3
nNumOfElements 3
nTableSize 8
nInternalPointer 0
nNextFreeElement 65537
pDestructor 0x…
…
922360…
7
company
gc
h
len
val[1]
2 0
-8 -7 -6 -5 -4 -3 -2 -1
// Example array for the layout diagram. With nTableMask = -8 the integer
// keys 0 and 65536 both give nIndex = h | nTableMask = -8, so they share
// one translation-table slot and are chained through u2.next.
$arr = [
    'company' => 'panda.tv',
    0 => 'tone',
    65536 => 'shop'
];
0x0
Translation table bucket
PHP7 HashTable :
nIndex idx
PHP7 HashTable operation
https://github.com/php/php-src/blob/PHP-7.0.11/Zend/zend_hash.c#L552
HashTable Init
/*
 * Initialise `ht` as an empty hash table.
 *
 * ht          table to initialise
 * nSize       requested capacity, normalised by zend_hash_check_size()
 * pDestructor destructor stored for the table's values
 * persistent  non-zero sets HASH_FLAG_PERSISTENT
 *
 * No bucket storage is allocated here: arData is pointed at the shared
 * uninitialized_bucket placeholder and the counters are zeroed.
 */
ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC)
{
	GC_REFCOUNT(ht) = 1;
	GC_TYPE_INFO(ht) = IS_ARRAY;
	ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS;
	ht->nTableSize = zend_hash_check_size(nSize);
	ht->nTableMask = HT_MIN_MASK; /* ((uint32_t) -2) */
	/*
	 * HT_SET_DATA_ADDR(ht, ptr) expands to:
	 *   do { (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); } while (0)
	 * i.e. arData is set to the address just past the translation table
	 * that starts at ptr.
	 */
	HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
	ht->nNumUsed = 0;
	ht->nNumOfElements = 0;
	ht->nInternalPointer = HT_INVALID_IDX;
	ht->nNextFreeElement = 0;
	ht->pDestructor = pDestructor;
}
Translation Table Demo
/* @auth xuruiliang@panda.tv, 在此感谢许老板帮写的demo*/
#include <iostream>
#include <cstdlib>
#include <cassert>
using namespace std;
/* Record type standing in for a hash table Bucket. */
struct P {
	int x, y;
};

/* Number of leading int slots, and number of P records that follow them. */
const int P_SIZE = 10;

/*
 * Translation-table layout demo: one allocation holds P_SIZE ints followed
 * by P_SIZE P records. `p` is shifted past the ints so that p[i] addresses
 * the records while the ints sit at negative offsets from it.
 */
int main()
{
	P p1 = {100, 101};

	/* Keep the address malloc returned: it is the only one free() accepts. */
	void *base = malloc(P_SIZE * (sizeof(int) + sizeof(P)));
	assert(base != NULL);

	((int *)base)[0] = 10;                 /* first int slot of the header area */
	P *p = (P *)((int *)base + P_SIZE);    /* skip the int area */
	p[3] = p1;

	free(base);
	return 0;
}
/*
 * Slide excerpt of the string-key insert path: take the next free bucket,
 * fill it, and link it at the head of its collision chain.
 *
 * The slide elides (marked with the ellipsis comments) the local
 * declarations of idx, p, h and nIndex, the handling of an already
 * existing key, and the tail of the function including its return.
 */
static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC)
{
	/* Expands to: if ((ht)->nNumUsed >= (ht)->nTableSize) { zend_hash_do_resize(ht); } */
	ZEND_HASH_IF_FULL_DO_RESIZE(ht);
	idx = ht->nNumUsed++;                      /* take the next available slot number */
	ht->nNumOfElements++;                      /* increment number of elements */
	/* ... */
	p = ht->arData + idx;                      /* bucket for that slot in arData */
	p->key = key;                              /* store the key being inserted */
	/* ... */
	p->h = h = ZSTR_H(key);                    /* save the key's hash in the bucket */
	ZVAL_COPY_VALUE(&p->val, pData);           /* copy the value into the bucket */
	nIndex = h | ht->nTableMask;               /* translation-table index (negative) */
	/* Same as p->val.u2.next = ...: the current chain head becomes our successor. */
	Z_NEXT(p->val) = HT_HASH(ht, nIndex);
	/*
	 * Expansion shown on the slide:
	 *   ((uint32_t*)((ht)->arData))[(int32_t)(nIndex)] = ((idx) * sizeof(Bucket))
	 * The new bucket becomes the head of the chain for this slot.
	 */
	HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
	/* ... */
}
HashTable Add
Hashtable添加更新元素
• nIndex为hash后的索引,idx为顺序索引
• nNumUsed+1, 用于idx,>=nTableSize将触发扩容
• nNumOfElements+1, 记录实际元素个数
• nTableSize, 用于申请内存空间的大小
• nNextFreeElement+1, 用于下一个自增数字索引值
• p->val.u2.next 实现了hash冲突的解决
• p = ht->arData + idx, Bucket内存地址
Hashtable del
/*
 * Delete the element stored under the string key `key`.
 * Walks the collision chain of the key's translation-table slot and removes
 * the first bucket whose key matches.
 * Returns SUCCESS if an element was deleted, FAILURE if the key is absent.
 * (Local declarations are elided on the slide.)
 */
ZEND_API int ZEND_FASTCALL zend_hash_del(HashTable *ht, zend_string *key)
{
	/* ... */
	h = zend_string_hash_val(key);   /* hash of the (string) key */
	nIndex = h | ht->nTableMask;     /* translation-table index */

	/* Start at the slot's chain head and follow Z_NEXT until the chain ends. */
	for (idx = HT_HASH(ht, nIndex); idx != HT_INVALID_IDX; idx = Z_NEXT(p->val)) {
		p = HT_HASH_TO_BUCKET(ht, idx);
		if (p->key != key) {
			/* Not the same key pointer: compare hash, presence, length, bytes. */
			if (p->h != h ||
				!p->key ||
				ZSTR_LEN(p->key) != ZSTR_LEN(key) ||
				memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) != 0) {
				prev = p;            /* remember predecessor for unlinking */
				continue;
			}
		}
		_zend_hash_del_el_ex(ht, idx, p, prev);
		return SUCCESS;
	}
	return FAILURE;
}
Hash fragmentation, resizing and compacting
HashTable Resize
/*
 * Make room in a full table (called when nNumUsed has reached nTableSize).
 *
 * - If enough slots belong to deleted elements, only rebuild the table in
 *   place with zend_hash_rehash().
 * - Otherwise, while below HT_MAX_SIZE, allocate a block twice as large,
 *   copy the buckets over, free the old block and rebuild the index.
 * - Otherwise raise a fatal error.
 */
static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht)
{
	IS_CONSISTENT(ht);
	HT_ASSERT(GC_REFCOUNT(ht) == 1);

	if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) {
		/* Rehash only once the used-slot count exceeds the live count by more than 1/32. */
		HANDLE_BLOCK_INTERRUPTIONS();
		zend_hash_rehash(ht); /* rebuild the index array */
		HANDLE_UNBLOCK_INTERRUPTIONS();
	} else if (ht->nTableSize < HT_MAX_SIZE) {
		/* Double the table size. */
		void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
		uint32_t nSize = ht->nTableSize + ht->nTableSize;
		Bucket *old_buckets = ht->arData;

		HANDLE_BLOCK_INTERRUPTIONS();
		/* New block of (sizeof(Bucket) + sizeof(uint32_t)) * nSize bytes. */
		new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT);
		ht->nTableSize = nSize;
		ht->nTableMask = -ht->nTableSize; /* negated table size */
		HT_SET_DATA_ADDR(ht, new_data); /* point arData at the start of the Bucket array */
		memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); /* copy the old buckets */
		pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT); /* release the old block */
		zend_hash_rehash(ht); /* rebuild the index array */
		HANDLE_UNBLOCK_INTERRUPTIONS();
	} else {
		/* Cast to size_t: %zu requires a size_t argument, nTableSize is uint32_t. */
		zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", (size_t)ht->nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
	}
}
Example, Run php-src By GDB
git clone -b PHP-7.0.11 git@github.com:php/php-src.git
cd php-src
~/php-src> ./buildconf
~/php-src> ./configure --disable-all --enable-debug --prefix=$HOME/php-debug
~/php-src> make
~/php-src> make install
gdb --args bin/php -f hashtable-debug.php
break /home/1/php-src/Zend/zend_hash.c:839 if h==589
break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key14")==0
break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key2")==0
break /home/1/php-src/Zend/zend_hash.c:839 if h==4153
break /home/1/php-src/Zend/zend_hash.c:561 if strncmp((char *)&key->val,"class_exists",key->len)==0
GDB调试方案
<?PHP
// Script driven under GDB (hashtable-debug.php). The order of inserts and
// unsets is deliberate: the markers @1..@4 are the points at which the
// breakpoints stop and the table fields are read for the results table.
$tmp_user = array(
'name'=>'wangtong',
'worker_id'=>'P589',
'589'=>'see-nNextFreeElement', // marker @1: third item, reported with nNextFreeElement = 589+1
'company'=>'panda.tv',
'email'=>'wangtong@panda.tv',
'location'=>'bj-soho-18',
'department01' => 'g-biz',
'department02' => 'g-tech',
1006440989 => 'see-nTableSize',
'key10' => 'pandatv.com',
'key11' => 'shop.gate.panda.tv',
'key12' => 'mall.gate.panda.tv',
'key13' => 'bag.gate.panda.tv',
'key14' => 'see-nTableSize', // marker @2: 14th item
);
// Copy element by element so that $user_info grows one insert at a time.
foreach($tmp_user as $k=>$v){
$user_info[$k]=$v; // markers @1 and @2 are also observed here
}
// Delete 8 of the 14 elements.
unset($user_info['worker_id']);
unset($user_info['589']);
unset($user_info['company']);
unset($user_info['email']);
unset($user_info['location']);
unset($user_info['department01']);
unset($user_info['department02']);
unset($user_info['1006440989']);
$user_info['key2']='see_nNumUsed'; // marker @3: insert after the unsets (reported as nNumUsed 15, 7 elements)
// Delete five more elements, then insert again.
unset($user_info['key10']);
unset($user_info['key11']);
unset($user_info['key12']);
unset($user_info['key13']);
unset($user_info['key14']);
$user_info['key3']='val3';
$user_info['key4']='val4';
$user_info['4153'] = 'see-nTableSize'; // marker @4: reported as 5 items, nNumUsed 5
运行示例
示例运行结果
| | nTableSize | nNumUsed | nNumOfEle | nNextFreeEle | Func | Mark |
|---|---|---|---|---|---|---|
| 3个item@1 | 16 | 3 | 3 | 590 | _zend_hash_index_add_or_update_i | Init, nTableSize=16; nNextFreeEle=589+1 |
| 14个item@2 | 16 | 14 | 14 | 1006440990 | _zend_hash_add_or_update_i | nNumUsed = 14; nNumOfElement = 14; |
| 3个item@1 | 8 | 3 | 3 | 590 | _zend_hash_index_add_or_update_i | Init, nTableSize=8; |
| 14个item@2 | 16 | 14 | 14 | 1006440990 | _zend_hash_add_or_update_i | nTableSize *= 2; |
| Unset后@3 | 16 | 15 | 7 | 1006440990 | _zend_hash_add_or_update_i | nNumUsed!=nNumOfEle; Hash fragmentation |
| 5个item@4 | 16 | 5 | 5 | 1006440990 | _zend_hash_index_add_or_update_i | nNumUsed = 5; resizing and compacting; |
Packed HashTables
packed hashtables
• 理解为传统意义上的‘数组’,而不是map
• 在packed hashtables中,arHash数组为NULL,查找只会直接在
arData中进行。
• packed hashtable只会作用于键递增的数组,这些数组的key之间
可以有间隔,但必须总是递增的。
• bucket->h是冗余的; bucket->key的值永远都是NULL
• 最简单的理解:用idx做索引,没有转换表,没有key.
Empty hashtable
空hash表
• arData/arHash 数组只会在插入第一个元素时分配内存
• nTableSize(8)& ht->nTableMask (0) == 0
• arHash 数组只有一个带有 INVALID_IDX 值、下标为 0 的元素
(uninitialized_bucket,并且被静态分配了内存)
• 查找时,我们会一直找到 INVALID_IDX 值,意味着 key(实际上你
只想静态分配创建一个空表)没有被找到
Application Scenarios
应用场景
• 自动扩容会导致多次分配内存及复制操作
• 数字索引比字母索引效率更高
• 不会自动缩容,nNumUsed 达到 nTableSize会压缩
• In_array效率会低
• Hash冲突还是要注意的,Dos攻击。
• Foreach的顺序为插入顺序
• 尽量使用 Packed hashtable
• Time33 hash算法适合英文词汇的hash;Time65适合大小写混写hash
• 理想情况下O(1)的时间复杂度,平均查找复杂度为O(L)
一旦 nNumUsed 达到 nTableSize,PHP会通过丢弃任何 UNDEF 的记录,自动压缩 arData 数组
PHP5 Hashtable
https://github.com/php/php-src/blob/PHP-5.6.9/Zend/zend_hash.h#L67
/*
 * PHP 5 hash table header.
 * NOTE(review): the field roles below follow the PHP 5.6 zend_hash.h that
 * the slide links to; only the struct itself is shown here, so confirm them
 * against that header.
 */
typedef struct _hashtable {
uint nTableSize; /* number of slots in arBuckets */
uint nTableMask; /* mask applied to a hash to select a slot -- presumably nTableSize - 1 */
uint nNumOfElements; /* number of stored elements */
ulong nNextFreeElement; /* next integer key to hand out for an append */
Bucket *pInternalPointer; /* Used for element traversal */
Bucket *pListHead; /* first bucket of the list linking all elements */
Bucket *pListTail; /* last bucket of that list */
Bucket **arBuckets; /* array of bucket pointers, one chain per slot */
dtor_func_t pDestructor; /* destructor for stored data */
zend_bool persistent;
unsigned char nApplyCount;
zend_bool bApplyProtection;
#if ZEND_DEBUG
int inconsistent;
#endif
} HashTable;
/*
 * PHP 5 bucket: one separately allocated node per element. It carries four
 * link pointers (pListNext/pListLast and pNext/pLast), the cost the
 * "PHP5 vs PHP7" slide refers to.
 * NOTE(review): which pointer pair serves the insertion-order list and which
 * the per-slot collision chain is inferred from the names; confirm against
 * the PHP 5.6 source.
 */
typedef struct bucket {
ulong h; /* hash value (or integer key) */
uint nKeyLength; /* length of arKey */
void *pData; /* pointer to the stored data */
void *pDataPtr;
struct bucket *pListNext; /* next bucket in the all-elements list */
struct bucket *pListLast; /* previous bucket in the all-elements list */
struct bucket *pNext; /* next bucket in the same slot's chain */
struct bucket *pLast; /* previous bucket in the same slot's chain */
const char *arKey; /* string key */
} Bucket;
PHP5 vs PHP7
• PHP 5.x 每个元素需要 144 bytes。在 PHP 7 中,降低到了 36 bytes,
或者打包情况下 32 bytes
• Buckets 需要单独分配16bytes内存,冗余且降低缓存效率
• Zvals 需要分开分配会产生额外头开销冗余, 16bytes
• 双向链表中的每个bucket需要4个指针用于链表的连接,32字节
• php7更少的内存占用,更好的CPU缓存利用率,更好的性能
• Php7 在线性的内存地址上进行遍历,而不是在一段内存地址随机
的链表上遍历
<?PHP
// Measure how much memory an array of 100000 integer-keyed integers uses.
$startMemory = memory_get_usage();
//$array = range(1, 100000);
for($i=0;$i<100000; $i++){
    $array[$i] = $i;
}
// The literal is " bytes\n"; the extracted slide text had lost the backslash
// and split the string across two lines.
echo memory_get_usage() - $startMemory, " bytes\n";
// NOTE(review): presumably shown on the slide as the alternative loop body
// (string keys instead of integer keys); here it runs once after the output.
$array['k'.$i] = $i;
PHP5 vs PHP7 memory
Redis HashTable
https://github.com/antirez/redis/blob/2.8/src/dict.h#L69
Redis hashtable
/*
 * One key/value entry of a Redis dict. Entries are singly linked through
 * `next`; each dictht table slot points at such an entry.
 */
typedef struct dictEntry {
void *key; /* entry key */
union {
void *val; /* value as a pointer */
uint64_t u64; /* value as an unsigned 64-bit integer */
int64_t s64; /* value as a signed 64-bit integer */
double d; /* value as a double */
} v;
struct dictEntry *next; /* next entry in the same chain */
} dictEntry;
/* This is our hash table structure. Every dictionary has two of these, as we
 * implement incremental rehashing from the old table to the new one. */
typedef struct dictht {
dictEntry **table; /* array of entry pointers */
unsigned long size; /* size of the table array */
unsigned long sizemask; /* mask for turning a hash into a table index -- presumably size - 1 */
unsigned long used; /* count of entries in use */
} dictht;
/*
 * A Redis dictionary: two hash tables (old and new, for incremental
 * rehashing) plus the rehash cursor.
 */
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2]; /* the two tables used during incremental rehashing */
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;
Redis vs PHP7
• Redis业务场景在存储,所以需要实现扩容的异步化
• Redis hgetall无序,少有顺序遍历业务场景,无需保证顺序
• Redis 使用的是 MurmurHash2,更适用于规律性强的key
感谢
• 感谢极客好人许老板教我C语言
• 感谢cap与大家给我进步的机会,同我一起学习
• 感谢以下开源贡献者
• http://jpauli.github.io/2016/04/08/hashtables.html
• http://www.laruence.com/2009/08/23/1065.html
• http://www.laruence.com/2009/07/23/994.html
• https://juejin.im/entry/58f87f1c44d9040069ca999c
• https://crispgm.com/page/php7-new-hashtable-implementation.html

More Related Content

What's hot

Basic of Exploitation
Basic of ExploitationBasic of Exploitation
Basic of ExploitationJongseok Choi
 
Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016Steffen Wenz
 
Yy
YyYy
Yyyygh
 
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texteSai Ef
 
Cluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in CCluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in CSteffen Wenz
 
Debugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB TricksDebugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB Tricksdutor
 
How to write rust instead of c and get away with it
How to write rust instead of c and get away with itHow to write rust instead of c and get away with it
How to write rust instead of c and get away with itFlavien Raynaud
 
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemWprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemSages
 
Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_miki koganei
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - CompilationsHSA Foundation
 
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)CODE BLUE
 

What's hot (19)

Vcs28
Vcs28Vcs28
Vcs28
 
Php&redis presentation
Php&redis presentationPhp&redis presentation
Php&redis presentation
 
C99.php
C99.phpC99.php
C99.php
 
Basic of Exploitation
Basic of ExploitationBasic of Exploitation
Basic of Exploitation
 
Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016Powered by Python - PyCon Germany 2016
Powered by Python - PyCon Germany 2016
 
Yy
YyYy
Yy
 
Nouveau document texte
Nouveau document texteNouveau document texte
Nouveau document texte
 
Cluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in CCluj.py Meetup: Extending Python in C
Cluj.py Meetup: Extending Python in C
 
Debugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB TricksDebugger Principle Overview & GDB Tricks
Debugger Principle Overview & GDB Tricks
 
How to write rust instead of c and get away with it
How to write rust instead of c and get away with itHow to write rust instead of c and get away with it
How to write rust instead of c and get away with it
 
Sysprog17
Sysprog17Sysprog17
Sysprog17
 
Codes
CodesCodes
Codes
 
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data EcosystemWprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
Wprowadzenie do technologii Big Data / Intro to Big Data Ecosystem
 
Gps c
Gps cGps c
Gps c
 
Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_Compact ordered dict__k_lab_meeting_
Compact ordered dict__k_lab_meeting_
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - Compilations
 
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
 
Npc14
Npc14Npc14
Npc14
 
Virtual Machine Constructions for Dummies
Virtual Machine Constructions for DummiesVirtual Machine Constructions for Dummies
Virtual Machine Constructions for Dummies
 

Similar to Php7 hashtable

Aodv routing protocol code in ns2
Aodv routing protocol code in ns2Aodv routing protocol code in ns2
Aodv routing protocol code in ns2Prof Ansari
 
Program 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdfProgram 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdfezzi552
 
Write a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdfWrite a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdfSANDEEPARIHANT
 
Do the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdfDo the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdfahntagencies
 
Потоки в перле изнутри
Потоки в перле изнутриПотоки в перле изнутри
Потоки в перле изнутриIlya Zelenchuk
 
Describe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdfDescribe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdfarihantstoneart
 
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docxodiliagilby
 
You are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdfYou are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdffortmdu
 
In this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdfIn this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdfcharanjit1717
 
This is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdfThis is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdfkostikjaylonshaewe47
 
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdfC++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdfaassecuritysystem
 
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docxGIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docxLeonardN9WWelchw
 
Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Gabriele Modena
 
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdfincludestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdfgalagirishp
 
In c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdfIn c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdfrajkumarm401
 
Unit 4
Unit 4Unit 4
Unit 4siddr
 
PHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhaustedPHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhaustedPiotr Pasich
 
Hacking hhvm
Hacking hhvmHacking hhvm
Hacking hhvmwajrcs
 
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docxajoy21
 

Similar to Php7 hashtable (20)

Aodv routing protocol code in ns2
Aodv routing protocol code in ns2Aodv routing protocol code in ns2
Aodv routing protocol code in ns2
 
Jamming attack in wireless network
Jamming attack in wireless networkJamming attack in wireless network
Jamming attack in wireless network
 
Program 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdfProgram 4You are to write an efficient program that will read a di.pdf
Program 4You are to write an efficient program that will read a di.pdf
 
Write a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdfWrite a C program that reads the words the user types at the command.pdf
Write a C program that reads the words the user types at the command.pdf
 
Do the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdfDo the following program in C++- Create a item class... with and i.pdf
Do the following program in C++- Create a item class... with and i.pdf
 
Потоки в перле изнутри
Потоки в перле изнутриПотоки в перле изнутри
Потоки в перле изнутри
 
Describe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdfDescribe a data structure that supports both removeMin() and rem.pdf
Describe a data structure that supports both removeMin() and rem.pdf
 
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
__MACOSX._assign3assign3.DS_Store__MACOSXassign3._.D.docx
 
You are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdfYou are to write an efficient program that will read a dictionary of.pdf
You are to write an efficient program that will read a dictionary of.pdf
 
In this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdfIn this lab, you will be given a simple code for a min Heap, and you.pdf
In this lab, you will be given a simple code for a min Heap, and you.pdf
 
This is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdfThis is a c++ binary search program I worked so far but still cant g.pdf
This is a c++ binary search program I worked so far but still cant g.pdf
 
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdfC++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
C++ Language -- Dynamic Memory -- There are 7 files in this project- a.pdf
 
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docxGIVEN CODE template -typename T- class DList { private- struct Node {.docx
GIVEN CODE template -typename T- class DList { private- struct Node {.docx
 
Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2Full stack analytics with Hadoop 2
Full stack analytics with Hadoop 2
 
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdfincludestdio.h #includestdlib.h int enqueue(struct node ,.pdf
includestdio.h #includestdlib.h int enqueue(struct node ,.pdf
 
In c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdfIn c++ format, for each function in the code, please using the comme.pdf
In c++ format, for each function in the code, please using the comme.pdf
 
Unit 4
Unit 4Unit 4
Unit 4
 
PHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhaustedPHPConPl 2013 - Allowed memory size of X bytes exhausted
PHPConPl 2013 - Allowed memory size of X bytes exhausted
 
Hacking hhvm
Hacking hhvmHacking hhvm
Hacking hhvm
 
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
(C++ exercise) 1.Implement a circular, doubly linked list with a has.docx
 

Recently uploaded

AI in Action: Real World Use Cases by Anitaraj
AI in Action: Real World Use Cases by AnitarajAI in Action: Real World Use Cases by Anitaraj
AI in Action: Real World Use Cases by AnitarajAnitaRaj43
 
Less Is More: Utilizing Ballerina to Architect a Cloud Data Platform
Less Is More: Utilizing Ballerina to Architect a Cloud Data PlatformLess Is More: Utilizing Ballerina to Architect a Cloud Data Platform
Less Is More: Utilizing Ballerina to Architect a Cloud Data PlatformWSO2
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistandanishmna97
 
API Governance and Monetization - The evolution of API governance
API Governance and Monetization -  The evolution of API governanceAPI Governance and Monetization -  The evolution of API governance
API Governance and Monetization - The evolution of API governanceWSO2
 
Platformless Horizons for Digital Adaptability
Platformless Horizons for Digital AdaptabilityPlatformless Horizons for Digital Adaptability
Platformless Horizons for Digital AdaptabilityWSO2
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingEdi Saputra
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherRemote DBA Services
 
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...WSO2
 
WSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2
 
JohnPollard-hybrid-app-RailsConf2024.pptx
JohnPollard-hybrid-app-RailsConf2024.pptxJohnPollard-hybrid-app-RailsConf2024.pptx
JohnPollard-hybrid-app-RailsConf2024.pptxJohnPollard37
 
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdfRising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdfOrbitshub
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Zilliz
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FMESafe Software
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyKhushali Kathiriya
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FMESafe Software
 
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot ModelMcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot ModelDeepika Singh
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...Zilliz
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoffsammart93
 
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​Bhuvaneswari Subramani
 

Recently uploaded (20)

AI in Action: Real World Use Cases by Anitaraj
AI in Action: Real World Use Cases by AnitarajAI in Action: Real World Use Cases by Anitaraj
AI in Action: Real World Use Cases by Anitaraj
 
Less Is More: Utilizing Ballerina to Architect a Cloud Data Platform
Less Is More: Utilizing Ballerina to Architect a Cloud Data PlatformLess Is More: Utilizing Ballerina to Architect a Cloud Data Platform
Less Is More: Utilizing Ballerina to Architect a Cloud Data Platform
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistan
 
API Governance and Monetization - The evolution of API governance
API Governance and Monetization -  The evolution of API governanceAPI Governance and Monetization -  The evolution of API governance
API Governance and Monetization - The evolution of API governance
 
Platformless Horizons for Digital Adaptability
Platformless Horizons for Digital AdaptabilityPlatformless Horizons for Digital Adaptability
Platformless Horizons for Digital Adaptability
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
 
Strategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a FresherStrategies for Landing an Oracle DBA Job as a Fresher
Strategies for Landing an Oracle DBA Job as a Fresher
 
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...
WSO2 Micro Integrator for Enterprise Integration in a Decentralized, Microser...
 
WSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering Developers
 
JohnPollard-hybrid-app-RailsConf2024.pptx
JohnPollard-hybrid-app-RailsConf2024.pptxJohnPollard-hybrid-app-RailsConf2024.pptx
JohnPollard-hybrid-app-RailsConf2024.pptx
 
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdfRising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
Rising Above_ Dubai Floods and the Fortitude of Dubai International Airport.pdf
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot ModelMcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
 

Php7 hashtable

  • 2.  PHP7 Array Feature  PHP7 HashTable Struct  HashTable Collisions  Translation Table  PHP7 HashTable Operation  Example, Run php-src By GDB  Packed HashTables  Empty HashTable  Application Scenarios  PHP5 Hashtable  Redis Hashtable Agenda
  • 3. We already know about php array • Array 为 HashTable实现 • map,可字符数字做索引键 • Foreach 快于 for • Foreach 顺序为插入顺序 • Count($arr) 快 • In_array 慢 • 理想情况Find 时间复杂度为 O(1) • PHP有current,prev,next,each,end,reset等指针函数
  • 5. /* ht struct */ struct Bucket { zval val; zend_ulong h; zend_string *key; } Bucket; struct HashTable { zend_refcounted_h gc; union { struct { ZEND_ENDIAN_LOHI_4( /*... ellipsis ...*/) } v; uint32_t flags; } u; uint32_t nTableMask; // -nTableSize, -16; (uint)-16==4294967280 Bucket *arData; // array items, uint32_t nNumUsed; // next slot available in arData uint32_t nNumOfElements; // total num of busy elements in arData uint32_t nTableSize; // table size, always a power of two, min:8 uint32_t nInternalPointer; // Used for iteration zend_long nNextFreeElement; // next integer-based key available dtor_func_t pDestructor; // data destructor };
  • 6. struct zval { zend_value value; /* value */ union { struct { ZEND_ENDIAN_LOHI_4( zend_uchar type, /* active type */ zend_uchar type_flags, zend_uchar const_flags, zend_uchar reserved) /*call info for EX(This*/ } v; uint32_t type_info; } u1; union { uint32_t var_flags; uint32_t next; /* hash collision chain */ uint32_t cache_slot; /* literal cache slot */ uint32_t lineno; /* line number (for ast nodes) */ uint32_t num_args; /* arguments number for EX(This) */ uint32_t fe_pos; /* foreach position */ uint32_t fe_iter_idx; /* foreach iterator index */ } u2; }; union zend_value { zend_long lval; double dval; zend_refcounted *counted; zend_string *str; zend_array *arr; zend_object *obj; zend_resource *res; zend_reference *ref; zend_ast_ref *ast; zval *zv; void *ptr; zend_class_entry *ce; zend_function *func; struct { uint32_t w1; uint32_t w2; } ww; };
  • 7. Hashtable collisions 链接法:桶是一个可容纳多个数据的数据结构(例如链表或红黑树) 开放寻址法:所有元素都存放在槽中(装载因子<0.5可考虑) //Hashtable collisions attack $beginTime = microtime(true); $size = pow(2,16); $k = 0; for($i=0;$i<$size;$i++){ $arr[$k] = 0; $k += $size; } echo microtime(true) - $beginTime; echo PHP_EOL; //php5.6 28s, php7 7s
  • 9. Hash layout #define HT_HASH_SIZE(nTableMask) (((size_t)(uint32_t)-(int32_t)(nTableMask)) * sizeof(uint32_t)) #define HT_DATA_SIZE(nTableSize) ((size_t)(nTableSize) * sizeof(Bucket)) #define HT_SIZE_EX(nTableSize, nTableMask) (HT_DATA_SIZE((nTableSize)) + HT_HASH_SIZE((nTableMask))) #define HT_SIZE(ht) HT_SIZE_EX((ht)->nTableSize, (ht)->nTableMask) Bucket *arData; arData = emalloc(HT_SIZE(ht)); /* now alloc this */
  • 10. panda.tv tone shop … Bucket … val value … … … … u1 -1 -1 1 u2.next 92236014990 29192316 0 65536 … h h … 0x… 0x… 0x… … *key *key nTableSize-1 0 1 2 3 4i idx=5 idx=6 7 gc … u … nTableMask -8 *arData 0x… nNumUsed 3 nNumOfElements 3 nTableSize 8 nInternalPointer 0 nNextFreeElement 65537 pDestructor 0x… … 922360… 7 company gc h len val[1] 2 0 -8 -7 -6 -5 -4 -3 -2 -1 $arr = [ 'company'=>'panda.tv', 0=>'tone', 65536=>'shop' ]; 0x0 Translation table bucket PHP7 HashTable : nIndex idx
  • 12. HashTable Init ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC) { GC_REFCOUNT(ht) = 1; GC_TYPE_INFO(ht) = IS_ARRAY; ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS; ht->nTableSize = zend_hash_check_size(nSize); ht->nTableMask = HT_MIN_MASK; // ((uint32_t) -2) HT_SET_DATA_ADDR(ht, &uninitialized_bucket); //HT_SET_DATA_ADDR(ht, ptr) do { (ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); } while (0) ht->nNumUsed = 0; ht->nNumOfElements = 0; ht->nInternalPointer = HT_INVALID_IDX; ht->nNextFreeElement = 0; ht->pDestructor = pDestructor; }
  • 13. Translation Table Demo /* @auth xuruiliang@panda.tv, 在此感谢许老板帮写的demo*/ #include <iostream> #include <cstdlib> #include <cassert> using namespace std; struct P { int x, y; }; const int P_SIZE = 10; int main() { struct P p1 = (struct P){.x = 100, .y = 101}; P *p = (P *)malloc(P_SIZE* (sizeof(int) + sizeof(P))); assert(p != NULL); ((int *)p)[0] = 10; p = (P *)((int *)p + P_SIZE); p[3] = p1; }
  • 14. static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC) { ZEND_HASH_IF_FULL_DO_RESIZE(ht); //if ((ht)->nNumUsed >= (ht)->nTableSize) { zend_hash_do_resize(ht); } idx = ht->nNumUsed++; /* take the next available slot number */ ht->nNumOfElements++; /* increment number of elements */ /* ... */ p = ht->arData + idx; /* Get the bucket in that slot from arData */ p->key = key; /* Assign it the key we want to insert at */ /* ... */ p->h = h = ZSTR_H(key); /* save the hash of the current key into the bucket */ ZVAL_COPY_VALUE(&p->val, pData); /* Copy the value into the bucket's value : add */ nIndex = h | ht->nTableMask; /* Get the translation table index */ // p->val.u2.next = Z_NEXT(p->val) = HT_HASH(ht, nIndex); /* Put the actual element as next of us */ // ((uint32_t*)((ht)->arData))[(int32_t)(nIndex)]=((idx) * sizeof(Bucket)) HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx); /* Put us into the actual translation slot */ HashTable Add
  • 15. Hashtable添加更新元素 • nIndex为hash后的索引,idx为顺序索引 • nNumUsed+1, 用于idx,>nTableSize将触发扩容 • nNumOfElements+1, 记录实际元素个数 • nTableSize, 用于申请内存空间的大小 • nNextFreeElement+1, 用于下一个自增数字索引值 • p->val.u2.next 实现了hash冲突的解决 • p = ht->arData + idx, Bucket内存地址
  • 16. Hashtable del ZEND_API int ZEND_FASTCALL zend_hash_del(HashTable *ht, zend_string *key) { /* ... */ h = zend_string_hash_val(key); /* get the hash from the key (assuming string key here) */ nIndex = h | ht->nTableMask; /* get the translation table index */ idx = HT_HASH(ht, nIndex); /* Get the slot corresponding to that translation index */ while (idx != HT_INVALID_IDX) { /* If there is a corresponding slot */ p = HT_HASH_TO_BUCKET(ht, idx); /* Get the bucket from that slot */ if ((p->key == key) || /* Is it the right bucket ? same key pointer ? */ (p->h == h && /* ... or same hash */ p->key && /* and a key (string key based) */ ZSTR_LEN(p->key) == ZSTR_LEN(key) && /* and same key length */ memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) { /* and same key content ? */ _zend_hash_del_el_ex(ht, idx, p, prev); /* that's us ! delete us */ return SUCCESS; } prev = p; idx = Z_NEXT(p->val); /* get the next corresponding slot from current one */ } return FAILURE; }
  • 18. HashTable Resize static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) { IS_CONSISTENT(ht); HT_ASSERT(GC_REFCOUNT(ht) == 1); if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { //只有到一定阈值才进行rehash操作 HANDLE_BLOCK_INTERRUPTIONS(); zend_hash_rehash(ht); //重建索引数组 HANDLE_UNBLOCK_INTERRUPTIONS(); } else if (ht->nTableSize < HT_MAX_SIZE) { //扩大为两倍 void *new_data, *old_data = HT_GET_DATA_ADDR(ht); uint32_t nSize = ht->nTableSize + ht->nTableSize; Bucket *old_buckets = ht->arData; HANDLE_BLOCK_INTERRUPTIONS(); new_data = pemalloc(HT_SIZE_EX(nSize, -nSize), ht->u.flags & HASH_FLAG_PERSISTENT); //新分配arData空间,大小为:(sizeof(Bucket) + sizeof(uint32_t)) * nSize ht->nTableSize = nSize; ht->nTableMask = -ht->nTableSize; //nTableSize负值 HT_SET_DATA_ADDR(ht, new_data); //将arData指针偏移到Bucket数组起始位置 memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); //将旧的Bucket数组拷到新空间 pefree(old_data, ht->u.flags & HASH_FLAG_PERSISTENT); //释放旧空间 zend_hash_rehash(ht); //重建索引数组 HANDLE_UNBLOCK_INTERRUPTIONS(); } else { zend_error_noreturn(E_ERROR, "Possible integer overflow in memory allocation (%zu * %zu + %zu)", ht->nTableSize * 2, sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket)); } }
  • 20. git clone -b PHP-7.0.11 git@github.com:php/php-src.git cd php-src ~/php-src> ./buildconf ~/php-src> ./configure --disable-all --enable-debug --prefix=$HOME/php-debug ~/php-src> make ~/php-src> make install gdb --args bin/php -f hashtable-debug.php break /home/1/php-src/Zend/zend_hash.c:839 if h==589 break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key14")==0 break /home/1/php-src/Zend/zend_hash.c:628 if strcmp((char *)&key->val,"key2")==0 break /home/1/php-src/Zend/zend_hash.c:839 if h==4153 break /home/1/php-src/Zend/zend_hash.c:561 if strncmp((char *)&key->val,"class_exists",key->len)==0 GDB调试方案
  • 21. <?PHP $tmp_user = array( 'name'=>'wangtong', 'worker_id'=>'P589', '589'=>'see-nNextFreeElement', //here @1 'company'=>'panda.tv', 'email'=>'wangtong@panda.tv', 'location'=>'bj-soho-18', 'department01' => 'g-biz', 'department02' => 'g-tech', 1006440989 => 'see-nTableSize', 'key10' => 'pandatv.com', 'key11' => 'shop.gate.panda.tv', 'key12' => 'mall.gate.panda.tv', 'key13' => 'bag.gate.panda.tv', 'key14' => 'see-nTableSize', // here, @2 ); foreach($tmp_user as $k=>$v){ $user_info[$k]=$v; //here @1 @2 } unset($user_info['worker_id']); unset($user_info['589']); unset($user_info['company']); unset($user_info['email']); unset($user_info['location']); unset($user_info['department01']); unset($user_info['department02']); unset($user_info['1006440989']); $user_info['key2']='see_nNumUsed'; // here @3 unset($user_info['key10']); unset($user_info['key11']); unset($user_info['key12']); unset($user_info['key13']); unset($user_info['key14']); $user_info['key3']='val3'; $user_info['key4']='val4'; $user_info['4153'] = 'see-nTableSize';//here @4 运行示例
  • 22. 示例运行结果 nTableSize nNumUsed nNumOfEle nNextFreeEle Func Mark 3个item@1 16 3 3 590 _zend_hash_index_add_or_update_i Init,nTableSize=16; nNextFreeEle=589+1 14个item@2 16 14 14 1006440990 _zend_hash_add_or_update_i nNumUsed = 14; nNumOfElement = 14; 3个item@1 8 3 3 590 _zend_hash_index_add_or_update_i Init, nTableSize=8; 14个item@2 16 14 14 1006440990 _zend_hash_add_or_update_i nTableSize *= 2; Unset后@3 16 15 7 1006440990 _zend_hash_add_or_update_i nNumUsed!=nNumOfEle; Hash fragmentation 5个item@4 16 5 5 1006440990 _zend_hash_index_add_or_update_i nNumUsed = 5; resizing and compacting;
  • 24. packed hashtables • 理解为传统意义上的‘数组’,而不是map • 在packed hashtables中,arHash数组为NULL,查找只会直接在 arData中进行。 • packed hashtable只会作用于键递增的数组,这些数组的key之间 可以有间隔,但必须总是递增的。 • bucket->h是冗余的; bucket->key的值永远都是NULL • 最简单的理解:用idx做索引,没有转换表,没有key.
  • 26. 空hash表 • arData/arHash 数组只会在插入第一个元素时分配内存 • nTableSize(8)& ht->nTableMask (0) == 0 • arHash 数组只有一个带有 INVALID_IDX 值、下标为 0 的元素 (uninitialized_bucket,并且被静态分配了内存) • 查找时,我们会一直找到 INVALID_IDX 值,意味着 key(实际上你 只想静态分配创建一个空表)没有被找到
  • 28. 应用场景 • 自动扩容会导致多次分配内存及复制操作 • 数字索引比字母索引效率更高 • 不会自动缩容,nNumUsed 达到 nTableSize会压缩 • In_array效率会低 • Hash冲突还是要注意的,Dos攻击。 • Foreach的顺序为插入顺序 • 尽量使用 Packed hashtable • Time33 hash算法适合英文词汇的hash;Time65适合大小写混写hash • 理想情况下O(1)的时间复杂度,平均查找复杂度为O(L) 一旦 nNumUsed 达到 nTableSize,PHP会通过丢弃任何 UNDEF 的记录,自动压缩 arData 数组
  • 30. typedef struct _hashtable { uint nTableSize; uint nTableMask; uint nNumOfElements; ulong nNextFreeElement; Bucket *pInternalPointer; /* Used for element traversal */ Bucket *pListHead; Bucket *pListTail; Bucket **arBuckets; dtor_func_t pDestructor; zend_bool persistent; unsigned char nApplyCount; zend_bool bApplyProtection; #if ZEND_DEBUG int inconsistent; #endif } HashTable; typedef struct bucket { ulong h; uint nKeyLength; void *pData; void *pDataPtr; struct bucket *pListNext; struct bucket *pListLast; struct bucket *pNext; struct bucket *pLast; const char *arKey; } Bucket;
  • 31.
  • 32. PHP5 vs PHP7 • PHP 5.x 每个元素需要 144 bytes。在 PHP 7 中,降低到了 36 bytes, 或者打包情况下 32 bytes • Buckets 需要单独分配16bytes内存,冗余且降低缓存效率 • Zvals 需要分开分配会产生额外头开销冗余, 16bytes • 双向链表中的每个bucket需要4个指针用于链表的连接,32字节 • PHP7更少的内存占用,更好的CPU缓存利用率,更好的性能 • PHP7 在线性的内存地址上进行遍历,而不是在一段内存地址随机 的链表上遍历
  • 33. <?PHP $startMemory = memory_get_usage(); //$array = range(1, 100000); for($i=0;$i<100000; $i++){ $array[$i] = $i; } echo memory_get_usage() - $startMemory, " bytes\n"; $array['k'.$i] = $i; PHP5 vs PHP7 memory
  • 35. Redis hashtable typedef struct dictEntry { void *key; union { void *val; uint64_t u64; int64_t s64; double d; } v; struct dictEntry *next; } dictEntry; /* This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing, for the old to the new table. */ typedef struct dictht { dictEntry **table; unsigned long size; unsigned long sizemask; unsigned long used; } dictht; typedef struct dict { dictType *type; void *privdata; dictht ht[2]; long rehashidx; /* rehashing not in progress if rehashidx == -1 */ int iterators; /* number of iterators currently running */ } dict;
  • 36. Redis vs PHP7 • Redis业务场景在存储,所以需要实现扩容的异步化 • Redis hgetall无序,少有顺序遍历业务场景,无需保证顺序 • Redis 使用的是 MurmurHash2,更适用于规律性强的key
  • 37. 感谢 • 感谢极客好人许老板教我C语言 • 感谢cap与大家给我进步的机会,同我一起学习 • 感谢以下开源贡献者 • http://jpauli.github.io/2016/04/08/hashtables.html • http://www.laruence.com/2009/08/23/1065.html • http://www.laruence.com/2009/07/23/994.html • https://juejin.im/entry/58f87f1c44d9040069ca999c • https://crispgm.com/page/php7-new-hashtable-implementation.html