字典和散列表

字典和散列表

创建字典

/**
 * Dictionary (map) of keys to values, backed by a plain object that is kept
 * private inside the constructor's closure.
 *
 * Keys are used as property names of the backing object, so they follow the
 * usual property-key coercion rules (e.g. the number 1 and the string '1'
 * address the same entry).
 */
function Dictionary() {
    let items = {};
    // Borrow hasOwnProperty from Object.prototype so the check cannot be
    // shadowed by a stored entry named 'hasOwnProperty'.
    const hasOwn = Object.prototype.hasOwnProperty;

    /**
     * @param {string} key
     * @returns {boolean} true only if `key` was stored in this dictionary.
     */
    this.has = function(key) {
        // Own-property check: the `in` operator would also report names
        // inherited from Object.prototype such as 'toString' or 'constructor'.
        return hasOwn.call(items, key);
    };

    /**
     * Adds `key` or overwrites the value already stored under it.
     * @param {string} key
     * @param {*} value
     */
    this.set = function(key, value) {
        items[key] = value;
    };

    /**
     * Removes `key` from the dictionary.
     * @param {string} key
     * @returns {boolean} true if an entry was removed, false if `key` was absent.
     */
    this.delete = function(key) {
        if (!this.has(key)) {
            return false;
        }
        delete items[key];
        return true;
    };

    /**
     * @param {string} key
     * @returns {*} the stored value, or undefined when `key` is absent.
     */
    this.get = function(key) {
        return this.has(key) ? items[key] : undefined;
    };

    /**
     * @returns {Array} the stored values, in the same order as keys().
     */
    this.values = function() {
        return Object.keys(items).map(function(key) {
            return items[key];
        });
    };

    /**
     * @returns {string[]} the stored keys.
     */
    this.keys = function() {
        return Object.keys(items);
    };

    /**
     * @returns {Object} the backing object itself (not a copy), so changes
     *     made by the caller are visible to the dictionary.
     */
    this.getItems = function() {
        return items;
    };
}

散列表

散列算法的作用是尽可能快地在数据结构中找到一个值。使用散列函数就能知道值的具体位置，因此能够快速检索到该值。散列函数的作用是给定一个键值，然后返回值在表中的地址。

/**
 * Minimal hash table: values are stored in an array at the index produced by
 * the "lose lose" hash of their key. No collision handling is done, so two
 * keys with the same hash code share one slot and the later put() wins.
 */
function HashTable() {
    const slots = [];

    // Private hash function: adds up the char codes of every character in
    // `key` and keeps the remainder of dividing by 37 so the index stays small.
    function hashOf(key) {
        let total = 0;
        let idx = 0;
        while (idx < key.length) {
            total += key.charCodeAt(idx);
            idx += 1;
        }
        return total % 37;
    }

    // Store `value` in the slot selected by the key's hash code.
    this.put = function(key, value) {
        slots[hashOf(key)] = value;
    };

    // Read whatever is stored in the slot selected by the key's hash code.
    this.get = function(key) {
        const slot = hashOf(key);
        return slots[slot];
    };

    // Clear the slot by writing undefined rather than splicing it out:
    // removing the array position would shift the other entries away from
    // the indexes their hash codes point to.
    this.remove = function(key) {
        const slot = hashOf(key);
        slots[slot] = undefined;
    };

    // Log every occupied slot as "<index>: <value>".
    this.print = function() {
        slots.forEach(function(entry, idx) {
            if (entry === undefined) {
                return;
            }
            console.log(idx + ": " + entry);
        });
    };
}
// Usage example: fill a table with three sample entries, then read one back.
let hash = new HashTable();
[
    ['Candalf', 'gandalf@email.com'],
    ['John', 'johnsnow@email.com'],
    ['Tyrion', 'tyrion@email.com']
].forEach(function(entry) {
    hash.put(entry[0], entry[1]);
});
console.log(hash.get('Candalf'));  // prints: gandalf@email.com

处理散列表中的冲突

有时候，一些键会有相同的散列值。不同的值在散列表中对应相同位置的时候，我们称其为冲突。后面添加的元素会覆盖前面的。处理冲突主要有：分离链接、线性探查、双散列法。

分离链接

分离链接法包括为散列表的每一个位置创建一个链表并将元素存储在里面。是处理冲突的简单方法，但是还需要额外的存储空间。

为了表示要加入LinkedList实例的元素，我们需要创建一个新的辅助类，称它为ValuePair类：

// Holder for one key/value entry of a hash table bucket.
// toString() renders the pair as "[key - value]".
let ValuePair = function(key, value) {
    this.key = key;
    this.value = value;
    this.toString = function() {
        let text = '[';
        text += this.key;
        text += ' - ';
        text += this.value;
        text += ']';
        return text;
    };
};

// put (separate chaining): store a key/value pair.
// Every occupied slot of `table` holds a LinkedList of ValuePair entries, so
// keys whose hash codes collide share a bucket instead of overwriting each other.
this.put = function(key, value) {
    let position = loseloseHashCode(key);
    if (table[position] == null) {
        // First key hashing to this slot: create its bucket lazily.
        table[position] = new LinkedList();
    }
    // If the key is already in the bucket, update it in place. Appending a
    // second entry would leave get() returning the old value forever.
    for (let node = table[position].getHead(); node; node = node.next) {
        if (node.element.key === key) {
            node.element.value = value;
            return;
        }
    }
    // Fixed: the constructor is `ValuePair`; the lowercase `valuePair` was an
    // undefined name and threw a ReferenceError on every call.
    table[position].append(new ValuePair(key, value));
};
// get (separate chaining): look up the value stored under `key`.
// Returns undefined when the slot has no bucket or the bucket does not
// contain the key.
this.get = function(key) {
    let position = loseloseHashCode(key);
    let bucket = table[position];
    if (bucket === undefined) {
        return undefined;
    }
    // One pass over the bucket covers the first, middle and last node alike,
    // and simply ends if the list has no head.
    for (let node = bucket.getHead(); node; node = node.next) {
        if (node.element.key === key) {
            return node.element.value;
        }
    }
    return undefined;
};
// remove (separate chaining): delete the entry stored under `key`.
// Returns true if an entry was removed, false if the key was not found.
this.remove = function(key) {
    let position = loseloseHashCode(key);
    let bucket = table[position];
    if (bucket === undefined) {
        return false;
    }
    // One pass over the bucket covers the first, middle and last node alike.
    for (let node = bucket.getHead(); node; node = node.next) {
        if (node.element.key === key) {
            bucket.remove(node.element);
            if (bucket.isEmpty()) {
                // Drop the empty list so the slot reads as unused again.
                table[position] = undefined;
            }
            return true;
        }
    }
    return false;
};

线性探查

当向表中某个位置加入一个新元素的时候，如果索引为index的位置已经被占据了，就尝试index+1的位置；如果index+1的位置也被占据了，就尝试index+2的位置，以此类推。

// put (linear probing): store a key/value pair.
// The pair goes into the slot given by the hash code, or, if that slot is
// taken by another key, into a free slot at a higher index.
this.put = function(key, value) {
    let position = loseloseHashCode(key);
    let firstFree = -1;
    // Scan the whole probe range instead of stopping at the first free slot:
    // remove() leaves holes behind, so an existing entry for `key` may sit
    // beyond one. Finding it lets us overwrite it rather than insert a
    // duplicate that get() would never reach.
    for (let index = position; index < table.length; index++) {
        if (table[index] === undefined) {
            if (firstFree === -1) {
                firstFree = index;
            }
        } else if (table[index].key === key) {
            table[index] = new ValuePair(key, value);
            return;
        }
    }
    if (firstFree === -1) {
        // No hole in range: use the home slot when it lies past the end of
        // the array, otherwise grow the array by one.
        firstFree = Math.max(position, table.length);
    }
    table[firstFree] = new ValuePair(key, value);
};
// get (linear probing): look up the value stored under `key`.
// Returns undefined when the key is not in the table.
this.get = function(key) {
    // Probe upward from the home slot. Empty slots are skipped rather than
    // treated as the end of the chain, because remove() leaves holes behind.
    // The scan is bounded by table.length: the previous unbounded loop never
    // terminated when the home slot held another key and `key` was absent.
    for (let index = loseloseHashCode(key); index < table.length; index++) {
        if (table[index] !== undefined && table[index].key === key) {
            return table[index].value;
        }
    }
    return undefined;
};
// remove (linear probing): delete the entry stored under `key`.
// Returns true if an entry was removed, false if the key was not found.
// Fixed: this was assigned to `this.get`, which replaced the lookup method,
// and the home-slot branch read `index` before it was declared.
this.remove = function(key) {
    // Probe upward from the home slot, skipping holes left by earlier
    // removals. The scan is bounded by table.length so that a missing key
    // ends the search instead of looping forever.
    for (let index = loseloseHashCode(key); index < table.length; index++) {
        if (table[index] !== undefined && table[index].key === key) {
            // Clear the slot in place; the other entries keep their indexes.
            table[index] = undefined;
            return true;
        }
    }
    return false;
};

创建更好的散列函数

我们实现的loselose散列函数并不是一个良好的散列函数，因为它会产生太多的冲突。下面实现一个djb2：

/**
 * djb2 hash: starts from 5381 and, for each UTF-16 code unit of `key`,
 * multiplies the running hash by 33 and adds the code unit. The result is
 * reduced modulo 1013.
 *
 * @param {string} key
 * @returns {number} an integer in the range [0, 1013).
 */
let djb2HashCode = function(key) {
    let hash = 5381;
    for (let i = 0; i < key.length; i++) {
        // Reduce on every step. The unreduced product passes 2^53 after about
        // nine characters (losing precision) and reaches Infinity for long
        // keys, which made the final remainder NaN. Taking the remainder
        // early gives the same result as exact arithmetic would.
        hash = (hash * 33 + key.charCodeAt(i)) % 1013;
    }
    // Still needed for the empty key, where the loop body never runs.
    return hash % 1013;
};

它包括初始化一个hash变量并赋值为一个质数（大多数实现都使用5381），然后迭代参数key，将hash与33相乘（33用作一个魔力数），并和当前迭代到的字符的ASCII码值相加。最后，我们使用相加的和与另一个随机质数相除的余数。