Java 基础 - Java 基础 - 集合 - HashMap - 《ZeroCode》

">
put 方法
- 步骤
- 源码
红黑树转换
扩容
remove方法
- 源码
get 方法
- 步骤
- 源码
线程安全
- 问题
- 解决方法
  - Way1. 将Map转为包装类
  - Way2. ConcurrentHashMap
总结
参考

put 方法

步骤

对key的hashCode做hash操作，然后再计算在桶中的index（1.5 HashMap的哈希函数）；
如果没碰撞直接放到桶里；
如果碰撞了，以链表的形式存在桶后；
如果节点已经存在，就替换old value(保证key的唯一性)

如果桶满了(超过阈值，阈值=loadfactor*current capacity，load factor默认0.75)，就要resize。

源码

public V put(K key, V value) {
     return putVal(hash(key), key, value, false, true);
 }
 final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) {
     Node<K,V>[] tab; Node<K,V> p; int n, i;
     // 如果hashmap为空,调用resize方法，对hashmap进行扩容
     if ((tab = table) == null || (n = tab.length) == 0)
         n = (tab = resize()).length;
     // 根据index找到目标桶后，若当前桶上没有结点，那么直接新增一个结点，赋值给该bucket
     if ((p = tab[i = (n - 1) & hash]) == null)
         tab[i] = newNode(hash, key, value, null);
     else {
         Node<K,V> e; K k;
         // 若当前桶上有链表，且头结点就匹配，那么直接做替换即可；
         // key的hash值一样，key一样，key不为null
         if (p.hash == hash &&
             ((k = p.key) == key || (key != null && key.equals(k))))
             e = p;
         // 若当前桶上的是树结构，则转为红黑树的插入操作
         else if (p instanceof TreeNode)
             e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
         else {
             // 若上面的判断都不成立，则对链表做遍历操作
             for (int binCount = 0; ; ++binCount) {
                 // 尾插法，jdk 1.7 是头插法
                 // 若没有结点匹配，则在链表末尾追加
                 if ((e = p.next) == null) {
                     p.next = newNode(hash, key, value, null);
                     // 当链表的长度大于8 时，会将链表转化树
                     if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                         // 转红黑树
                         treeifyBin(tab, hash);
                     break;
                 }
                 if (e.hash == hash &&
                     ((k = e.key) == key || (key != null && key.equals(k))))
                     break;
                 p = e;
             }
         }
         //  若链表中有结点匹配，则做value替换
         if (e != null) { // existing mapping for key
             V oldValue = e.value;
             if (!onlyIfAbsent || oldValue == null)
                 e.value = value;
             afterNodeAccess(e);
             return oldValue;
         }
     }
     ++modCount;
     // 判断是否需要扩容
     if (++size > threshold)
         resize();
     afterNodeInsertion(evict);
     return null;
 }

红黑树转换

final void treeifyBin(Node<K,V>[] tab, int hash) {
    int n, index; Node<K,V> e;
    // 若表为空，或表长度小于MIN_TREEIFY_CAPACITY，也不做转换，直接做扩容处理。
    // MIN_TREEIFY_CAPACITY这个其实是转化成红黑树的另外一个条件，就是数组长度要大于64
    // 如果小于64 就可以通过扩容的方法，来减小冲突，没有必要转换成红黑树，因为红黑树的转换也是需要很大是 时间和空间的代价
    if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
        resize();
    // 获得需要树形化的 链表的第一个节点 也就是数组对应的数组节点table[i]
    else if ((e = tab[index = (n - 1) & hash]) != null) {
        TreeNode<K,V> hd = null, tl = null;
        do {
            //将普通的node节点 构造成TreeNode
            TreeNode<K,V> p = replacementTreeNode(e, null);
            if (tl == null)
                hd = p;
            else {
                p.prev = tl;
                tl.next = p;
            }
            tl = p;
        } while ((e = e.next) != null);
        // 替换成 构造成双链表的节点
        if ((tab[index] = hd) != null)
            // 进行红黑树转换
            hd.treeify(tab);
    }
}

final void treeify(Node<K,V>[] tab) {
            TreeNode<K,V> root = null;
            // 循环遍历链表
            for (TreeNode<K,V> x = this, next; x != null; x = next) {
                next = (TreeNode<K,V>)x.next;
                x.left = x.right = null;
                // 第一次循环        
                // 将x 赋值给root节点 初始化root节点的颜色 黑色（红黑树的根节点 一定是黑色）
                if (root == null) {
                    x.parent = null;
                    x.red = false;
                    root = x;
                }
                // 此处开始构造红黑树
                else {
                    //获得当前节点的 key  和 hash
                    K k = x.key;
                    int h = x.hash;
                    Class<?> kc = null;
                    // 从根节点开始每次遍历 寻找插入的位置
                    for (TreeNode<K,V> p = root;;) {
                        int dir, ph;
                        K pk = p.key;
          // 获得根节点的 key 
          // 判断key 与root的key的大小 确定dir的值，也就确定了插入的方向 ，是root节点的左边还是右边
                        if ((ph = p.hash) > h)
                            dir = -1;
                        else if (ph < h)
                            dir = 1;
    // 如果key的hash相等  或者实现comparable接口  又或者是实现了该接口 但是两个比较结果也相同
    // 所以为了打破这种平衡必须再次调用tieBreakOrder方法 比较一次 返回值 只有-1 或1 
                        else if ((kc == null &&
                                  (kc = comparableClassFor(k)) == null) ||
                                 (dir = compareComparables(kc, k, pk)) == 0)
                            dir = tieBreakOrder(k, pk);
                        TreeNode<K,V> xp = p;
                        if ((p = (dir <= 0) ? p.left : p.right) == null) {
                            // 当前链表节点 作为 当前树节点的父节点
                            x.parent = xp;
                            if (dir <= 0)
                                xp.left = x;
                            else
                                xp.right = x;
                            // 每次插入完一个节点之后，就要进行红黑树的调整
                            root = balanceInsertion(root, x);
                            break;
                        }
                    }
                }
            }
            // 根节点目前到底是链表的哪一个节点是不确定的 
            // 要将红黑树的根节点 移动至链表节点的第一个位置 也就是 table[i]的位置。
            moveRootToFront(tab, root);
        }

static <K,V> void moveRootToFront(Node<K,V>[] tab, TreeNode<K,V> root) {
            int n;
            if (root != null && tab != null && (n = tab.length) > 0) {
                // 获得当红黑树所处在 table数组的哪一个位置   (n - 1) & root.hash效果与取模相同
                int index = (n - 1) & root.hash;
                // 获得改位置的节点对象 也就是链表的第一个节点
                TreeNode<K,V> first = (TreeNode<K,V>)tab[index];
                // 如果这个位置不是 root节点 那么就要进行调整了
                if (root != first) {
                    // 定义一个临时Node 变量
                    Node<K,V> rn;
                    // 直接将链表的第一个位置 指向了root节点 
                    tab[index] = root;
                    // 将前驱和后继相连接 root节点从原链表中脱离出来了
                    TreeNode<K,V> rp = root.prev;
                    if ((rn = root.next) != null)
                        ((TreeNode<K,V>)rn).prev = rp;
                    if (rp != null)
                        rp.next = rn;
                    // 将root与原来双链表的第一个节点相连
                    // 这样root又回到双链表当中 并且在双链表的第一个位置上
                    if (first != null)
                        first.prev = root;
                    root.next = first;
                    // root节点的前驱节点设置为null  因为他双链表的第一个节点 没有前驱
                    root.prev = null;
                }
                assert checkInvariants(root);
            }
        }

扩容

在扩充HashMap的时候，不需要重新计算hash，只需要看看原来的hash值新增的那个bit是1还是0就好了，是0的话索引没变，是1的话索引变成“原索引+oldCap”。可以看看下图为16扩充为32的resize示意图：

这个设计确实非常的巧妙，既省去了重新计算hash值的时间，而且同时，由于新增的1bit是0还是1可以认为是随机的，因此resize的过程，均匀的把之前的冲突的节点分散到新的bucket了。

触发扩容的场景

哈希table为null或长度为0；
Map中存储的k-v对数量超过了阈值threshold；
链表中的长度超过了TREEIFY_THRESHOLD，但表长度却小于MIN_TREEIFY_CAPACITY。

扩容步骤

扩容：哈希表长度*2
移动：对每个节点重新计算哈希值，重新计算每个元素在数组中的位置，将原来的元素移动到新的哈希表中。

源码

// 初始化或加倍表大小。如果为 null，则根据字段阈值中保留的初始容量目标进行分配。否则，由于我们使用的是两个扩展的电源，因此每个 bin 中的元素必须保持在同一索引中，或者以新表中两个偏移的功率移动。
final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        int oldThr = threshold;
        int newCap, newThr = 0;
        if (oldCap > 0) {
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                // oldThr*2
                newThr = oldThr << 1; 
        }
        else if (oldThr > 0) // initial capacity was placed in threshold
            newCap = oldThr;
        else {               // zero initial threshold signifies using defaults
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        if (newThr == 0) {
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr;
        @SuppressWarnings({"rawtypes","unchecked"})
        Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
        //上面是做hash表的长度拓展
        //下面是如何迁移数据
        if (oldTab != null) {
            // 循环遍历哈希表的每个不为null的桶
            // 注意，这里是"++j"，略过了oldTab[0]的处理
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    oldTab[j] = null;
                    // 若只有一个结点，则原地存储
                    if (e.next == null)
                        newTab[e.hash & (newCap - 1)] = e;
                    else if (e instanceof TreeNode)
                        // 如果是红黑树，则执行split拆分
                        // 拆分过程中，如果新的红黑树长度小于等于 6 , 则红黑树就会退化成链表
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else { // preserve order
                        // "lo"前缀的代表要在原桶上存储，"hi"前缀的代表要在新桶上存储
                        // loHead代表是链表的头结点，loTail代表链表的尾结点
                        Node<K,V> loHead = null, loTail = null;
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            if ((e.hash & oldCap) == 0) {
                                // 有序插入，即依次将原链表的结点追加到当前链表的末尾
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        if (hiTail != null) {
                            hiTail.next = null;
                            // 需要搬迁的结点，新下标为从当前下标往前挪oldCap个距离。
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        return newTab;
    }

remove方法

源码

/**
* 从HashMap中删除掉指定key对应的键值对，并返回被删除的键值对的值
* 如果返回空，说明key可能不存在，也可能key对应的值就是null
* 如果想确定到底key是否存在可以使用containsKey方法
*/ 
   public V remove(Object key) {
        Node<K,V> e;
        return (e = removeNode(hash(key), key, null, false, true)) == null ?
            null : e.value;
    }
    /**
     * 方法为final，不可被覆写，子类可以通过实现afterNodeRemoval方法来增加自己的处理逻辑（解析中有描述）
     *
     * @param hash key的hash值，该值是通过hash(key)获取到的
     * @param key 要删除的键值对的key
     * @param value 要删除的键值对的value，该值是否作为删除的条件取决于matchValue是否为true
     * @param matchValue 如果为true，则当key对应的键值对的值equals(value)为true时才删除；否则不关心value的值
     * @param movable 删除后是否移动节点，如果为false，则不移动
     * @return 返回被删除的节点对象，如果没有删除任何节点则返回nul
     */
    final Node<K,V> removeNode(int hash, Object key, Object value,
                               boolean matchValue, boolean movable) {
        Node<K,V>[] tab; Node<K,V> p; int n, index;
    /*
     * 如果 节点数组tab不为空、数组长度n大于0、根据hash定位到的节点对象p（该节点为 树的根节点 或 链表的首节点）不为空
     * 需要从该节点p向下遍历，找到那个和key匹配的节点对象
     */
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (p = tab[index = (n - 1) & hash]) != null) {
            Node<K,V> node = null, e; K k; V v;
            // 如果当前节点的键和key相等，那么当前节点就是要删除的节点，赋值给node
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                node = p;
            /*
         * 到这一步说明首节点没有匹配上，那么检查下是否有next节点
         * 如果没有next节点，就说明该节点所在位置上没有发生hash碰撞, 就一个节点并且还没匹配上，也就没得删了，最终也就返回null了
         * 如果存在next节点，就说明该数组位置上发生了hash碰撞，此时可能存在一个链表，也可能是一颗红黑树
         */
            else if ((e = p.next) != null) {
                // 如果是红黑树
                if (p instanceof TreeNode)
                    node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
                // 如果是链表，循环遍历查找
                else {
                    do {
                        if (e.hash == hash &&
                            ((k = e.key) == key ||
                             (key != null && key.equals(k)))) {
                            node = e;
                            break;
                        }
                        p = e;
                    } while ((e = e.next) != null);
                }
            }
            /*
         * 如果node不为空，说明根据key匹配到了要删除的节点
         * 如果不需要对比value值  或者  需要对比value值但是value值也相等
         * 那么就可以删除该node节点了
         */
            if (node != null && (!matchValue || (v = node.value) == value ||
                                 (value != null && value.equals(v)))) {
                // 如果该节点是个TreeNode对象，说明此节点存在于红黑树结构中，调用removeTreeNode方法（该方法单独解析）移除该节点
                if (node instanceof TreeNode)
                    ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
                // 如果该节点不是TreeNode对象，node == p 的意思是该node节点就是首节点
                else if (node == p)
                    // 由于删除的是首节点，那么直接将节点数组对应位置指向到第二个节点即可
                    tab[index] = node.next;
                else
                    // 如果node节点不是首节点，此时p是node的父节点，由于要删除node，所有只需要把p的下一个节点指向到node的下一个节点即可把node从链表中删除了
                    p.next = node.next;
                // HashMap的修改次数递增
                ++modCount;
                // HashMap的元素个数递减
                --size;
                // 调用afterNodeRemoval方法，该方法HashMap没有任何实现逻辑，目的是为了让子类根据需要自行覆写
                afterNodeRemoval(node);
                return node;
            }
        }
        return null;
    }

get 方法

步骤

先根据key计算hash值，进一步计算得到哈希table的目标index，若此bucket上为红黑树，则再红黑树上查找，若不是红黑树，遍历链表。

源码

    public V get(Object key) {
        Node<K,V> e;
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    }
    final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        // 根据哈希表的下标【(n - 1) & hash】来找到对应的桶
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (first = tab[(n - 1) & hash]) != null) {
            // always check first node
            // 判断桶的hash以及key都一样，则返回该桶
            if (first.hash == hash && 
                ((k = first.key) == key || (key != null && key.equals(k))))
                return first;
            // 如果发生hash碰撞则通过链表查找
            if ((e = first.next) != null) {
                if (first instanceof TreeNode)
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
                } while ((e = e.next) != null);
            }
        }
        return null;
    }

线程安全

HashMap是线程不安全的，因为HashMap不是线程同步的操作。

问题

在jdk1.7中，在多线程环境下，扩容时会造成环形链或数据丢失。
在jdk1.8中，在多线程环境下，会发生数据覆盖的情况。

解决方法

Way1. 将Map转为包装类

Map<String, Integer> oMap = new HashMap<>();
// ...
// 转为线程安全的map
Map<String, Integer> sMap = Collections.synchronizedMap(oMap);

Way2. ConcurrentHashMap

ConcurrentHashMap 是Java并发包中提供的一个线程安全且高效的 HashMap 实现。

Map<String, Integer> cMap = new ConcurrentHashMap<>();
cMap.put("test001", 1);
cMap.put("test002", 2);
System.out.println(cMap.get("test001"));

总结

基于JDK 1.8 分析总结

结构：数组（hash表）+ 链表+ 红黑树（链表长度大于等于8，且hashmap长度大于等于64）
put采用的是尾插法，避免了之前的闭环风险
扩容是当前长度2倍
重写equls时，必须重写hashCode
不是线程安全的，并发时可能会导致数据覆盖（数据丢失）
参考

Java 基础 - 集合 - HashMap