介绍

散列函数(英语:Hash function)又称散列算法、哈希函数,是一种从任何一种数据中创建小的数字“指纹”的方法。散列函数把消息或数据压缩成摘要,使得数据量变小,将数据的格式固定下来。
在Java中,每个Object都有一个hashCode方法,有需要可以进行重写。

Object类的hashCode

  1. public native int hashCode();

可以看到此方法为native方法,底层是调用了C++的函数,因为是在Object类中,翻开JDK源码的Object.c的定义

  1. #include <stdio.h>
  2. #include <signal.h>
  3. #include <limits.h>
  4. #include "jni.h"
  5. #include "jni_util.h"
  6. #include "jvm.h"
  7. #include "java_lang_Object.h"
  8. static JNINativeMethod methods[] = {
  9. {"hashCode", "()I", (void *)&JVM_IHashCode},
  10. {"wait", "(J)V", (void *)&JVM_MonitorWait},
  11. {"notify", "()V", (void *)&JVM_MonitorNotify},
  12. {"notifyAll", "()V", (void *)&JVM_MonitorNotifyAll},
  13. {"clone", "()Ljava/lang/Object;", (void *)&JVM_Clone},
  14. };
  15. JNIEXPORT void JNICALL
  16. Java_java_lang_Object_registerNatives(JNIEnv *env, jclass cls)
  17. {
  18. (*env)->RegisterNatives(env, cls,
  19. methods, sizeof(methods)/sizeof(methods[0]));
  20. }
  21. JNIEXPORT jclass JNICALL
  22. Java_java_lang_Object_getClass(JNIEnv *env, jobject this)
  23. {
  24. if (this == NULL) {
  25. JNU_ThrowNullPointerException(env, NULL);
  26. return 0;
  27. } else {
  28. return (*env)->GetObjectClass(env, this);
  29. }
  30. }

可以看到hashCode方法实际上调用的是JVM_IHashCode这个方法。查找这个方法发现在jvm.cpp中定义

  1. // java.lang.Object ///////////////////////////////////////////////
  2. JVM_ENTRY(jint, JVM_IHashCode(JNIEnv* env, jobject handle))
  3. JVMWrapper("JVM_IHashCode");
  4. // as implemented in the classic virtual machine; return 0 if object is NULL
  5. return handle == NULL ? 0 : ObjectSynchronizer::FastHashCode (THREAD, JNIHandles::resolve_non_null(handle)) ;
  6. JVM_END

从这里看出JVM_IHashCode实际又是调用的ObjectSynchronizer::FastHashCode这个方法,继续查找到这个方法的定义在synchronizer.cpp中:

FastHashCode函数

  1. intptr_t ObjectSynchronizer::FastHashCode (Thread * Self, oop obj) {
  2. // 如果使用了偏向锁
  3. if (UseBiasedLocking) {
  4. // NOTE: many places throughout the JVM do not expect a safepoint
  5. // to be taken here, in particular most operations on perm gen
  6. // objects. However, we only ever bias Java instances and all of
  7. // the call sites of identity_hash that might revoke biases have
  8. // been checked to make sure they can handle a safepoint. The
  9. // added check of the bias pattern is to avoid useless calls to
  10. // thread-local storage.
  11. // 对象处于偏向状态
  12. if (obj->mark()->has_bias_pattern()) {
  13. // Box and unbox the raw reference just in case we cause a STW safepoint.
  14. Handle hobj (Self, obj) ;
  15. // Relaxing assertion for bug 6320749.
  16. assert (Universe::verify_in_progress() ||
  17. !SafepointSynchronize::is_at_safepoint(),
  18. "biases should not be seen by VM thread here");
  19. // 撤销偏向锁
  20. BiasedLocking::revoke_and_rebias(hobj, false, JavaThread::current());
  21. obj = hobj() ;
  22. // 保证对象没有偏向锁
  23. assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
  24. }
  25. }
  26. // hashCode() is a heap mutator ...
  27. // Relaxing assertion for bug 6320749.
  28. // 达到安全点
  29. assert (Universe::verify_in_progress() ||
  30. !SafepointSynchronize::is_at_safepoint(), "invariant") ;
  31. // 是java线程
  32. assert (Universe::verify_in_progress() ||
  33. Self->is_Java_thread() , "invariant") ;
  34. // 线程没有被阻塞
  35. assert (Universe::verify_in_progress() ||
  36. ((JavaThread *)Self)->thread_state() != _thread_blocked, "invariant") ;
  37. ObjectMonitor* monitor = NULL;
  38. markOop temp, test;
  39. intptr_t hash;
  40. // 读出一个稳定的mark,如果对象处于锁膨胀,那么等待锁膨胀完毕再读
  41. markOop mark = ReadStableMark (obj);
  42. // 判断没有处于偏向锁状态
  43. // object should remain ineligible for biased locking
  44. assert (!mark->has_bias_pattern(), "invariant") ;
  45. // 处于无锁状态
  46. if (mark->is_neutral()) {
  47. hash = mark->hash(); // this is a normal header
  48. // 有hash值直接返回
  49. if (hash) { // if it has hash, just return it
  50. return hash;
  51. }
  52. // 没有hash值,调用get_next_hash函数计算hash值
  53. hash = get_next_hash(Self, obj); // allocate a new hash code
  54. temp = mark->copy_set_hash(hash); // merge the hash code into header
  55. // use (machine word version) atomic operation to install the hash
  56. test = (markOop) Atomic::cmpxchg_ptr(temp, obj->mark_addr(), mark);
  57. if (test == mark) {
  58. return hash;
  59. }
  60. // If atomic operation failed, we must inflate the header
  61. // into heavy weight monitor. We could add more code here
  62. // for fast path, but it does not worth the complexity.
  63. } else if (mark->has_monitor()) {
  64. monitor = mark->monitor();
  65. temp = monitor->header();
  66. assert (temp->is_neutral(), "invariant") ;
  67. hash = temp->hash();
  68. if (hash) {
  69. return hash;
  70. }
  71. // Skip to the following code to reduce code size
  72. } else if (Self->is_lock_owned((address)mark->locker())) {
  73. temp = mark->displaced_mark_helper(); // this is a lightweight monitor owned
  74. assert (temp->is_neutral(), "invariant") ;
  75. hash = temp->hash(); // by current thread, check if the displaced
  76. if (hash) { // header contains hash code
  77. return hash;
  78. }
  79. // WARNING:
  80. // The displaced header is strictly immutable.
  81. // It can NOT be changed in ANY cases. So we have
  82. // to inflate the header into heavyweight monitor
  83. // even the current thread owns the lock. The reason
  84. // is the BasicLock (stack slot) will be asynchronously
  85. // read by other threads during the inflate() function.
  86. // Any change to stack may not propagate to other threads
  87. // correctly.
  88. }
  89. // Inflate the monitor to set hash code
  90. monitor = ObjectSynchronizer::inflate(Self, obj);
  91. // Load displaced header and check it has hash code
  92. mark = monitor->header();
  93. assert (mark->is_neutral(), "invariant") ;
  94. hash = mark->hash();
  95. if (hash == 0) {
  96. hash = get_next_hash(Self, obj);
  97. temp = mark->copy_set_hash(hash); // merge hash code into header
  98. assert (temp->is_neutral(), "invariant") ;
  99. test = (markOop) Atomic::cmpxchg_ptr(temp, monitor, mark);
  100. if (test != mark) {
  101. // The only update to the header in the monitor (outside GC)
  102. // is install the hash code. If someone add new usage of
  103. // displaced header, please update this code
  104. hash = test->hash();
  105. assert (test->is_neutral(), "invariant") ;
  106. assert (hash != 0, "Trivial unexpected object/monitor header usage.");
  107. }
  108. }
  109. // We finally get the hash
  110. return hash;
  111. }

以上分析出在对象头没有hash值的情况下,会调用get_next_hash函数计算出hash值,get_next_hash的定义如下:

get_next_hash函数

  1. static inline intptr_t get_next_hash(Thread * Self, oop obj) {
  2. intptr_t value = 0 ;
  3. if (hashCode == 0) {
  4. // This form uses an unguarded global Park-Miller RNG,
  5. // so it's possible for two threads to race and generate the same RNG.
  6. // On MP system we'll have lots of RW access to a global, so the
  7. // mechanism induces lots of coherency traffic.
  8. // 系统产生随机数
  9. value = os::random() ;
  10. } else
  11. if (hashCode == 1) {
  12. // This variation has the property of being stable (idempotent)
  13. // between STW operations. This can be useful in some of the 1-0
  14. // synchronization schemes.
  15. // 内存地址做移位再和一个随机数做异或
  16. intptr_t addrBits = cast_from_oop<intptr_t>(obj) >> 3 ;
  17. value = addrBits ^ (addrBits >> 5) ^ GVars.stwRandom ;
  18. } else
  19. if (hashCode == 2) {
  20. // 固定返回1
  21. value = 1 ; // for sensitivity testing
  22. } else
  23. if (hashCode == 3) {
  24. // 序列自增
  25. value = ++GVars.hcSequence ;
  26. } else
  27. if (hashCode == 4) {
  28. // 返回内存地址
  29. value = cast_from_oop<intptr_t>(obj) ;
  30. } else {
  31. // Marsaglia's xor-shift 随机数算法
  32. // Marsaglia's xor-shift scheme with thread-specific state
  33. // This is probably the best overall implementation -- we'll
  34. // likely make this the default in future releases.
  35. unsigned t = Self->_hashStateX ;
  36. t ^= (t << 11) ;
  37. Self->_hashStateX = Self->_hashStateY ;
  38. Self->_hashStateY = Self->_hashStateZ ;
  39. Self->_hashStateZ = Self->_hashStateW ;
  40. unsigned v = Self->_hashStateW ;
  41. v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)) ;
  42. Self->_hashStateW = v ;
  43. value = v ;
  44. }
  45. value &= markOopDesc::hash_mask;
  46. if (value == 0) value = 0xBAD ;
  47. assert (value != markOopDesc::no_hash, "invariant") ;
  48. TEVENT (hashCode: GENERATE) ;
  49. return value;
  50. }

可以看到有6种hashCode的生成方法,根据hashCode变量指定不同的生成方案,可以通过在JVM启动参数中添加-XX:hashCode=?,改变默认的hashCode计算方式。
对于OpenJDK8,默认是用最后一种,使用的是Marsaglia’s xor-shift随机数算法。

  1. product(intx, hashCode, 5,"(Unstable) select hashCode generation algorithm")

此方法使用了三个固定值和一个随机数,在thread.cpp中定义

  1. // thread-specific hashCode stream generator state - Marsaglia shift-xor form
  2. _hashStateX = os::random() ;
  3. _hashStateY = 842502087 ;
  4. _hashStateZ = 0x8767 ; // (int)(3579807591LL & 0xffff) ;
  5. _hashStateW = 273326509 ;

所以,对于JDK8,对象在没有重写hashCode的情况下,hashCode的生成和内存地址无关,默认是根据Marsaglia’s xor-shift算法随机生成的。