微服务在注册中心被剔除分为两种情况:

  • 一种是正常的下架,即客户端发送一个HTTP DELETE请求告诉注册中心,这个服务可以下架
  • 另一种是,注册中心长时间没有收到某个客户端的心跳,注册中心会通过定时任务将该客户端的实例剔除

1.正常情况下下架

  // Handles the HTTP DELETE request a client sends to take an instance offline.
  // Delegates to registry.cancel(...) and maps the outcome to a response:
  // 200 OK when cancel returns true, 404 NOT_FOUND when it returns false,
  // and a server error when anything is thrown.
  // NOTE(review): `app`, `id` and `registry` are not declared in this excerpt -
  // presumably fields of the enclosing resource class; confirm against the full source.
  1. @DELETE
  2. public Response cancelLease(
  3. @HeaderParam(PeerEurekaNode.HEADER_REPLICATION) String isReplication) {
  4. try {
  5. // Forward the cancel (take-offline) request to the registry
  6. boolean isSuccess = registry.cancel(app.getName(), id,
  7. "true".equals(isReplication));
  8. if (isSuccess) {
  9. logger.debug("Found (Cancel): {} - {}", app.getName(), id);
  10. return Response.ok().build();
  11. } else {
  12. logger.info("Not Found (Cancel): {} - {}", app.getName(), id);
  13. return Response.status(Status.NOT_FOUND).build();
  14. }
  15. } catch (Throwable e) {
  16. logger.error("Error (cancel): {} - {}", app.getName(), id, e);
  17. return Response.serverError().build();
  18. }
  19. }
  // Cancels the instance via super.cancel(...). On success it replicates the
  // cancel action to the peers and, while holding `lock`, lowers
  // expectedNumberOfRenewsPerMin by 2 and recomputes
  // numberOfRenewsPerMinThreshold from it.
  // Returns true when super.cancel(...) succeeded, false otherwise.
  1. @Override
  2. public boolean cancel(final String appName, final String id,
  3. final boolean isReplication) {
  4. if (super.cancel(appName, id, isReplication)) {
  5. // After a successful cancel, replicate it to the peer nodes
  6. replicateToPeers(Action.Cancel, appName, id, null, null, isReplication);
  7. synchronized (lock) {
  8. if (this.expectedNumberOfRenewsPerMin > 0) {
  9. // Since the client wants to cancel it, reduce the threshold (1 for 30 seconds, 2 for a minute)
  10. this.expectedNumberOfRenewsPerMin = this.expectedNumberOfRenewsPerMin - 2;
  11. this.numberOfRenewsPerMinThreshold =
  12. (int) (this.expectedNumberOfRenewsPerMin * serverConfig.getRenewalPercentThreshold());
  13. }
  14. }
  15. return true;
  16. }
  17. return false;
  18. }
  // Public entry point for cancelling a lease; passes its arguments unchanged
  // to internalCancel(...) and returns that result.
  1. public boolean cancel(String appName, String id, boolean isReplication) {
  2. return internalCancel(appName, id, isReplication);
  3. }
  // Removes the lease for instance `id` of application `appName` from the registry.
  // Regardless of whether a lease is found, it records an entry in
  // recentCanceledQueue and removes the id from overriddenInstanceStatusMap.
  // Returns false (after bumping CANCEL_NOT_FOUND) when no lease was registered;
  // otherwise cancels the lease, marks the holder as DELETED, queues the change
  // in recentlyChangedQueue, invalidates the cache and returns true.
  // NOTE(review): read.lock() is acquired inside the try block, so if the
  // acquisition itself failed the finally block would still call read.unlock().
  1. protected boolean internalCancel(String appName, String id, boolean isReplication) {
  2. try {
  3. read.lock();
  4. CANCEL.increment(isReplication);
  5. // Look up the group of leases registered under this application name
  6. Map<String, Lease<InstanceInfo>> gMap = registry.get(appName);
  7. Lease<InstanceInfo> leaseToCancel = null;
  8. if (gMap != null) {
  9. // Remove the instance's lease from the group by instance id
  10. leaseToCancel = gMap.remove(id);
  11. }
  12. synchronized (recentCanceledQueue) {
  13. recentCanceledQueue.add(new Pair<Long, String>(System.currentTimeMillis(), appName + "(" + id + ")"));
  14. }
  15. InstanceStatus instanceStatus = overriddenInstanceStatusMap.remove(id);
  16. if (instanceStatus != null) {
  17. logger.debug("Removed instance id {} from the overridden map which has value {}", id, instanceStatus.name());
  18. }
  19. if (leaseToCancel == null) {
  20. CANCEL_NOT_FOUND.increment(isReplication);
  21. logger.warn("DS: Registry: cancel failed because Lease is not registered for: {}/{}", appName, id);
  22. return false;
  23. } else {
  24. // Mark the lease as cancelled (per the original note, this records the eviction time)
  25. leaseToCancel.cancel();
  26. InstanceInfo instanceInfo = leaseToCancel.getHolder();
  27. String vip = null;
  28. String svip = null;
  29. if (instanceInfo != null) {
  30. instanceInfo.setActionType(ActionType.DELETED);
  31. recentlyChangedQueue.add(new RecentlyChangedItem(leaseToCancel));
  32. instanceInfo.setLastUpdatedTimestamp();
  33. vip = instanceInfo.getVIPAddress();
  34. svip = instanceInfo.getSecureVipAddress();
  35. }
  36. invalidateCache(appName, vip, svip);
  37. logger.info("Cancelled instance {}/{} (replication={})", appName, id, isReplication);
  38. return true;
  39. }
  40. } finally {
  41. read.unlock();
  42. }
  43. }

2.客户端发生故障下架(服务剔除)

Eureka Server会启动一个定时器(默认每60秒执行一次,由eureka.server.eviction-interval-timer-in-ms配置),定时判断注册在上面的客户端是否过期。
定时器启动后会调用AbstractInstanceRegistry类中的evict方法

  // Runs an eviction pass with no additional lease time: delegates to evict(0l).
  1. public void evict() {
  2. evict(0l);
  3. }
  // Evicts expired leases from the registry.
  // Does nothing when isLeaseExpirationEnabled() is false. Otherwise it collects
  // every lease that reports isExpired(additionalLeaseMs) and still has a holder,
  // caps the number to evict at registrySize - (int) (registrySize * renewalPercentThreshold),
  // and cancels that many randomly chosen expired leases through internalCancel(...).
  // additionalLeaseMs is passed through to Lease.isExpired(...).
  1. public void evict(long additionalLeaseMs) {
  2. logger.debug("Running the evict task");
  3. if (!isLeaseExpirationEnabled()) {
  4. logger.debug("DS: lease expiration is currently disabled.");
  5. return;
  6. }
  7. // We collect first all expired items, to evict them in random order. For large eviction sets,
  8. // if we do not that, we might wipe out whole apps before self preservation kicks in. By randomizing it,
  9. // the impact should be evenly distributed across all applications.
  10. // List that collects the expired leases
  11. List<Lease<InstanceInfo>> expiredLeases = new ArrayList<>();
  12. // Walk every application group in the registry
  13. for (Entry<String, Map<String, Lease<InstanceInfo>>> groupEntry : registry.entrySet()) {
  14. Map<String, Lease<InstanceInfo>> leaseMap = groupEntry.getValue();
  15. if (leaseMap != null) {
  16. for (Entry<String, Lease<InstanceInfo>> leaseEntry : leaseMap.entrySet()) {
  17. Lease<InstanceInfo> lease = leaseEntry.getValue();
  18. if (lease.isExpired(additionalLeaseMs) && lease.getHolder() != null) {
  19. // The lease is expired (per the original note: no heartbeat for 90s) and still has a holder,
  20. // so add it to the list of expired leases
  21. expiredLeases.add(lease);
  22. }
  23. }
  24. }
  25. }
  26. // To compensate for GC pauses or drifting local time, we need to use current registry size as a base for
  27. // triggering self-preservation. Without that we would wipe out full registry.
  28. // Number of instances currently registered on this Eureka Server (per the original note)
  29. int registrySize = (int) getLocalRegistrySize();
  30. // Registry size threshold = registrySize * renewal percent threshold (0.85 per the original note)
  31. int registrySizeThreshold = (int) (registrySize * serverConfig.getRenewalPercentThreshold());
  32. // Eviction limit = registrySize - threshold, i.e. roughly 15% of the registry with a 0.85 threshold
  33. int evictionLimit = registrySize - registrySizeThreshold;
  34. // Number of leases to evict in this pass: never more than the eviction limit
  35. int toEvict = Math.min(expiredLeases.size(), evictionLimit);
  36. if (toEvict > 0) {
  37. logger.info("Evicting {} items (expired={}, evictionLimit={})", toEvict, expiredLeases.size(), evictionLimit);
  38. Random random = new Random(System.currentTimeMillis());
  39. for (int i = 0; i < toEvict; i++) {
  40. // Pick a random item (Knuth shuffle algorithm)
  41. int next = i + random.nextInt(expiredLeases.size() - i);
  42. Collections.swap(expiredLeases, i, next);
  43. // Take the randomly selected expired lease that was just swapped into index i
  44. Lease<InstanceInfo> lease = expiredLeases.get(i);
  45. String appName = lease.getHolder().getAppName();
  46. String id = lease.getHolder().getId();
  47. EXPIRED.increment();
  48. logger.warn("DS: Registry: expired lease for {}/{}", appName, id);
  49. // Reuse the cancel path to remove the instance
  50. internalCancel(appName, id, false);
  51. }
  52. }
  53. }

步骤梳理:

  • 启动一个定时器(默认每60秒执行一次),调用AbstractInstanceRegistry类中的evict方法
  • 定义一个过期的实例集合用于接收过期的微服务实例
  • 遍历注册中心上的所有的微服务实例,判断实例是否过期(90s没有发送心跳的微服务实例),过期则加入到过期集合中
  • 由于Eureka Server的自我保护机制,每次剔除的过期实例数不超过注册中心实例总数的15%(即 registrySize - registrySize * renewalPercentThreshold,阈值默认为0.85)
  • 从过期集合中随机挑选实例,逐个调用internalCancel方法进行剔除,剔除总数不超过注册中心实例总数的15%