HPA
The Horizontal Pod Autoscaler (HPA) is the core Kubernetes component for automatic horizontal scaling. Its implementation spans several Kubernetes components and a fairly involved control loop.
I. HPA Architecture
1. Core Components
┌────────────────────┐      ┌────────────────────┐      ┌────────────────────┐
│   HPA Controller   │ ◄──  │    Metrics API     │ ◄──  │   Metrics Server   │
│ (kube-controller)  │      │ (aggregation layer)│      │ (kubelet/cAdvisor) │
└────────────────────┘      └────────────────────┘      └────────────────────┘
          │                            │                            │
          ▼                            ▼                            ▼
┌────────────────────┐      ┌────────────────────┐      ┌────────────────────┐
│    Deployment/     │      │  Custom Metrics    │      │      External      │
│    StatefulSet     │      │      Adapter       │      │      Metrics       │
└────────────────────┘      └────────────────────┘      └────────────────────┘
II. HPA Workflow
1. The Complete Control Loop
// Simplified HPA control logic (context and error handling omitted)
for {
    // 1. Fetch the HPA object
    hpa := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(name)

    // 2. Resolve the scale target (Deployment, StatefulSet, etc.)
    scaleTargetRef := hpa.Spec.ScaleTargetRef
    target := client.AppsV1().Deployments(namespace).Get(scaleTargetRef.Name)

    // 3. Query the metrics
    metrics := []autoscalingv2.MetricStatus{}
    for _, metricSpec := range hpa.Spec.Metrics {
        metricValue := getMetricValue(metricSpec, target)
        metrics = append(metrics, metricValue)
    }

    // 4. Compute the desired replica count
    currentReplicas := *target.Spec.Replicas
    desiredReplicas := calculateDesiredReplicas(hpa, metrics, currentReplicas)

    // 5. Scale the target through its /scale subresource
    if desiredReplicas != currentReplicas {
        scale := client.AppsV1().Deployments(namespace).GetScale(target.Name)
        scale.Spec.Replicas = desiredReplicas
        client.AppsV1().Deployments(namespace).UpdateScale(target.Name, scale)
    }

    time.Sleep(15 * time.Second) // default sync period
}
2. Step-by-Step Breakdown
Step 1: Metric Collection
# HPA reads metrics through the Metrics API
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/namespaces/default/pods" | jq .

# Or through the custom metrics API
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq .
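Inside the controller, the same query goes through the aggregated Metrics API rather than kubectl. Below is a minimal sketch of reading pod metrics with the official metrics client; the kubeconfig location and the default namespace are assumptions made only for illustration:

package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/clientcmd"
	metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
)

func main() {
	// Assumption: a kubeconfig at the default path; in-cluster config would also work.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	mc, err := metricsclient.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	// Equivalent to GET /apis/metrics.k8s.io/v1beta1/namespaces/default/pods
	podMetrics, err := mc.MetricsV1beta1().PodMetricses("default").List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	for _, pm := range podMetrics.Items {
		for _, c := range pm.Containers {
			fmt.Printf("%s/%s cpu=%dm memory=%dMi\n",
				pm.Name, c.Name,
				c.Usage.Cpu().MilliValue(),
				c.Usage.Memory().Value()/(1024*1024))
		}
	}
}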
Step 2: Metric Calculation
// Compute the ratio between the current metric value and the target value
func calculateMetricRatio(currentValue, targetValue int64) float64 {
    return float64(currentValue) / float64(targetValue)
}

// Example: CPU usage
currentCPUUsage := int64(800) // currently using 800 milli-cores
targetCPUUsage := int64(500)  // target is 500 milli-cores
ratio := calculateMetricRatio(currentCPUUsage, targetCPUUsage) // = 1.6
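To see where that ratio leads, here is a small worked sketch (the starting count of 3 replicas is an assumption) applying the scaling formula desiredReplicas = ceil(currentReplicas × ratio):

package main

import (
	"fmt"
	"math"
)

func main() {
	currentReplicas := int32(3) // assumed current size of the Deployment
	ratio := 800.0 / 500.0      // current CPU usage / target, from the example above

	// ceil(3 * 1.6) = ceil(4.8) = 5
	desiredReplicas := int32(math.Ceil(float64(currentReplicas) * ratio))
	fmt.Println(desiredReplicas) // 5
}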
III. HPA Configuration in Detail
1. HPA Resource Definition
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
  namespace: default
spec:
  # Scale target
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  # Replica range
  minReplicas: 2
  maxReplicas: 10
  # Metric definitions
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 50
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 70
  - type: Pods
    pods:
      metric:
        name: packets-per-second
      target:
        type: AverageValue
        averageValue: 1k
  - type: Object
    object:
      metric:
        name: requests-per-second
      describedObject:
        apiVersion: networking.k8s.io/v1
        kind: Ingress
        name: main-route
      target:
        type: Value
        value: 10k
  # Scaling behavior (Kubernetes 1.18+)
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60
      - type: Pods
        value: 5
        periodSeconds: 60
      selectPolicy: Min
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
      - type: Percent
        value: 100
        periodSeconds: 15
      - type: Pods
        value: 4
        periodSeconds: 15
      selectPolicy: Max
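The same object can also be created programmatically. The following is a minimal sketch using client-go that builds only the CPU portion of the spec above; the kubeconfig path, namespace, and names (myapp, myapp-hpa) are carried over from the YAML and are assumptions for illustration:

package main

import (
	"context"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	minReplicas := int32(2)
	cpuTarget := int32(50)

	hpa := &autoscalingv2.HorizontalPodAutoscaler{
		ObjectMeta: metav1.ObjectMeta{Name: "myapp-hpa", Namespace: "default"},
		Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
				APIVersion: "apps/v1",
				Kind:       "Deployment",
				Name:       "myapp",
			},
			MinReplicas: &minReplicas,
			MaxReplicas: 10,
			Metrics: []autoscalingv2.MetricSpec{{
				Type: autoscalingv2.ResourceMetricSourceType,
				Resource: &autoscalingv2.ResourceMetricSource{
					Name: corev1.ResourceCPU,
					Target: autoscalingv2.MetricTarget{
						Type:               autoscalingv2.UtilizationMetricType,
						AverageUtilization: &cpuTarget,
					},
				},
			}},
		},
	}

	// Create the HPA: roughly equivalent to applying the CPU part of the YAML above.
	_, err = clientset.AutoscalingV2().HorizontalPodAutoscalers("default").
		Create(context.TODO(), hpa, metav1.CreateOptions{})
	if err != nil {
		panic(err)
	}
}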
IV. Metric Types and How They Are Calculated
1. Resource Metrics (CPU/Memory)
metrics:
- type: Resource
  resource:
    name: cpu
    target:
      type: Utilization        # utilization mode
      averageUtilization: 50
- type: Resource
  resource:
    name: memory
    target:
      type: AverageValue       # average-value mode
      averageValue: 512Mi
Calculation logic:
// CPU utilization: desiredReplicas = ceil(currentReplicas * currentUtilization / targetUtilization)
func calculateCPUReplicas(currentUtilization, targetUtilization, currentReplicas int32) int32 {
    // currentUtilization is the average pod usage expressed as a percentage of the pod's CPU request
    usageRatio := float64(currentUtilization) / float64(targetUtilization)
    return int32(math.Ceil(float64(currentReplicas) * usageRatio))
}
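As a usage example of the function above (the numbers are hypothetical): with 3 replicas averaging 65% CPU against a 50% target, it returns ceil(3 × 65/50) = ceil(3.9) = 4.

// Illustrative numbers: 3 replicas, 65% average CPU utilization, 50% target
replicas := calculateCPUReplicas(65, 50, 3)
fmt.Println(replicas) // 4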
2. Custom Metrics (Pods type)
metrics:
- type: Pods
  pods:
    metric:
      name: http_requests_per_second
    target:
      type: AverageValue
      averageValue: 100
Calculation:
desiredReplicas = ceil(total current metric value / target average value)
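As a worked illustration (the request rates are made up): if 3 pods currently serve 450 requests per second in total against a 100 rps-per-pod target, the HPA asks for ceil(450 / 100) = 5 replicas.

package main

import (
	"fmt"
	"math"
)

func main() {
	totalRequestsPerSecond := 450.0 // sum of the metric across all pods (assumed)
	targetAveragePerPod := 100.0    // target from the HPA spec above

	desired := int32(math.Ceil(totalRequestsPerSecond / targetAveragePerPod))
	fmt.Println(desired) // 5
}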
3. Object Metrics (Object type)
metrics:
- type: Object
  object:
    metric:
      name: latency
    describedObject:
      apiVersion: networking.k8s.io/v1
      kind: Ingress
      name: my-ingress
    target:
      type: Value
      value: 100
V. The HPA Algorithm in Detail
1. The Core Algorithm
// metricValue pairs an observed metric with its target from the HPA spec
type metricValue struct {
    current int64
    target  int64
}

// Compute the desired replica count
func GetDesiredReplicas(
    currentReplicas int32,
    metricValues []metricValue,
    hpa *autoscalingv2.HorizontalPodAutoscaler,
) int32 {
    ratios := make([]float64, 0, len(metricValues))

    // 1. Compute the ratio for every metric
    for _, metric := range metricValues {
        ratio := calculateMetricRatio(metric.current, metric.target)
        ratios = append(ratios, ratio)
    }

    // 2. Pick the largest ratio (the metric that most needs a scale-up)
    maxRatio := getMaxRatio(ratios)

    // 3. Compute the desired replica count
    desiredReplicas := math.Ceil(float64(currentReplicas) * maxRatio)

    // 4. Clamp to the configured bounds
    desiredReplicas = applyBounds(desiredReplicas, *hpa.Spec.MinReplicas, hpa.Spec.MaxReplicas)
    return int32(desiredReplicas)
}
2. Smoothing and the Cooldown Mechanism
// Scaling decision that takes recent history into account (simplified)
func withStabilization(desiredReplicas int32, hpa *autoscalingv2.HorizontalPodAutoscaler) int32 {
    now := time.Now()

    var stabilizationWindow int32
    if isScaleUp(desiredReplicas, hpa.Status.CurrentReplicas) {
        // Scale-up: usually acts immediately (window defaults to 0)
        stabilizationWindow = *hpa.Spec.Behavior.ScaleUp.StabilizationWindowSeconds
    } else {
        // Scale-down: apply the stabilization window
        stabilizationWindow = *hpa.Spec.Behavior.ScaleDown.StabilizationWindowSeconds
    }

    // Keep only the recommendations recorded inside the stabilization window
    // (the real controller tracks this history in memory, per HPA object)
    validRecommendations := filterRecommendationsByTime(
        recommendationHistory(hpa),
        now.Add(-time.Duration(stabilizationWindow)*time.Second),
    )

    // Apply the configured selectPolicy (Min/Max)
    finalReplicas := applyPolicy(validRecommendations, hpa.Spec.Behavior)
    return finalReplicas
}
VI. Advanced Features
1. Multi-Metric Support
When several metrics are configured, the HPA computes a desired replica count for each metric and then takes the maximum:
func calculateFromMultipleMetrics(metrics []Metric, currentReplicas int32) int32 {
    desiredReplicas := make([]int32, 0, len(metrics))
    for _, metric := range metrics {
        replicas := calculateForSingleMetric(metric, currentReplicas)
        desiredReplicas = append(desiredReplicas, replicas)
    }
    // Pick the largest desired replica count
    return maxOf(desiredReplicas) // maxOf returns the largest element of the slice
}
2. Scaling Behavior Control
behavior:
  scaleDown:
    # Scale-down stabilization window: 5 minutes
    stabilizationWindowSeconds: 300
    policies:
    - type: Percent                # remove at most 50% of pods per minute
      value: 50
      periodSeconds: 60
    - type: Pods                   # or at most 5 pods per minute
      value: 5
      periodSeconds: 60
    selectPolicy: Min              # pick the more restrictive policy
  scaleUp:
    stabilizationWindowSeconds: 0  # scale up immediately
    policies:
    - type: Percent                # add at most 100% more pods per minute
      value: 100
      periodSeconds: 60
    - type: Pods                   # or at most 4 pods per minute
      value: 4
      periodSeconds: 60
    selectPolicy: Max              # pick the less restrictive policy
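To make the selectPolicy semantics concrete, here is a hedged sketch (not the controller's actual code) of how the scale-up policies above cap a single scaling step; the scalingPolicy type and allowedCeiling helper are invented for illustration, and periodSeconds accounting is deliberately ignored:

package main

import "fmt"

// scalingPolicy mirrors one entry under behavior.scaleUp.policies (illustrative type only).
type scalingPolicy struct {
	kind  string // "Percent" or "Pods"
	value int32
}

// allowedCeiling returns the highest replica count one scale-up step may reach,
// assuming selectPolicy: Max (take the most permissive policy).
func allowedCeiling(current int32, policies []scalingPolicy) int32 {
	var ceiling int32
	for _, p := range policies {
		var limit int32
		switch p.kind {
		case "Percent":
			limit = current + current*p.value/100
		case "Pods":
			limit = current + p.value
		}
		if limit > ceiling {
			ceiling = limit
		}
	}
	return ceiling
}

func main() {
	policies := []scalingPolicy{{"Percent", 100}, {"Pods", 4}}
	// With 6 current replicas: Percent allows 12, Pods allows 10; Max picks 12.
	fmt.Println(allowedCeiling(6, policies)) // 12
}

With selectPolicy: Min the same step would be capped at 10, since the more restrictive of the two limits wins.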
VII. Monitoring and Debugging
1. Inspecting HPA Status
# Show HPA details
kubectl describe hpa myapp-hpa

# Example output:
# Name:             myapp-hpa
# Namespace:        default
# Reference:        Deployment/myapp
# Metrics:          ( current / target )
#   resource cpu on pods (as a percentage of request): 65% (130m) / 50%
#   resource memory on pods: 120Mi / 100Mi
# Min replicas:     2
# Max replicas:     10
# Deployment pods:  3 current / 3 desired
2. HPA-Related Events
# List HPA events
kubectl get events --field-selector involvedObject.kind=HorizontalPodAutoscaler

# Review the scaling history
kubectl describe deployment myapp | grep -A 10 "Events"
3. Debugging Metrics
# Check that the Metrics API is working
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes" | jq .

# Check the custom metrics API
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq .

# Query pod and node metrics directly
kubectl top pods
kubectl top nodes
VIII. Troubleshooting Common Problems
1. HPA Does Not Scale Up
# Check whether metrics are available
kubectl describe hpa myapp-hpa
# Look for error messages in the Events section

# Check the Metrics Server
kubectl get apiservices | grep metrics
kubectl logs -n kube-system -l k8s-app=metrics-server

# Check that resource requests are set (utilization targets require them)
kubectl get deployment myapp -o yaml | grep resources -A 5
2. HPA Thrashes (Frequent Scale Up/Down)
# Tune the behavior configuration
behavior:
  scaleDown:
    stabilizationWindowSeconds: 600  # lengthen the scale-down stabilization window
    policies:
    - type: Pods
      value: 1                       # remove only 1 pod at a time
      periodSeconds: 300             # at most once every 5 minutes
IX. Performance Tuning Suggestions
1. Tuning for Large Clusters
# kube-controller-manager tuning
apiVersion: v1
kind: Pod
metadata:
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --horizontal-pod-autoscaler-sync-period=30s               # adjust the sync period
    - --horizontal-pod-autoscaler-downscale-stabilization=5m0s
    - --horizontal-pod-autoscaler-initial-readiness-delay=30s
    - --horizontal-pod-autoscaler-cpu-initialization-period=5m0s
Summary
The HPA implementation involves:
- Metric collection: aggregating resource usage through the Metrics API
- Calculation: deriving the desired replica count from current and target metric values
- Decision smoothing: applying stabilization windows and policies to avoid thrashing
- Scaling: updating the replica count of the target resource
- Status tracking: recording scaling history and current state
The HPA's sophistication shows in its multi-metric decisions, behavior control, and cooldown mechanisms, which make autoscaling both responsive and stable.