
Go's slice growth mechanism

As the title suggests, this is a well-worn topic. As a classic Go interview question, though, it is awkward: the answer changes often, and the slice growth mechanism can differ between Go versions. Material found online varies widely and much of it is outdated, which is frustrating when an interviewer checks your answer against a stale "standard" answer.

This article is based on Go 1.19.3, the latest release as of the publication date.

The authoritative answer has to come from the source code. The core function for slice growth is growslice:

// src/runtime/slice.go

// growslice handles slice growth during append.
// It is passed the slice element type, the old slice, and the desired new minimum capacity,
// and it returns a new slice with at least that capacity, with the old data
// copied into it.
// The new slice's length is set to the old slice's length,
// NOT to the new requested capacity.
// This is for codegen convenience. The old slice's length is used immediately
// to calculate where to write new values during an append.
// TODO: When the old backend is gone, reconsider this decision.
// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
// et is the slice element type
// old is the original slice
// cap is the minimum capacity the new slice must have
func growslice(et *_type, old slice, cap int) slice {

	// instrumentation hooks unrelated to the growth logic ↓
	if raceenabled {
		callerpc := getcallerpc()
		racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, abi.FuncPCABIInternal(growslice))
	}
	if msanenabled {
		msanread(old.array, uintptr(old.len*int(et.size)))
	}
	if asanenabled {
		asanread(old.array, uintptr(old.len*int(et.size)))
	}

	// instrumentation hooks unrelated to the growth logic ↑

	if cap < old.cap {
		// the requested cap is smaller than the current cap, which is invalid
		panic(errorString("growslice: cap out of range"))
	}

	if et.size == 0 {
		// append should not create a slice with nil pointer but non-zero len.
		// We assume that append doesn't need to preserve old.array in this case.
		return slice{unsafe.Pointer(&zerobase), old.len, cap}
	}

	newcap := old.cap
	doublecap := newcap + newcap
	if cap > doublecap {
		// 1. the requested cap is more than double the current capacity: use cap directly
		newcap = cap
	} else {
		const threshold = 256
		if old.cap < threshold {
			// 2. the current capacity is below the threshold: double it
			newcap = doublecap
		} else {
			// the current capacity is at or above the threshold
			// Check 0 < newcap to detect overflow
			// and prevent an infinite loop.
			for 0 < newcap && newcap < cap {
				// Transition from growing 2x for small slices
				// to growing 1.25x for large slices. This formula
				// gives a smooth-ish transition between the two.
				//
				// 3. grow newcap by (newcap + 3*threshold) / 4 on each iteration until it is no smaller than cap.
				// This is a compromise between the 2x growth used for small slices and the 1.25x growth used for large ones.
				newcap += (newcap + 3*threshold) / 4
			}
			// Set newcap to the requested cap when
			// the newcap calculation overflowed.
			if newcap <= 0 {
				// the calculation above may overflow and make newcap negative; fall back to the requested cap
				newcap = cap
			}
		}
	}

	var overflow bool
	// lenmem: size in bytes of the data currently stored in the old backing array
	// newlenmem: minimum size in bytes needed to hold the new length
	// capmem: size in bytes of the backing array after growth
	var lenmem, newlenmem, capmem uintptr
	// Specialize for common values of et.size.
	// For 1 we don't need any division/multiplication.
	// For goarch.PtrSize, compiler will optimize division/multiplication into a shift by a constant.
	// For powers of 2, use a variable shift.
	// 4. round the allocation up to a malloc size class (memory alignment), which may adjust newcap again
	switch {
	case et.size == 1:
		// element size is 1 byte
		lenmem = uintptr(old.len)
		newlenmem = uintptr(cap)
		capmem = roundupsize(uintptr(newcap))
		overflow = uintptr(newcap) > maxAlloc
		newcap = int(capmem)
	case et.size == goarch.PtrSize:
		// element size equals the platform pointer size (the native word size: 4 bytes on 32-bit, 8 bytes on 64-bit)
		lenmem = uintptr(old.len) * goarch.PtrSize
		newlenmem = uintptr(cap) * goarch.PtrSize
		capmem = roundupsize(uintptr(newcap) * goarch.PtrSize)
		overflow = uintptr(newcap) > maxAlloc/goarch.PtrSize
		newcap = int(capmem / goarch.PtrSize)
	case isPowerOfTwo(et.size):
		// element size is a power of two
		var shift uintptr
		if goarch.PtrSize == 8 {
			// Mask shift for better code generation.
			shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
		} else {
			shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
		}
		lenmem = uintptr(old.len) << shift
		newlenmem = uintptr(cap) << shift
		capmem = roundupsize(uintptr(newcap) << shift)
		overflow = uintptr(newcap) > (maxAlloc >> shift)
		newcap = int(capmem >> shift)
	default:
		// any other element size
		lenmem = uintptr(old.len) * et.size
		newlenmem = uintptr(cap) * et.size
		capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
		capmem = roundupsize(capmem)
		newcap = int(capmem / et.size)
	}

	// The check of overflow in addition to capmem > maxAlloc is needed
	// to prevent an overflow which can be used to trigger a segfault
	// on 32bit architectures with this example program:
	//
	// type T [1<<27 + 1]int64
	//
	// var d T
	// var s []T
	//
	// func main() {
	//   s = append(s, d, d, d, d)
	//   print(len(s), "\n")
	// }
	if overflow || capmem > maxAlloc {
		// the slice has grown too large, exceeding the allocation limit
		panic(errorString("growslice: cap out of range"))
	}

	var p unsafe.Pointer
	if et.ptrdata == 0 {
		p = mallocgc(capmem, nil, false)
		// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
		// Only clear the part that will not be overwritten.
		// clear only the region beyond the new length, which the calling append will not overwrite
		memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
	} else {
		// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
		p = mallocgc(capmem, et, true)
		if lenmem > 0 && writeBarrier.enabled {
			// Only shade the pointers in old.array since we know the destination slice p
			// only contains nil pointers because it has been cleared during alloc.
			bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem-et.size+et.ptrdata)
		}
	}
	// copy the old data into the newly allocated array
	memmove(p, old.array, lenmem)
	// return the new slice
	return slice{p, old.len, newcap}
}

I have annotated the code above. The growth logic has four key points:

1. If the requested cap is more than twice the current capacity, newcap is set to cap.
2. If the current capacity is below the threshold (256), newcap is double the current capacity.
3. Otherwise, newcap grows by (newcap + 3*threshold) / 4 per iteration until it is no smaller than cap; this is a compromise between the 2x growth used for small slices and the 1.25x growth used for large ones.
4. The allocation is rounded up to a malloc size class (memory alignment), which may adjust newcap again.

In step 3, older Go versions grew large slices by a flat 1.25x. That abrupt switch from 2x to 1.25x was considered too crude, so the current version adopts the smoother formula above.
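
To make the rule concrete, here is a minimal sketch of how the new capacity is chosen before the size-class rounding in step 4. The nextCap helper is hypothetical and only mirrors steps 1-3 of growslice; it is not part of the runtime:

package main

import "fmt"

// nextCap mimics steps 1-3 of growslice: given the current capacity oldCap
// and the minimum required capacity needed, it returns the capacity the
// runtime would pick before rounding the allocation up to a size class.
func nextCap(oldCap, needed int) int {
	newcap := oldCap
	doublecap := newcap + newcap
	if needed > doublecap {
		return needed // step 1: more than double is requested, use it as-is
	}
	const threshold = 256
	if oldCap < threshold {
		return doublecap // step 2: small slices simply double
	}
	// step 3: grow by (newcap + 3*threshold) / 4 until the request is satisfied
	for 0 < newcap && newcap < needed {
		newcap += (newcap + 3*threshold) / 4
	}
	if newcap <= 0 {
		return needed // overflow fallback
	}
	return newcap
}

func main() {
	fmt.Println(nextCap(128, 129)) // 256: still doubling below the threshold
	fmt.Println(nextCap(512, 513)) // 832: the runtime then rounds this up to 848
}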

The logic can be summarized in the following flowchart:

[Flowchart: slice capacity growth decision flow]

Let's observe in practice how len and cap change as elements are appended to a slice one at a time:

package main

import (
	"fmt"
	"os"
)

// append ints one at a time and record len, cap, and the capacity growth ratio
func main() {
	f, err := os.Create("sliceIncr.txt")
	if err != nil {
		panic(err)
	}
	defer func() { _ = f.Close() }()

	s := make([]int, 0)

	lastCap := 0
	for i := 0; i < 10000; i++ {
		var incrRatio = 0.0
		if lastCap != 0 {
			incrRatio = float64(cap(s)) / float64(lastCap)
		}
		f.WriteString(fmt.Sprintf("len:%d, cap:%d, cap incr times:%f \n", len(s), cap(s), incrRatio))
		s = append(s, i)
		lastCap = cap(s)
	}
}

The contents of the recorded file (some lines omitted):

len:1, cap:1, cap incr times:0.000000 
len:2, cap:2, cap incr times:2.000000 
len:3, cap:4, cap incr times:2.000000 
len:4, cap:4, cap incr times:1.000000 
len:5, cap:8, cap incr times:2.000000 
...
len:8, cap:8, cap incr times:1.000000 
len:9, cap:16, cap incr times:2.000000 
...
len:16, cap:16, cap incr times:1.000000 
len:17, cap:32, cap incr times:2.000000 
...
len:32, cap:32, cap incr times:1.000000 
len:33, cap:64, cap incr times:2.000000 
...
len:64, cap:64, cap incr times:1.000000 
len:65, cap:128, cap incr times:2.000000 
...
len:128, cap:128, cap incr times:1.000000 
len:129, cap:256, cap incr times:2.000000 
...
len:256, cap:256, cap incr times:1.000000 
len:257, cap:512, cap incr times:2.000000 
...
len:512, cap:512, cap incr times:1.000000 
len:513, cap:848, cap incr times:1.656250 
...
len:848, cap:848, cap incr times:1.000000 
len:849, cap:1280, cap incr times:1.509434 
...
len:1280, cap:1280, cap incr times:1.000000 
len:1281, cap:1792, cap incr times:1.400000 
...
len:1792, cap:1792, cap incr times:1.000000 
len:1793, cap:2560, cap incr times:1.428571 
...
len:2560, cap:2560, cap incr times:1.000000 
len:2561, cap:3408, cap incr times:1.331250 
...
len:3408, cap:3408, cap incr times:1.000000 
len:3409, cap:5120, cap incr times:1.502347 
...
len:5120, cap:5120, cap incr times:1.000000 
len:5121, cap:7168, cap incr times:1.400000 
...
len:7168, cap:7168, cap incr times:1.000000 
len:7169, cap:9216, cap incr times:1.285714 
...
len:9216, cap:9216, cap incr times:1.000000 
len:9217, cap:12288, cap incr times:1.333333 

As the data shows, slice capacity does not grow by a single fixed factor: small slices double, while larger slices grow by a smaller, varying ratio that is further adjusted by size-class rounding.
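
The jump from cap 512 to cap 848 is a good worked example of steps 3 and 4 combined (assuming []int on a 64-bit platform, i.e. 8-byte elements): appending the 513th element requests cap = 513; since 512 >= 256, the loop computes newcap = 512 + (512 + 3*256)/4 = 832; roundupsize(832 * 8 = 6656 bytes) then rounds the allocation up to the next malloc size class, 6784 bytes, and 6784 / 8 = 848, which is exactly the capacity recorded above.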

If you spot any errors, please reach out so I can correct them.

This post is licensed by the author under CC BY 4.0.