
Go's slice growth mechanism

As the title suggests, this is a well-worn topic. As a classic Go interview question, though, it is awkward: the answer changes often, and the slice growth mechanism can differ between Go versions. Material found online varies widely and much of it is outdated, which is frustrating when an interviewer checks your answer against a stale "standard" answer.

This article is based on Go 1.19.3, the latest release as of the publication date.

The authoritative answer has to come from the source code. The core function for slice growth is growslice:

// src/runtime/slice.go

// growslice handles slice growth during append.
// It is passed the slice element type, the old slice, and the desired new minimum capacity,
// and it returns a new slice with at least that capacity, with the old data
// copied into it.
// The new slice's length is set to the old slice's length,
// NOT to the new requested capacity.
// This is for codegen convenience. The old slice's length is used immediately
// to calculate where to write new values during an append.
// TODO: When the old backend is gone, reconsider this decision.
// The SSA backend might prefer the new length or to return only ptr/cap and save stack space.
// et is the slice element type
// old is the original slice
// cap is the minimum capacity the new slice must have
func growslice(et *_type, old slice, cap int) slice {

	// instrumentation hooks unrelated to the growth logic ↓
	if raceenabled {
		callerpc := getcallerpc()
		racereadrangepc(old.array, uintptr(old.len*int(et.size)), callerpc, abi.FuncPCABIInternal(growslice))
	}
	if msanenabled {
		msanread(old.array, uintptr(old.len*int(et.size)))
	}
	if asanenabled {
		asanread(old.array, uintptr(old.len*int(et.size)))
	}

	// instrumentation hooks unrelated to the growth logic ↑

	if cap < old.cap {
		// the requested cap is smaller than the current cap, which is invalid
		panic(errorString("growslice: cap out of range"))
	}

	if et.size == 0 {
		// append should not create a slice with nil pointer but non-zero len.
		// We assume that append doesn't need to preserve old.array in this case.
		return slice{unsafe.Pointer(&zerobase), old.len, cap}
	}

	newcap := old.cap
	doublecap := newcap + newcap
	if cap > doublecap {
		// 1. the requested cap is more than double the current capacity: use cap directly
		newcap = cap
	} else {
		const threshold = 256
		if old.cap < threshold {
			// 2. the current capacity is below the threshold: double it
			newcap = doublecap
		} else {
			// the current capacity is at or above the threshold
			// Check 0 < newcap to detect overflow
			// and prevent an infinite loop.
			for 0 < newcap && newcap < cap {
				// Transition from growing 2x for small slices
				// to growing 1.25x for large slices. This formula
				// gives a smooth-ish transition between the two.
				//
				// 3. grow newcap by (newcap + 3*threshold) / 4 on each iteration until it is no smaller than cap.
				// This is a compromise between the 2x growth used for small slices and the 1.25x growth used for large ones.
				newcap += (newcap + 3*threshold) / 4
			}
			// Set newcap to the requested cap when
			// the newcap calculation overflowed.
			if newcap <= 0 {
				// the calculation above may overflow and make newcap negative; fall back to the requested cap
				newcap = cap
			}
		}
	}

	var overflow bool
	// lenmem: size in bytes of the data currently stored in the old backing array
	// newlenmem: minimum size in bytes needed to hold the new length
	// capmem: size in bytes of the backing array after growth
	var lenmem, newlenmem, capmem uintptr
	// Specialize for common values of et.size.
	// For 1 we don't need any division/multiplication.
	// For goarch.PtrSize, compiler will optimize division/multiplication into a shift by a constant.
	// For powers of 2, use a variable shift.
	// 4. round the allocation up to a malloc size class (memory alignment), which may adjust newcap again
	switch {
	case et.size == 1:
		// element size is 1 byte
		lenmem = uintptr(old.len)
		newlenmem = uintptr(cap)
		capmem = roundupsize(uintptr(newcap))
		overflow = uintptr(newcap) > maxAlloc
		newcap = int(capmem)
	case et.size == goarch.PtrSize:
		// element size equals the platform pointer size (the native word size: 4 bytes on 32-bit, 8 bytes on 64-bit)
		lenmem = uintptr(old.len) * goarch.PtrSize
		newlenmem = uintptr(cap) * goarch.PtrSize
		capmem = roundupsize(uintptr(newcap) * goarch.PtrSize)
		overflow = uintptr(newcap) > maxAlloc/goarch.PtrSize
		newcap = int(capmem / goarch.PtrSize)
	case isPowerOfTwo(et.size):
		// element size is a power of two
		var shift uintptr
		if goarch.PtrSize == 8 {
			// Mask shift for better code generation.
			shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
		} else {
			shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
		}
		lenmem = uintptr(old.len) << shift
		newlenmem = uintptr(cap) << shift
		capmem = roundupsize(uintptr(newcap) << shift)
		overflow = uintptr(newcap) > (maxAlloc >> shift)
		newcap = int(capmem >> shift)
	default:
		// any other element size
		lenmem = uintptr(old.len) * et.size
		newlenmem = uintptr(cap) * et.size
		capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
		capmem = roundupsize(capmem)
		newcap = int(capmem / et.size)
	}

	// The check of overflow in addition to capmem > maxAlloc is needed
	// to prevent an overflow which can be used to trigger a segfault
	// on 32bit architectures with this example program:
	//
	// type T [1<<27 + 1]int64
	//
	// var d T
	// var s []T
	//
	// func main() {
	//   s = append(s, d, d, d, d)
	//   print(len(s), "\n")
	// }
	if overflow || capmem > maxAlloc {
		// the slice has grown too large, exceeding the allocation limit
		panic(errorString("growslice: cap out of range"))
	}

	var p unsafe.Pointer
	if et.ptrdata == 0 {
		p = mallocgc(capmem, nil, false)
		// The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length).
		// Only clear the part that will not be overwritten.
		// clear only the region beyond the new length, which the calling append will not overwrite
		memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
	} else {
		// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
		p = mallocgc(capmem, et, true)
		if lenmem > 0 && writeBarrier.enabled {
			// Only shade the pointers in old.array since we know the destination slice p
			// only contains nil pointers because it has been cleared during alloc.
			bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem-et.size+et.ptrdata)
		}
	}
	// copy the old data into the newly allocated array
	memmove(p, old.array, lenmem)
	// return the new slice
	return slice{p, old.len, newcap}
}

I have annotated the code above. The growth logic has four key points:

1. If the requested cap is more than twice the current capacity, newcap is set to cap.
2. If the current capacity is below the threshold (256), newcap is double the current capacity.
3. Otherwise, newcap grows by (newcap + 3*threshold) / 4 per iteration until it is no smaller than cap; this is a compromise between the 2x growth used for small slices and the 1.25x growth used for large ones.
4. The allocation is rounded up to a malloc size class (memory alignment), which may adjust newcap again.

In step 3, older Go versions grew large slices by a flat 1.25x. That abrupt switch from 2x to 1.25x was considered too crude, so the current version adopts the smoother formula above.
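
To make the rule concrete, here is a minimal sketch of how the new capacity is chosen before the size-class rounding in step 4. The nextCap helper is hypothetical and only mirrors steps 1-3 of growslice; it is not part of the runtime:

package main

import "fmt"

// nextCap mimics steps 1-3 of growslice: given the current capacity oldCap
// and the minimum required capacity needed, it returns the capacity the
// runtime would pick before rounding the allocation up to a size class.
func nextCap(oldCap, needed int) int {
	newcap := oldCap
	doublecap := newcap + newcap
	if needed > doublecap {
		return needed // step 1: more than double is requested, use it as-is
	}
	const threshold = 256
	if oldCap < threshold {
		return doublecap // step 2: small slices simply double
	}
	// step 3: grow by (newcap + 3*threshold) / 4 until the request is satisfied
	for 0 < newcap && newcap < needed {
		newcap += (newcap + 3*threshold) / 4
	}
	if newcap <= 0 {
		return needed // overflow fallback
	}
	return newcap
}

func main() {
	fmt.Println(nextCap(128, 129)) // 256: still doubling below the threshold
	fmt.Println(nextCap(512, 513)) // 832: the runtime then rounds this up to 848
}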

The logic can be summarized in the following flowchart:

[Flowchart: slice capacity growth decision flow]

Let's observe in practice how len and cap change as elements are appended to a slice one at a time:

package main

import (
	"fmt"
	"os"
)

// append ints one at a time and record len, cap, and the capacity growth ratio
func main() {
	f, err := os.Create("sliceIncr.txt")
	if err != nil {
		panic(err)
	}
	defer func() { _ = f.Close() }()

	s := make([]int, 0)

	lastCap := 0
	for i := 0; i < 10000; i++ {
		var incrRatio = 0.0
		if lastCap != 0 {
			incrRatio = float64(cap(s)) / float64(lastCap)
		}
		f.WriteString(fmt.Sprintf("len:%d, cap:%d, cap incr times:%f \n", len(s), cap(s), incrRatio))
		s = append(s, i)
		lastCap = cap(s)
	}
}

The contents of the recorded file (some lines omitted):

len:1, cap:1, cap incr times:0.000000 
len:2, cap:2, cap incr times:2.000000 
len:3, cap:4, cap incr times:2.000000 
len:4, cap:4, cap incr times:1.000000 
len:5, cap:8, cap incr times:2.000000 
...
len:8, cap:8, cap incr times:1.000000 
len:9, cap:16, cap incr times:2.000000 
...
len:16, cap:16, cap incr times:1.000000 
len:17, cap:32, cap incr times:2.000000 
...
len:32, cap:32, cap incr times:1.000000 
len:33, cap:64, cap incr times:2.000000 
...
len:64, cap:64, cap incr times:1.000000 
len:65, cap:128, cap incr times:2.000000 
...
len:128, cap:128, cap incr times:1.000000 
len:129, cap:256, cap incr times:2.000000 
...
len:256, cap:256, cap incr times:1.000000 
len:257, cap:512, cap incr times:2.000000 
...
len:512, cap:512, cap incr times:1.000000 
len:513, cap:848, cap incr times:1.656250 
...
len:848, cap:848, cap incr times:1.000000 
len:849, cap:1280, cap incr times:1.509434 
...
len:1280, cap:1280, cap incr times:1.000000 
len:1281, cap:1792, cap incr times:1.400000 
...
len:1792, cap:1792, cap incr times:1.000000 
len:1793, cap:2560, cap incr times:1.428571 
...
len:2560, cap:2560, cap incr times:1.000000 
len:2561, cap:3408, cap incr times:1.331250 
...
len:3408, cap:3408, cap incr times:1.000000 
len:3409, cap:5120, cap incr times:1.502347 
...
len:5120, cap:5120, cap incr times:1.000000 
len:5121, cap:7168, cap incr times:1.400000 
...
len:7168, cap:7168, cap incr times:1.000000 
len:7169, cap:9216, cap incr times:1.285714 
...
len:9216, cap:9216, cap incr times:1.000000 
len:9217, cap:12288, cap incr times:1.333333 

As the data shows, slice capacity does not grow by a single fixed factor: small slices double, while larger slices grow by a smaller, varying ratio that is further adjusted by size-class rounding.
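
The jump from cap 512 to cap 848 is a good worked example of steps 3 and 4 combined (assuming []int on a 64-bit platform, i.e. 8-byte elements): appending the 513th element requests cap = 513; since 512 >= 256, the loop computes newcap = 512 + (512 + 3*256)/4 = 832; roundupsize(832 * 8 = 6656 bytes) then rounds the allocation up to the next malloc size class, 6784 bytes, and 6784 / 8 = 848, which is exactly the capacity recorded above.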

If you spot any errors, please reach out so I can correct them.

This post is licensed by the author under CC BY 4.0.