// File: /usr/local/go/src/cmd/compile/internal/ssa/tighten.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/compile/internal/base"

// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
//
// The pass works in three stages:
//  1. Mark the values that are allowed to move (canMove).
//  2. For each such value that has at least one use, compute a target
//     block: the least common ancestor, in the dominator tree, of all
//     blocks that use it, then hoist that target out of any loop that is
//     nested deeper than the loop of the value's current block.
//  3. Move each value to its target block, unless it has a memory argument
//     that is not the starting memory of the target block.
//
// Stages 2 and 3 are repeated until an iteration moves nothing, because
// the use location of an argument is the block holding its user, and that
// block changes whenever the user itself is moved.
func tighten(f *Func) {
	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
		// Skip the optimization in -N mode, except for huge functions.
		// Too many values live across blocks can cause pathological
		// behavior in the register allocator (see issue 52180).
		return
	}

	// canMove is indexed by Value ID; true means the value may be relocated.
	canMove := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(canMove)

	// Compute the memory states of each block.
	// Both slices are indexed by Block ID. Only startMem is consulted below;
	// endMem is needed solely as an output buffer for memState.
	startMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(startMem)
	endMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(endMem)
	memState(f, startMem, endMem)

	// Stage 1: decide which values are moveable.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.isLoweredGetClosurePtr() {
				// Must stay in the entry block.
				continue
			}
			switch v.Op {
			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
				// Phis need to stay in their block.
				// Arg must stay in the entry block.
				// Tuple selectors must stay with the tuple generator.
				// SelectN is typically, ultimately, a register.
				continue
			}
			// Count arguments which will need a register.
			// Note that this counts argument slots, not distinct values:
			// an argument that appears twice in v.Args is counted twice.
			narg := 0
			for _, a := range v.Args {
				// SP and SB are special registers and have no effect on
				// the allocation of general-purpose registers.
				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
					narg++
				}
			}
			if narg >= 2 && !v.Type.IsFlags() {
				// Don't move values with more than one input, as that may
				// increase register pressure.
				// We make an exception for flags, as we want flag generators
				// moved next to uses (because we only have 1 flag register).
				continue
			}
			canMove[v.ID] = true
		}
	}

	// Build data structure for fast least-common-ancestor queries.
	lca := makeLCArange(f)

	// For each moveable value, record the block that dominates all uses found so far.
	// target is indexed by Value ID; nil means "no use seen" or "not moveable".
	target := f.Cache.allocBlockSlice(f.NumValues())
	defer f.Cache.freeBlockSlice(target)

	// Grab loop information.
	// We use this to make sure we don't tighten a value into a (deeper) loop.
	// idom is indexed by Block ID; loop depths are compared further below.
	idom := f.Idom()
	loops := f.loopnest()
	loops.calculateDepths()

	// Iterate to a fixed point: each round recomputes every target from
	// scratch and then performs the moves; the loop ends after a round in
	// which no value was moved.
	changed := true
	for changed {
		changed = false

		// Reset target
		for i := range target {
			target[i] = nil
		}

		// Compute target locations (for moveable values only).
		// target location = the least common ancestor of all uses in the dominator tree.
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, a := range v.Args {
					if !canMove[a.ID] {
						continue
					}
					// By default the use is in the block holding the user v.
					// For a phi, the i'th argument is instead treated as used
					// in the i'th predecessor of the phi's block.
					use := b
					if v.Op == OpPhi {
						use = b.Preds[i].b
					}
					if target[a.ID] == nil {
						target[a.ID] = use
					} else {
						target[a.ID] = lca.find(target[a.ID], use)
					}
				}
			}
			// Control values of a block count as uses in that block.
			for _, c := range b.ControlValues() {
				if !canMove[c.ID] {
					continue
				}
				if target[c.ID] == nil {
					target[c.ID] = b
				} else {
					target[c.ID] = lca.find(target[c.ID], b)
				}
			}
		}

		// If the target location is inside a loop,
		// move the target location up to just before the loop head.
		for _, b := range f.Blocks {
			// origloop is the loop containing the value's current block
			// (nil if that block is not in any loop).
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
				// Keep climbing to the immediate dominator of the loop header
				// while the target is in a loop and either the value currently
				// sits outside all loops or the target's loop is deeper than
				// the value's current loop.
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}

		// Move values to target locations.
		for _, b := range f.Blocks {
			// Index-based loop: b.Values is edited in place while iterating.
			for i := 0; i < len(b.Values); i++ {
				v := b.Values[i]
				t := target[v.ID]
				if t == nil || t == b {
					// v is not moveable, or is already in correct place.
					continue
				}
				if mem := v.MemoryArg(); mem != nil {
					if startMem[t.ID] != mem {
						// We can't move a value with a memory arg unless the target block
						// has that memory arg as its starting memory.
						continue
					}
				}
				if f.pass.debug > 0 {
					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
				}
				// Move v to the block which dominates its uses.
				t.Values = append(t.Values, v)
				v.Block = t
				// Remove v from b.Values by overwriting slot i with the last
				// element and truncating; the order of b.Values is not kept.
				// The vacated tail slot is set to nil before truncation.
				last := len(b.Values) - 1
				b.Values[i] = b.Values[last]
				b.Values[last] = nil
				b.Values = b.Values[:last]
				changed = true
				// Re-examine slot i, which now holds the element swapped in.
				i--
			}
		}
	}
}

// phiTighten gives each phi its own copy of every rematerializeable
// argument, placed in the predecessor block that feeds that argument.
// Without this, such constants can stay live across large parts of the
// function. See issue 16407.
func phiTighten(f *Func) {
	for _, blk := range f.Blocks {
		for _, phi := range blk.Values {
			if phi.Op != OpPhi {
				continue
			}
			for idx, arg := range phi.Args {
				if !arg.rematerializeable() {
					// Not a constant we can move around.
					continue
				}
				// The idx'th phi argument arrives via the idx'th predecessor.
				pred := blk.Preds[idx].b
				if arg.Block == pred {
					// Already lives in the right block.
					continue
				}
				// Copy the argument into the predecessor and use the copy.
				phi.SetArg(idx, arg.copyInto(pred))
			}
		}
	}
}

// memState computes the memory state at the beginning and at the end of
// each block of f. A memory state is represented by a value of mem type.
// Results are written into startMem and endMem, both indexed by block ID.
// endMem is never written for blocks with no successors (Exit, Ret, RetJmp
// blocks), so it stays nil there when the caller passes zeroed slices.
//
// This algorithm is not suitable for infinite loop blocks that do not
// contain any mem operations. For example:
//
//	b1:
//	  (some values)
//	  Plain -> b2
//	b2: <- b1 b2
//	  Plain -> b2
//
// Outline of the algorithm:
//  1. The start memory state of a block is InitMem, a Phi node of type mem,
//     or an incoming memory value.
//  2. The start memory state of a block is consistent with the end memory
//     state of its predecessors. If the start memory state is a Phi that
//     belongs to the block, the end memory state of the i'th predecessor is
//     the i'th argument of that Phi.
//  3. A first pass seeds the start state of every block where it can be read
//     directly off the block's values. A second pass floods the known
//     states backwards through predecessor edges.
func memState(f *Func, startMem, endMem []*Value) {
	// worklist holds the blocks whose startMem is known but has not yet been
	// pushed to the endMem of their predecessors. It is used as a FIFO queue.
	var worklist []*Block

	// First pass: seed the start memory state of the blocks that reveal it.
	for _, blk := range f.Blocks {
		for _, v := range blk.Values {
			var seed *Value
			switch v.Op {
			case OpPhi:
				// Only a memory-typed phi defines the block's start state.
				if v.Type.IsMemory() {
					seed = v
				}
			case OpInitMem:
				// Kept from the original code, which notes that this case
				// is actually not needed.
				seed = v
			default:
				// A memory argument defined in another block is the memory
				// state flowing into this block.
				if arg := v.MemoryArg(); arg != nil && arg.Block != blk {
					seed = arg
				}
			}
			if seed == nil {
				continue
			}
			if prev := startMem[blk.ID]; prev != nil {
				if prev == seed {
					// Same state already recorded for this block.
					continue
				}
				f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, blk, prev, seed)
			}
			startMem[blk.ID] = seed
			worklist = append(worklist, blk)
		}
	}

	// Second pass: flood known states backwards to predecessors.
	for len(worklist) > 0 {
		blk := worklist[0]
		worklist = worklist[1:]
		start := startMem[blk.ID]
		for i, edge := range blk.Preds {
			pred := edge.b
			if endMem[pred.ID] != nil {
				// End state of this predecessor is already known.
				continue
			}
			// If blk starts with its own memory phi, the predecessor ends
			// with the matching phi argument; otherwise it ends with the
			// very same memory value blk starts with.
			out := start
			if start.Op == OpPhi && start.Block == blk {
				out = start.Args[i]
			}
			endMem[pred.ID] = out
			if startMem[pred.ID] == nil {
				// The predecessor showed no start state of its own in the
				// first pass, so its start state is taken to be its end
				// state; keep flooding from there.
				startMem[pred.ID] = out
				worklist = append(worklist, pred)
			}
		}
	}
}