Go Performance Analysis

Program: optimization.go

package optimization

import (
	"encoding/json"
	"strconv"
)

type Request struct {
	TransactionID string `json:"transaction_id"`
	PayLoad       []int  `json:"payload"`
}

type Response struct {
	TransactionID string `json:"transaction_id"`
	Expression    string `json:"exp"`
}

func createRequest() string {
	payload := make([]int, 100, 100)
	for i := 0; i < 100; i++ {
		payload[i] = i
	}
	req := Request{"demo_transaction", payload}
	v, err := json.Marshal(&req)
	if err != nil {
		panic(err)
	}
	return string(v)
}

func processRequest(reqs []string) []string {
	reps := []string{}
	for _, req := range reqs {
		reqObj := &Request{}
		json.Unmarshal([]byte(req), reqObj)
		ret := ""
		for _, e := range reqObj.PayLoad {
			ret += strconv.Itoa(e) + ","
		}
		repObj := &Response{reqObj.TransactionID, ret}
		repJson, err := json.Marshal(&repObj)
		if err != nil {
			panic(err)
		}
		reps = append(reps, string(repJson))
	}
	return reps
}

Test program: optimization_test.go

package optimization

import "testing"

func TestCreateRequest(t *testing.T) {
	str := createRequest()
	t.Log(str)
}

func TestProcessRequest(t *testing.T) {
	reqs := []string{}
	reqs = append(reqs, createRequest())
	reps := processRequest(reqs)
	t.Log(reps[0])
}

func BenchmarkProcessRequest(b *testing.B) {
	// Build the input once, outside the measured region.
	reqs := []string{}
	reqs = append(reqs, createRequest())
	b.ResetTimer() // exclude the setup above from the timing
	for i := 0; i < b.N; i++ {
		_ = processRequest(reqs)
	}
	b.StopTimer()
}
  • Step 1: Start with a simple benchmark. From the command line, change into the package directory and run go test -bench=.; the result looks like this:

    goos: windows
    goarch: amd64
    pkg: go_learn/ch33/optimization
    BenchmarkProcessRequest-4          22869             52071 ns/op
    PASS
    ok      go_learn/ch33/optimization      1.775s
    
  • Step 2: Profile CPU usage by running go test -bench=. -cpuprofile=cpu.prof; a cpu.prof file is written into the package directory (the sketch below shows how to collect the same kind of profile outside of go test).
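
    The -cpuprofile flag only applies to go test. For an ordinary long-running program, the same kind of profile can be produced with the standard runtime/pprof package; a minimal sketch, where the profiled work is just a placeholder:

    package main

    import (
        "os"
        "runtime/pprof"
    )

    func main() {
        // Create the file that will hold the CPU profile.
        f, err := os.Create("cpu.prof")
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // Start sampling; StopCPUProfile flushes the profile before main returns.
        if err := pprof.StartCPUProfile(f); err != nil {
            panic(err)
        }
        defer pprof.StopCPUProfile()

        // ... code to be profiled goes here ...
    }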

  • Step 3: Run go tool pprof cpu.prof, and you can then do the following in its interactive console:

    Type: cpu
    Time: Jan 15, 2020 at 5:12pm (CST)
    Duration: 1.92s, Total samples = 2s (104.12%)
    Entering interactive mode (type "help" for commands, "o" for options)
    (pprof) top
    Showing nodes accounting for 830ms, 41.50% of 2000ms total
    Showing top 10 nodes out of 191
          flat  flat%   sum%        cum   cum%
         150ms  7.50%  7.50%      430ms 21.50%  runtime.concatstrings
         140ms  7.00% 14.50%      140ms  7.00%  runtime.memmove
          90ms  4.50% 19.00%      330ms 16.50%  encoding/json.indirect
          90ms  4.50% 23.50%      290ms 14.50%  runtime.mallocgc
          90ms  4.50% 28.00%      130ms  6.50%  strconv.ParseUint
          80ms  4.00% 32.00%       80ms  4.00%  runtime.stdcall3
          50ms  2.50% 34.50%      620ms 31.00%  encoding/json.(*decodeState).literalStore
          50ms  2.50% 37.00%       60ms  3.00%  encoding/json.(*decodeState).rescanLiteral
          50ms  2.50% 39.50%      180ms  9.00%  strconv.ParseInt
          40ms  2.00% 41.50%       70ms  3.50%  encoding/json.stateBeginValue
    (pprof) top -cum
    Showing nodes accounting for 0.09s, 4.50% of 2s total
    Showing top 10 nodes out of 191
          flat  flat%   sum%        cum   cum%
             0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.BenchmarkProcessRequest
             0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.processRequest
             0     0%     0%      1.72s 86.00%  testing.(*B).launch
             0     0%     0%      1.72s 86.00%  testing.(*B).runN
             0     0%     0%      1.13s 56.50%  encoding/json.Unmarshal
             0     0%     0%         1s 50.00%  encoding/json.(*decodeState).object
             0     0%     0%         1s 50.00%  encoding/json.(*decodeState).unmarshal
         0.01s   0.5%   0.5%         1s 50.00%  encoding/json.(*decodeState).value
         0.03s  1.50%  2.00%      0.95s 47.50%  encoding/json.(*decodeState).array
         0.05s  2.50%  4.50%      0.62s 31.00%  encoding/json.(*decodeState).literalStore
    (pprof) list processRequest
    Total: 2s
    ROUTINE ======================== go_learn/ch33/optimization.processRequest in D:\go_code\src\go_learn\ch33\optimization\optimization.go
             0      1.72s (flat, cum) 86.00% of Total
             .          .     30:
             .          .     31:func processRequest(reqs []string) []string {
             .          .     32:   reps := []string{}
             .          .     33:   for _, req := range reqs {
             .          .     34:           reqObj := &Request{}
             .      1.13s     35:           json.Unmarshal([]byte(req), reqObj)
             .          .     36:           ret := ""
             .          .     37:           for _, e := range reqObj.PayLoad {
             .      460ms     38:                   ret += strconv.Itoa(e) + ","
             .          .     39:           }
             .       30ms     40:           repObj := &Response{reqObj.TransactionID, ret}
             .       90ms     41:           repJson, err := json.Marshal(&repObj)
             .          .     42:           if err != nil {
             .          .     43:                   panic(err)
             .          .     44:           }
             .       10ms     45:           reps = append(reps, string(repJson))
             .          .     46:   }
             .          .     47:   return reps
             .          .     48:}
    (pprof) exit
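
    Besides the interactive console, on Go 1.10 or later the same profile can be explored in a browser with go tool pprof -http=:8080 cpu.prof, which adds flame-graph and source views (the call-graph view additionally requires Graphviz).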
    

    The analysis above shows that json.Unmarshal accounts for the largest share of the time, so that is the place to optimize.

  • Step 4: Optimize the json.Unmarshal call. The JSON serialization here uses Go's built-in encoding/json package, which is implemented with reflection and is comparatively slow; it can be replaced with easyjson, as sketched below.
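
    A minimal sketch of what that replacement could look like, assuming the easyjson tool has been installed (e.g. go get -u github.com/mailru/easyjson/...) and the marshalling code has been generated with easyjson -all optimization.go, which writes an optimization_easyjson.go file containing MarshalEasyJSON/UnmarshalEasyJSON methods for Request and Response. The processRequestEasyJSON function below is a hypothetical variant that keeps the original loop and only swaps the JSON calls:

    package optimization

    import (
        "strconv"

        "github.com/mailru/easyjson"
    )

    func processRequestEasyJSON(reqs []string) []string {
        reps := []string{}
        for _, req := range reqs {
            reqObj := &Request{}
            // Decode with the generated UnmarshalEasyJSON method (no reflection).
            if err := easyjson.Unmarshal([]byte(req), reqObj); err != nil {
                panic(err)
            }
            ret := ""
            for _, e := range reqObj.PayLoad {
                ret += strconv.Itoa(e) + ","
            }
            repObj := &Response{reqObj.TransactionID, ret}
            // Encode with the generated MarshalEasyJSON method.
            repJson, err := easyjson.Marshal(repObj)
            if err != nil {
                panic(err)
            }
            reps = append(reps, string(repJson))
        }
        return reps
    }

    Pointing BenchmarkProcessRequest at this variant and re-running go test -bench=. should show a noticeably lower ns/op, since the reflection-based encoding/json calls dominate the profile above.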

Original article: https://www.cnblogs.com/wuyongqiang/p/12198023.html