Go Performance Analysis

Program: optimization.go

package optimization

import (
	"encoding/json"
	"strconv"
)

type Request struct {
	TransactionID string `json:"transaction_id"`
	PayLoad       []int  `json:"payload"`
}

type Response struct {
	TransactionID string `json:"transaction_id"`
	Expression    string `json:"exp"`
}

func createRequest() string {
	payload := make([]int, 100, 100)
	for i := 0; i < 100; i++ {
		payload[i] = i
	}
	req := Request{"demo_transaction", payload}
	v, err := json.Marshal(&req)
	if err != nil {
		panic(err)
	}
	return string(v)
}

func processRequest(reqs []string) []string {
	reps := []string{}
	for _, req := range reqs {
		reqObj := &Request{}
		json.Unmarshal([]byte(req), reqObj)
		ret := ""
		for _, e := range reqObj.PayLoad {
			ret += strconv.Itoa(e) + ","
		}
		repObj := &Response{reqObj.TransactionID, ret}
		repJson, err := json.Marshal(&repObj)
		if err != nil {
			panic(err)
		}
		reps = append(reps, string(repJson))
	}
	return reps
}

Test program: optimization_test.go

package optimization

import "testing"

func TestCreateRequest(t *testing.T) {
	str := createRequest()
	t.Log(str)
}

func TestProcessRequest(t *testing.T) {
	reqs := []string{}
	reqs = append(reqs, createRequest())
	reps := processRequest(reqs)
	t.Log(reps[0])
}

func BenchmarkProcessRequest(b *testing.B) {
	// Build the input once, outside the measured region.
	reqs := []string{}
	reqs = append(reqs, createRequest())
	b.ResetTimer() // exclude the setup above from the timing
	for i := 0; i < b.N; i++ {
		_ = processRequest(reqs)
	}
	b.StopTimer()
}
  • Step 1: Start with a simple benchmark. From the command line, change into the package directory and run go test -bench=.; the result looks like this:

    goos: windows
    goarch: amd64
    pkg: go_learn/ch33/optimization
    BenchmarkProcessRequest-4          22869             52071 ns/op
    PASS
    ok      go_learn/ch33/optimization      1.775s
    
  • Step 2: Profile CPU usage by running go test -bench=. -cpuprofile=cpu.prof; a cpu.prof file is written into the package directory (the sketch below shows how to collect the same kind of profile outside of go test).
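
    The -cpuprofile flag only applies to go test. For an ordinary long-running program, the same kind of profile can be produced with the standard runtime/pprof package; a minimal sketch, where the profiled work is just a placeholder:

    package main

    import (
        "os"
        "runtime/pprof"
    )

    func main() {
        // Create the file that will hold the CPU profile.
        f, err := os.Create("cpu.prof")
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // Start sampling; StopCPUProfile flushes the profile before main returns.
        if err := pprof.StartCPUProfile(f); err != nil {
            panic(err)
        }
        defer pprof.StopCPUProfile()

        // ... code to be profiled goes here ...
    }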

  • Step 3: Run go tool pprof cpu.prof, and you can then do the following in its interactive console:

    Type: cpu
    Time: Jan 15, 2020 at 5:12pm (CST)
    Duration: 1.92s, Total samples = 2s (104.12%)
    Entering interactive mode (type "help" for commands, "o" for options)
    (pprof) top
    Showing nodes accounting for 830ms, 41.50% of 2000ms total
    Showing top 10 nodes out of 191
          flat  flat%   sum%        cum   cum%
         150ms  7.50%  7.50%      430ms 21.50%  runtime.concatstrings
         140ms  7.00% 14.50%      140ms  7.00%  runtime.memmove
          90ms  4.50% 19.00%      330ms 16.50%  encoding/json.indirect
          90ms  4.50% 23.50%      290ms 14.50%  runtime.mallocgc
          90ms  4.50% 28.00%      130ms  6.50%  strconv.ParseUint
          80ms  4.00% 32.00%       80ms  4.00%  runtime.stdcall3
          50ms  2.50% 34.50%      620ms 31.00%  encoding/json.(*decodeState).literalStore
          50ms  2.50% 37.00%       60ms  3.00%  encoding/json.(*decodeState).rescanLiteral
          50ms  2.50% 39.50%      180ms  9.00%  strconv.ParseInt
          40ms  2.00% 41.50%       70ms  3.50%  encoding/json.stateBeginValue
    (pprof) top -cum
    Showing nodes accounting for 0.09s, 4.50% of 2s total
    Showing top 10 nodes out of 191
          flat  flat%   sum%        cum   cum%
             0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.BenchmarkProcessRequest
             0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.processRequest
             0     0%     0%      1.72s 86.00%  testing.(*B).launch
             0     0%     0%      1.72s 86.00%  testing.(*B).runN
             0     0%     0%      1.13s 56.50%  encoding/json.Unmarshal
             0     0%     0%         1s 50.00%  encoding/json.(*decodeState).object
             0     0%     0%         1s 50.00%  encoding/json.(*decodeState).unmarshal
         0.01s   0.5%   0.5%         1s 50.00%  encoding/json.(*decodeState).value
         0.03s  1.50%  2.00%      0.95s 47.50%  encoding/json.(*decodeState).array
         0.05s  2.50%  4.50%      0.62s 31.00%  encoding/json.(*decodeState).literalStore
    (pprof) list processRequest
    Total: 2s
    ROUTINE ======================== go_learn/ch33/optimization.processRequest in D:\go_code\src\go_learn\ch33\optimization\optimization.go
             0      1.72s (flat, cum) 86.00% of Total
             .          .     30:
             .          .     31:func processRequest(reqs []string) []string {
             .          .     32:   reps := []string{}
             .          .     33:   for _, req := range reqs {
             .          .     34:           reqObj := &Request{}
             .      1.13s     35:           json.Unmarshal([]byte(req), reqObj)
             .          .     36:           ret := ""
             .          .     37:           for _, e := range reqObj.PayLoad {
             .      460ms     38:                   ret += strconv.Itoa(e) + ","
             .          .     39:           }
             .       30ms     40:           repObj := &Response{reqObj.TransactionID, ret}
             .       90ms     41:           repJson, err := json.Marshal(&repObj)
             .          .     42:           if err != nil {
             .          .     43:                   panic(err)
             .          .     44:           }
             .       10ms     45:           reps = append(reps, string(repJson))
             .          .     46:   }
             .          .     47:   return reps
             .          .     48:}
    (pprof) exit
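
    Besides the interactive console, on Go 1.10 or later the same profile can be explored in a browser with go tool pprof -http=:8080 cpu.prof, which adds flame-graph and source views (the call-graph view additionally requires Graphviz).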
    

    The analysis above shows that json.Unmarshal accounts for the largest share of the time, so that is the place to optimize.

  • Step 4: Optimize the json.Unmarshal call. The JSON serialization here uses Go's built-in encoding/json package, which is implemented with reflection and is comparatively slow; it can be replaced with easyjson, as sketched below.
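
    A minimal sketch of what that replacement could look like, assuming the easyjson tool has been installed (e.g. go get -u github.com/mailru/easyjson/...) and the marshalling code has been generated with easyjson -all optimization.go, which writes an optimization_easyjson.go file containing MarshalEasyJSON/UnmarshalEasyJSON methods for Request and Response. The processRequestEasyJSON function below is a hypothetical variant that keeps the original loop and only swaps the JSON calls:

    package optimization

    import (
        "strconv"

        "github.com/mailru/easyjson"
    )

    func processRequestEasyJSON(reqs []string) []string {
        reps := []string{}
        for _, req := range reqs {
            reqObj := &Request{}
            // Decode with the generated UnmarshalEasyJSON method (no reflection).
            if err := easyjson.Unmarshal([]byte(req), reqObj); err != nil {
                panic(err)
            }
            ret := ""
            for _, e := range reqObj.PayLoad {
                ret += strconv.Itoa(e) + ","
            }
            repObj := &Response{reqObj.TransactionID, ret}
            // Encode with the generated MarshalEasyJSON method.
            repJson, err := easyjson.Marshal(repObj)
            if err != nil {
                panic(err)
            }
            reps = append(reps, string(repJson))
        }
        return reps
    }

    Pointing BenchmarkProcessRequest at this variant and re-running go test -bench=. should show a noticeably lower ns/op, since the reflection-based encoding/json calls dominate the profile above.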

Original article: https://www.cnblogs.com/wuyongqiang/p/12198023.html