go 对中文字符串的操作

go中对于中文字符串的操作

统计字数

Go 在处理字符串时,经常需要知道字符串的字符数,但 len() 返回的是字符串的字节数(一个中文字符在 UTF-8 编码下通常占 3 个字节),因此我们可以自定义一个统计字符个数的函数

// countNum reports the number of characters (runes) in handlerStr rather than
// its byte length, so each multi-byte character such as a Chinese character
// counts once.
func countNum(handlerStr string) int {
	total := 0
	// Ranging over a string advances one decoded rune per iteration.
	for range handlerStr {
		total++
	}
	return total
}

截取字符串的前10个字符

先将字符串转换为 []rune 并统计字符个数,然后按字符(而不是按字节)进行截取

// InterceptString returns the first 10 characters (runes) of resStr.
// A string with fewer than 10 characters is returned unchanged.
func InterceptString(resStr string) string {
	chars := []rune(resStr)
	if len(chars) < 10 {
		return resStr
	}
	// Slice by rune so a multi-byte character is never cut in half.
	return string(chars[:10])
}

参考博客

golang学习笔记

如有错误,望请指正


以下内容与此篇博客无关,纯属个人笔记

场景是需要返回interface{}数据

// parserLock is the package-level mutex that ParserAllMatchRules acquires
// for the duration of its parsing work.
var parserLock sync.Mutex
// ParserAllMatchRules applies every rule in pattern to html and merges the
// extracted items into a single de-duplicated list.
//
// Calls are serialized through the package-level parserLock. Unless strategy
// is 2, html longer than 800000 characters is rejected. For each rule,
// graphquery.ParseFromString is run with rule.Patterns; when the response data
// is a map whose "data" entry is a list, that list is appended to the result.
// Responses with any other shape (including nil data) are logged and skipped.
//
// On success it returns a map[string]interface{} whose "data" key holds the
// de-duplicated []interface{}. It returns an error when html is too long or
// when no rule yielded any item.
func ParserAllMatchRules(url, html string, pattern []rules.Rule, strategy int) (interface{}, error) {
	// Deferring the unlock releases the mutex on every exit path, including a
	// panic raised while parsing, so later callers can never be left blocked.
	parserLock.Lock()
	defer parserLock.Unlock()

	const maxHTMLRunes = 800000
	// A string never has more runes than bytes, so the cheap byte-length test
	// runs first and the rune slice is only built for input that might
	// actually exceed the limit.
	if strategy != 2 && len(html) > maxHTMLRunes && len([]rune(html)) > maxHTMLRunes {
		return nil, errors.New("html is too long:" + url)
	}

	var assemblyData []interface{}

	for _, rule := range pattern {
		response := graphquery.ParseFromString(html, rule.Patterns)

		// Checked assertions: a nil or unexpectedly shaped payload is treated
		// as a failed rule instead of panicking.
		var allURLList []interface{}
		resParseData, ok := response.Data.(map[string]interface{})
		if ok {
			allURLList, ok = resParseData["data"].([]interface{})
		}
		if !ok {
			zap.S().Infow("more rule parse failed", "data", response.Data, "response", response)
			continue
		}
		assemblyData = append(assemblyData, allURLList...)
	}

	if len(assemblyData) == 0 {
		// Infow takes key/value pairs; log the URL so the entry is well formed.
		zap.S().Infow("response parser length is zero", "url", url)
		return nil, errors.New("data length is zero")
	}

	// Remove duplicate links.
	assemblyData = removeDuplicateElement(assemblyData)
	zap.S().Info("列表长度为:", len(assemblyData))

	resultData := map[string]interface{}{}
	resultData["data"] = assemblyData
	return resultData, nil
}

// removeDuplicateElement returns the items of languages whose "url" field is
// a non-empty string, keeping only the first item seen for each URL and
// preserving the input order.
//
// Each item is expected to be a map[string]interface{} with a string "url"
// entry. Items that are not such a map, or whose "url" is missing, not a
// string, or empty, are skipped rather than triggering a type-assertion panic.
func removeDuplicateElement(languages []interface{}) []interface{} {
	result := make([]interface{}, 0, len(languages))
	seen := make(map[string]struct{}, len(languages))
	for _, item := range languages {
		entry, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		resURL, ok := entry["url"].(string)
		if !ok || resURL == "" {
			continue
		}
		if _, dup := seen[resURL]; dup {
			continue
		}
		seen[resURL] = struct{}{}
		result = append(result, item)
	}
	return result
}

原文地址:https://www.cnblogs.com/ppwang06/p/string_handler.html