留存率例子(待优化)

留存率例子(待优化)

需求:计算用户留存率

表:user_login_detaile

数据量:10w

服务器查询时间:550ms

name type lenth
id int 11
uid int 10
os int 2
loginMethod varchar 100
mobleVerson varchar 50
channel varchar 50
isType int 2
loginTime int 10

查询思路:

1.取出id与登录时间

--由于数据库内存储的时间均为s,所以可以使用ADDDATE格式化
select 
    uid,
    date_format(ADDDATE('1970-01-01 08:00:00', INTERVAL loginTime SECOND),'%Y%m%d') loginTime
from user_login_detail

2.取出所有id第一次登录时间

SELECT
	uid,
	min(first_day) first_day
FROM
	(select 
		uid,
		date_format(ADDDATE('1970-01-01 08:00:00', INTERVAL loginTime SECOND),'%Y%m%d') first_day
	from user_login_detail
	group by 1,2) a
group by 1

3.合并需要的数据

SELECT
	b.uid,
	b.loginTime,
	c.firstday 
FROM
	(SELECT 
		uid, 
		date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d') loginTime 
	FROM 
		user_login_detail GROUP BY 1, 2) b
	LEFT JOIN 
	(SELECT 
		uid, 
		min(date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d')) firstday 
	FROM 
		(SELECT 
			uid, 
			loginTime 
		FROM 
			user_login_detail
		GROUP BY 1, 2) a 
	GROUP BY 1 ) c ON b.uid = c.uid 
ORDER BY 1,2 

4.取得用户的总留存天数

SELECT
	uid,
	loginTime,
	firstday,
	DATEDIFF( loginTime, firstday ) AS loginday 
FROM
	(SELECT
		b.uid,
			b.loginTime,
			c.firstday 
	FROM
		(SELECT 
			uid, 
			date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d') loginTime 
		FROM 
			user_login_detail GROUP BY 1, 2) b
		LEFT JOIN 
		(SELECT 
			uid, 
			min(date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d')) firstday 
		FROM 
			(SELECT 
				uid, 
				loginTime 
			FROM 
				user_login_detail
			GROUP BY 1, 2) a 
		GROUP BY 1 ) c ON b.uid = c.uid 
	ORDER BY 1,2 ) e 
ORDER BY 1,2

5.根据前台传入时间段查询留存率

SELECT
	firstday,
	sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) adduser,
	sum( CASE WHEN loginday = 1 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day2,
	sum( CASE WHEN loginday = 2 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day3,
	sum( CASE WHEN loginday = 3 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day4,
	sum( CASE WHEN loginday = 4 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day5,
	sum( CASE WHEN loginday = 5 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day6,
	sum( CASE WHEN loginday = 6 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day7,
	sum( CASE WHEN loginday = 14 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day15,
	sum( CASE WHEN loginday = 30 THEN 1 ELSE 0 END )/sum( CASE WHEN loginday = 0 THEN 1 ELSE 0 END ) day30 
FROM
	(SELECT
		uid,
		loginTime,
		firstday,
		DATEDIFF( loginTime, firstday ) AS loginday 
	FROM
		(SELECT
			b.uid,
			b.loginTime,
			c.firstday 
		FROM
			(SELECT 
				uid, 
				date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d') loginTime 
			FROM 
				user_login_detail GROUP BY 1, 2) b
			LEFT JOIN 
			(SELECT 
				uid, 
				min(date_format(ADDDATE( '1970-01-01 08:00:00', INTERVAL loginTime SECOND), '%Y%m%d')) firstday 
			FROM 
				(SELECT 
					uid, 
					loginTime 
				FROM 
					user_login_detail
				GROUP BY 1, 2) a 
			GROUP BY 1 ) c ON b.uid = c.uid 
		ORDER BY 1,2 ) e 
	ORDER BY 1,2 ) f 
WHERE
	firstday BETWEEN 20160612 AND 20160712
GROUP BY 1
ORDER BY 1
原文地址:https://www.cnblogs.com/AmierX/p/13502931.html