db timedb / ClickHouse / clickhouse

s

https://clickhouse.yandex

clickhouse 基础知识

https://www.jianshu.com/p/a5bf490247ea  

《大数据实时分析领域的黑马ClickHouse》二次解读

https://blog.csdn.net/haitianxueyuan521/article/details/80983001

ClickHouse 1秒,Vertica 5.42秒,Hive 279秒;

ClickHouse比Vertia快约5倍,比Hive快279倍,比My SQL 快801倍 

ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)。

https://clickhouse.yandex/docs/zh/ 

常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。

ClickHouse集群安装

yum install -y libicu.x86_64

rpm -ivh /opt/soft/clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-server-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-client-19.5.3.8-1.el7.x86_64.rpm

mkdir -p /etc/clickhouse-server/conf.d

mkdir -p /data/clickhouse/data;mkdir -p /data/clickhouse/log;mkdir -p /data/clickhouse/tmp

chmod 777 -R /etc/clickhouse-server/conf.d

chmod 777 -R /data  注:若启动失败,全部chmod 777 再走一遍,个渣CK软件。

修改文件:/etc/clickhouse-server/config.xml

修改文件:/etc/clickhouse-server/users.xml

创建文件:/etc/clickhouse-server/conf.d/clickhouse_remote_servers.xml

新建文件:/etc/clickhouse-server/conf.d/zookeeper-servers.xml

新建文件:/etc/clickhouse-server/conf.d/macros.xml

ClickHouse版本升级

https://my.oschina.net/aubao/blog/2244621 

 安装clickhouse步骤,依次如下执行安装。

[root@centos7 ~]# yum install -y unixODBC libicudata 

[root@centos7 ~]# yum install -y libicu.x86_64

[root@centos7 ~]# grep -q sse4_2 /proc/cpuinfo && echo “SSE 4.2 supported” || echo “SSE 4.2 not supported”
“SSE 4.2 supported”

1、安装rpm -ivh clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm2、安装rpm -ivh clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm 
3、安装rpm -ivh clickhouse-server-19.5.3.8-1.el7.x86_64.rpm
4、安装rpm -ivh clickhouse-client-19.5.3.8-1.el7.x86_64.rpm  

[root@centos7 ~]# rpm -qa|grep clickhouse
clickhouse-server-common-19.5.3.8-1.el7.x86_64
clickhouse-common-static-19.5.3.8-1.el7.x86_64
clickhouse-client-19.5.3.8-1.el7.x86_64
clickhouse-server-19.5.3.8-1.el7.x86_64

[root@centos7 ~]# clickhouse-server -V

ClickHouse server version 19.5.3.1

[root@centos7 ~]# /etc/init.d/clickhouse-server restart
Start clickhouse-server service: Path to data directory in /etc/clickhouse-server/config.xml: /var/lib/clickhouse/
DONE

自定义ClickHouse 部署

1、修改文件:/etc/clickhouse-server/config.xml

<log>/opt/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/opt/log/clickhouse-server/clickhouse-server.err.log</errorlog>

Clickhouse单机部署以及从mysql增量同步数据

https://www.cnblogs.com/gomysql/p/11199856.html 

//创建数据库
CREATE DATABASE datacollector on CLUSTER logs

//创建日志事件物理表
CREATE TABLE datacollector.log_event ON CLUSTER logs(id Int64,ip String,mac String,time DateTime,eId Int64,eCount Int64,eMsg String,eClass String,eLine Int64,eExceptions String) ENGINE = ReplicatedMergeTree('/clickhouse/logs/log_event/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(time) ORDER BY (id,mac) SETTINGS index_granularity = 8192
//创建日志事件分布式表
CREATE TABLE datacollector.log_event_all ON CLUSTER logs AS datacollector.log_event ENGINE = Distributed(logs, datacollector, log_event, rand())
//查询某个任务的日志数据
SELECT MAX(id) as max_id,SUM(eCount) as counts,eClass,eLine,groupArray(eMsg) as msgs,groupArray(time) as times,count() FROM datacollector.log_event_all WHERE id = 1 GROUP BY eClass,eLine

//创建sample数据的物理表
CREATE TABLE datacollector.perf_sample ON CLUSTER logs (id Int64,des String,sPath String,rTime Int64,counts Int64,mac String,cTime DateTime) ENGINE = ReplicatedMergeTree('/clickhouse/logs/perf_sample/{shard_statistics}', '{replica_statistics}')PARTITION BY cTime ORDER BY (id) SETTINGS index_granularity = 8192;
//创建sample数据的分布式表
CREATE TABLE datacollector.perf_sample_all ON CLUSTER logs AS datacollector.perf_sample ENGINE = Distributed(logs, datacollector, perf_sample, rand())
//创建sample数据的物化视图
CREATE MATERIALIZED VIEW datacollector.v_perf_sample ON CLUSTER logs ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/logs/v_perf_sample/{shard_statistics}', '{replica_statistics}') PARTITION BY id ORDER BY (id,sPath,des,rTime)as select id,sPath,des,rTime,sumState(counts) as numbers,anyState(mac) as address,maxState(cTime) as time from datacollector.perf_sample group by id,sPath,des,rTime
//创建samples数据的分布式物化视图
CREATE TABLE datacollector.v_perf_sample_all ON CLUSTER logs AS datacollector.v_perf_sample ENGINE = Distributed(logs, datacollector, v_perf_sample,rand())
//查询某个任务的聚合结果
SELECT id,sPath,des,rTime,sumMerge(numbers),maxMerge(time) FROM datacollector.v_perf_sample_all WHERE id = 20 group by id,sPath,des,rTime
//查询所有任务的聚合结果
SELECT id,sPath,des,rTime,sumMerge(numbers),maxMerge(time) FROM datacollector.v_perf_sample_all group by id,sPath,des,rTime


//创建agent数据的物理表
CREATE TABLE datacollector.agent_status ON CLUSTER logs (id Int64,ip String,mac String,fMem Int64,tMem Int64,uCPU Float32,receive Int64,send Int64,cTime DateTime) ENGINE = ReplicatedMergeTree('/clickhouse/logs/agent_status/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(cTime) ORDER BY (id,ip,mac) SETTINGS index_granularity = 8192
//创建agent数据的分布式表
CREATE TABLE datacollector.agent_status_all ON CLUSTER logs AS datacollector.agent_status ENGINE = Distributed(logs, datacollector, agent_status, rand())
//查询agent数据
SELECT * FROM datacollector.agent_status_all


//创建事务数据的物理表
CREATE TABLE datacollector.perf_transaction ON CLUSTER logs(id Int64,ip String,mac String,sPath String,errors Int64,tNum Int64,des String,tests Int64,thread String,rt Float32,rx Int64,minRT Float32,maxRT Float32, tps Int64,fBRT Int64,cTime DateTime,tTime Float32)ENGINE = ReplicatedMergeTree('/clickhouse/logs/perf_transaction/{shard_statistics}', '{replica_statistics}')PARTITION BY toYYYYMMDD(cTime) ORDER BY (id) SETTINGS index_granularity = 8192;
//创建事务数据的分布式表
CREATE TABLE datacollector.perf_transaction_all ON CLUSTER logs AS datacollector.perf_transaction ENGINE = Distributed(logs, datacollector, perf_transaction, rand())
//创建事务数据的物列化视图
CREATE MATERIALIZED VIEW datacollector.v_perf_transaction ON CLUSTER logs ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/logs/v_perf_transaction/{shard_statistics}', '{replica_statistics}') PARTITION BY toYYYYMM(create_time) ORDER BY (id,path,des,create_time) as select id,sPath as path,des,sumState(errors) as total_errors,sumState(toFloat32OrZero(thread)) as total_threads,sumState(rt) as total_response_time,sumState(rx) as total_traffic,sumState(tests) as total_tests,sumState(fBRT) as total_first_byte_response_time,minState(minRT) as min_response_time,maxState(maxRT) as max_response_time,sumState(tps) as total_tps,cTime as create_time,sumState(tTime) as total_time from datacollector.perf_transaction group by id,path,des,create_time
//创建事务数据的分布式物列化视图
CREATE TABLE datacollector.v_perf_transaction_all ON CLUSTER logs AS datacollector.v_perf_transaction ENGINE = Distributed(logs, datacollector, v_perf_transaction,rand())
//查询事务聚合结果
SELECT id,path,des,create_time,sumMerge(total_errors) as total_errors,sumMerge(total_threads) as total_threads,sumMerge(total_time)/sumMerge(total_tests) as response_time ,sumMerge(total_first_byte_response_time) as total_first_byte_response_time,minMerge(min_response_time) as min_response_time,maxMerge(max_response_time) as max_response_time,sumMerge(total_tps) as total_tps FROM datacollector.v_perf_transaction_all GROUP BY id,path,des,create_time


 

end

yum install -y libicu.x86_64

rpm -ivh /opt/soft/clickhouse-server-common-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-common-static-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-server-19.5.3.8-1.el7.x86_64.rpm

rpm -ivh /opt/soft/clickhouse-client-19.5.3.8-1.el7.x86_64.rpm

原文地址:https://www.cnblogs.com/lindows/p/11474697.html