实时日志数据写入Clickhouse

一、背景

每天上百亿的日志数据实时查询是个挑战,在架构设计上采用了Kafka + Flink + Clickhouse+Redash,实现海量数据的实时分析。计算层,我们开发了基于Flink计算引擎的实时数据平台,简化开发流程,数据通过配置化实现动态Schema生成,底层数据解析统一,无需重复造轮子,整个数据链路,从数据的采集,转换,存储,可视化,无需写一行代码,配置化完成。本文主要介绍实时日志数据写入Clickhouse的实践。

Flink Clickhouse Sink

<dependency>
	<groupId>ru.yandex.clickhouse</groupId>
	<artifactId>clickhouse-jdbc</artifactId>
	<version>0.1.50</version>
</dependency>
public class ClickhouseSink extends RichSinkFunction<Row> implements Serializable {
    private String tablename;
    private String[] tableColums; 
    private List<String> types;   
    private String[] columns;    
    private String username;     
    private String password;     
    private String[] ips;   
    private String drivername = "ru.yandex.clickhouse.ClickHouseDriver";
    private List<Row> list = new ArrayList<>();
    private List<PreparedStatement> preparedStatementList = new ArrayList<>();
    private List<Connection> connectionList = new ArrayList<>();
    private List<Statement> statementList = new ArrayList<>();

    private long lastInsertTime = 0L;
    private long insertCkTimenterval = 4000L;
    // 插入的批次
    private int insertCkBatchSize = 10000;

    public ClickhouseSink(String tablename, String username, String password, String[] ips, String[] tableColums, List<String> types, String[] columns) {
        this.tablename = tablename;
        this.username = username;
        this.password = password;
        this.ips = ips;
        this.tableColums = tableColums;
        this.types = types;
        this.columns = columns;  // 新增字段
    }

    // 插入数据
    public void insertData(List<Row> rows, PreparedStatement preparedStatement, Connection connection) throws SQLException {

        for (int i = 0; i < rows.size(); ++i) {
            Row row = rows.get(i);
            for (int j = 0; j < this.tableColums.length; ++j) {
                if (null != row.getField(j)) {
                    preparedStatement.setObject(j + 1, row.getField(j));

                } else {
                    preparedStatement.setObject(j + 1, "null");
                }
            }
            preparedStatement.addBatch();
        }

        preparedStatement.executeBatch();
        connection.commit();
        preparedStatement.clearBatch();
    }


    /**
     * 新增字段修改表添加列
     *
     * @param statement
     * @throws Exception
     */
    public void tableAddColumn(Statement statement) {
        try {
            if (null != this.columns && this.columns.length > 0) {

                /**
                 * table 增加字段
                 */
                // 获取原表字段名
                String querySql = "select * from " + this.tablename + " limit 1";

                ResultSet rs = statement.executeQuery(querySql);
                ResultSetMetaData rss = rs.getMetaData();
                int columnCount = rss.getColumnCount();

                List<String> orgTabCols = new ArrayList<>();
                for (int i = 1; i <= columnCount; ++i) {
                    orgTabCols.add(rss.getColumnName(i));
                }

                // 对比两个数组,判断新增字段是否在原来的表中
                Collection collection = new ArrayList<String>(orgTabCols);
                boolean exists = collection.removeAll(Arrays.asList(this.columns));

                // 新增字段不在原来的表中,执行添加列操作
                if (!exists) {

                    for (int i = 0; i < this.columns.length; ++i) {
                        String str = "";
                        String str_all = "";

                        StringBuilder sb = null;
                        StringBuilder sb_all = null;
                        if (i == 0) {
                            sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1));
                            sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1));

                        } else {
                            sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]);

                            sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]);
                        }

                        if (StringUtils.isNotEmpty(sb.toString())) {
                            statement.executeUpdate(sb.toString());
                        }

                        if (StringUtils.isNotEmpty(sb_all.toString())) {
                            statement.executeUpdate(sb_all.toString());
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // 根据IP创建连接
    public void createConnection() throws Exception {

        // 插入语句
        String insertStr = StrUtils.clickhouseInsertValue(this.tableColums, this.tablename);
        // 创建表
        List<String> createtableStrList = StrUtils.clickhouseCreatTable(this.tableColums, this.tablename, Constant.CKCLUSTERNAME, this.tableColums[3], this.types);
        // 创建数据库
        String create_database_str = "create database if not exists " + this.tablename.split("\.")[0];

        for (String ip : this.ips) {
            String url = "jdbc:clickhouse://" + ip + ":8123";
            Connection connection = DriverManager.getConnection(url, this.username, this.password);
            Statement statement = connection.createStatement();

            // 执行创建数据库
            statement.executeUpdate(create_database_str);

            // 执行创建表
            statement.executeUpdate(createtableStrList.get(0));
            statement.executeUpdate(createtableStrList.get(1));

            // 增加表字段
            tableAddColumn(statement);

            this.statementList.add(statement);
            PreparedStatement preparedStatement = connection.prepareStatement(insertStr);
            connection.setAutoCommit(false);
            this.preparedStatementList.add(preparedStatement);
            this.connectionList.add(connection);
        }

    }


    @Override
    public void open(Configuration parameters) throws Exception {

        Class.forName(this.drivername);

        // 创建连接
        createConnection();
    }

    @Override
    public void invoke(Row row, Context context) throws Exception {
        
        // 轮询写入各个local表,避免单节点数据过多
        if (null != row) {
            Random random = new Random();
            int index = random.nextInt(this.ips.length);
            switch (index) {

                case 0:
                    if(list.size() >= this.insertCkBatchSize || isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(0),connectionList.get(0));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;
                case 1:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(1),connectionList.get(1));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;
                case 2:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(2),connectionList.get(2));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }
                    break;
                case 3:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(3),connectionList.get(3));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;
                case 4:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(4),connectionList.get(4));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;
                case 5:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(5),connectionList.get(5));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;

                case 6:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(6),connectionList.get(6));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }
                    break;
                case 7:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(7),connectionList.get(7));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }
                    break;

                case 8:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(8),connectionList.get(8));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }
                    break;
                case 9:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(9),connectionList.get(9));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }

                    break;
                case 10:
                    if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) {
                        insertData(list,preparedStatementList.get(10),connectionList.get(10));
                        list.clear();
                        this.lastInsertTime = System.currentTimeMillis();
                    } else {
                        list.add(row);
                    }
                    break;

            }
        }
    }

    @Override
    public void close() throws Exception {

        for (Statement statement : this.statementList) {
            if (null != statement) {
                statement.close();
            }
        }

        for (PreparedStatement preparedStatement : this.preparedStatementList) {
            if (null != preparedStatement) {
                preparedStatement.close();
            }
        }

        for (Connection connection : this.connectionList) {
            if (null != connection) {
                connection.close();
            }
        }
    }

    /**
     * 根据时间判断是否插入数据
     *
     * @return
     */
    private boolean isTimeToDoInsert() {
        long currTime = System.currentTimeMillis();
        return currTime - this.lastInsertTime >= this.insertCkTimenterval;
    }
}

通过自定义Sink方式写入Clickhouse,底层还是使用JDBC的方式,要注意插入不要过于频繁,否则会报错误(数据插入的频率大于数据合并)批次插入,批次最好设置大点,轮询写入每个节点方式有待优化。

原文地址:https://www.cnblogs.com/zfwwdz/p/13164474.html