Formatting nginx logs as JSON and shipping them to Kafka with Filebeat

The key directive for formatting nginx logs is log_format inside the http block.

For example:

log_format  json_log  '{"a":"b","c":"d"}';

access_log logs/access.log json_log;
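
Note that once the template embeds variables (as in the full config below), their values are not escaped by default, so a quote or control character in a request can produce invalid JSON. On nginx 1.11.8 and later, log_format accepts an escape parameter; a minimal sketch (the field names here are only illustrative):

log_format  json_log  escape=json '{"remote_addr":"$remote_addr","request":"$request","status":"$status"}';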

nginx configuration:

http {
    log_format json_log '{"commonAnnotations": {"log_province": "8310000","logTime": "$datetime",'
                        '"logID": "zqhw-ue-$time_str","Log_type": "008",'
                        '"dev_type": "Linux 2.6.32-358.el6.x86_64","equ_manuf": "042",'
                        '"dev_name": "yhtyapp051151","dev_mod": "Red Hat 4.4.7-3",'
                        '"devIPv4": "$server_addr","devIPv6": "fe80::250:56ff:febc:6d94/64",'
                        '"devMAC": "00:50:56:BC:6D:94","device_id": "SHWEBZQHWUE"},'
                        '"busiAnnotations": {"collect_ipv4": "$server_addr",'
                        '"collect_ipv6": "fe80::250:56ff:febc:6d94/64","create_time": "$datetime",'
                        '"priority": "6","severity": "4","src_ipv4": "$x_forwarded_ip","src_ipv6": "",'
                        '"src_port": "$remote_port","protocol": "$server_protocol",'
                        '"dst_ipv4": "$server_addr","dst_ipv6": "","dst_port": "$server_port",'
                        '"http_method": "$request_method","user_name": "hgx","url": "$request_uri",'
                        '"response_code": "$status","bytes": "$body_bytes_sent"}}';

server {
        listen 80;
        server_name kt.jiafeimao.com;
       
        root /app/htdocs/jiafeimao;
        add_header X-Frame-Options SAMEORIGIN;
        add_header Strict-Transport-Security "max-age=63072000; includeSubdomains; preload";
        add_header Content-Security-Policy none;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection 1;
        error_page 502 503 504 /404.html;
        default_type 'text/html';
        charset utf-8;

        location ~ .*\.(gif|jpg|png|jpeg|css|js|flv|ico|swf)(.*) {
             expires 1d;
        }

        # Capture the current time into variables: $date (date), $datetime (date and time),
        # $time_str (compact date-time string)
        if ($time_iso8601 ~ "^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})") {
            set $date "$1-$2-$3";
            set $datetime "$1-$2-$3 $4:$5:$6";
            set $time_str "$1$2$3$4$5$6";
        }

        # If X-Forwarded-For is set, take the real client IP from it: behind a reverse
        # proxy, the first IP is the client and the rest are the proxies the request
        # passed through
        if ($http_x_forwarded_for ~ "^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})") {
            set $x_forwarded_ip "$1";
        }

        # Write the access log to a file rolled daily by date, in the JSON format above
        access_log logs/access-$date.log json_log;

        # Deny access to these directories and files
        location ~* ^/Uploads/.*\.(php|php5)$ { deny all; }

        location ~ /.*\.php/ {
            rewrite ^(.*?/?)(.*\.php)(.*)$ /$2?s=$3 last;
            break;
        }
    }
}
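
After editing, it is worth validating the config and confirming that a well-formed JSON line is actually written. A quick check along these lines (the sbin path is an assumption based on the /app/nginx layout used above; python -m json.tool is just one convenient validator):

/app/nginx/sbin/nginx -t          # test the configuration syntax
/app/nginx/sbin/nginx -s reload   # reload with the new log format
curl -s -H "Host: kt.jiafeimao.com" http://127.0.0.1/ > /dev/null   # hit the server block by Host header
tail -n 1 /app/nginx/logs/access-$(date +%F).log | python -m json.tool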

Filebeat configuration:

###################### Filebeat Configuration Example #########################

# This file is an example configuration file highlighting only the most common
# options. The filebeat.full.yml file from the same directory contains all the
# supported options with more comments. You can use it as a reference.
#
# You can find the full configuration reference here:
# https://www.elastic.co/guide/en/beats/filebeat/index.html

#=========================== Filebeat prospectors =============================

filebeat.prospectors:

# Each - is a prospector. Most options can be set at the prospector level, so
# you can use different prospectors for various configurations.
# Below are the prospector specific configurations.

- input_type: log

  # Paths that should be crawled and fetched. Glob based paths.
  paths:
    - /app/nginx/logs/access-*.log
  
  # Parse each log line as JSON and place the decoded fields at the root of the event
  json.keys_under_root: true
  # Let the decoded JSON fields overwrite fields Filebeat already set
  json.overwrite_keys: true
  # json.message_key can name the JSON key used for filtering and multiline
  # grouping; the value under that key must be a string

  # Exclude lines. A list of regular expressions to match. It drops the lines that are
  # matching any regular expression from the list.
  # exclude_lines: ["^DBG"]

  # Include lines. A list of regular expressions to match. It exports the lines that are
  # matching any regular expression from the list.
  #include_lines: ["^ERR", "^WARN"]

  # Exclude files. A list of regular expressions to match. Filebeat drops the files that
  # are matching any regular expression from the list. By default, no files are dropped.
  #exclude_files: [".gz$"]

  # Optional additional fields. These field can be freely picked
  # to add additional information to the crawled log files for filtering
  #fields:
  #  level: debug
  #  review: 1

  ### Multiline options

  # Multiline can be used for log messages spanning multiple lines. This is common
  # for Java Stack Traces or C-Line Continuation

  # The regexp Pattern that has to be matched. The example pattern matches all lines starting with [
  #multiline.pattern: ^[

  # Defines if the pattern set under pattern should be negated or not. Default is false.
  #multiline.negate: false

  # Match can be set to "after" or "before". It is used to define if lines should be appended to a pattern
  # that was (not) matched before or after, or as long as a pattern is not matched based on negate.
  # Note: After is the equivalent to previous and before is the equivalent to next in Logstash
  #multiline.match: after


#================================ General =====================================

# The name of the shipper that publishes the network data. It can be used to group
# all the transactions sent by a single shipper in the web interface.
#name:

# The tags of the shipper are included in their own field with each
# transaction published.
#tags: ["service-X", "web-tier"]

# Optional fields that you can specify to add additional information to the
# output.
#fields:
#  env: staging

#================================ Outputs =====================================

# Configure what outputs to use when sending the data collected by the beat.
# Multiple outputs may be used.

#-------------------------- Elasticsearch output ------------------------------
#output.elasticsearch:
  # Array of hosts to connect to.
  #hosts: ["localhost:9200"]

  # Optional protocol and basic auth credentials.
  #protocol: "https"
  #username: "elastic"
  #password: "changeme"

#----------------------------- Logstash output --------------------------------
#output.logstash:
  # The Logstash hosts
  #hosts: ["localhost:5044"]

  # Optional SSL. By default is off.
  # List of root certificates for HTTPS server verifications
  #ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]

  # Certificate for SSL client authentication
  #ssl.certificate: "/etc/pki/client/cert.pem"

  # Client Certificate Key
  #ssl.key: "/etc/pki/client/cert.key"
  
#----------------------------- Kafka output ------------------------------------
# Kafka output: the topic that consumers listen on in Kafka must match the topic set here
output.kafka:
  enabled: true
  hosts: ["localhost:8184","10.128.54.63:9092"]
  topic: "test"
  
#================================ Logging =====================================

# Sets log level. The default log level is info.
# Available log levels are: critical, error, warning, info, debug
#logging.level: debug

# At debug level, you can selectively enable logging only for some components.
# To enable all selectors use ["*"]. Examples of other selectors are "beat",
# "publish", "service".
#logging.selectors: ["*"]
# Drop the listed fields from each event. On older Filebeat versions, @timestamp and type
# cannot be dropped; for newer versions, search the web or the official docs for how
processors:
  - drop_fields:
      fields: ["@timestamp","sort","beat","input_type","offset","source","type"]

Notes:

Filebeat startup commands:

cd into the Filebeat root directory.

Start with log output to the console, for debugging:

 ./filebeat -e -c ./filebeat.yml -d "publish"

Start in the background with output discarded:
nohup ./filebeat -e -c filebeat.yml >/dev/null 2>&1 &
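
Before starting, the configuration can also be syntax-checked; the invocation depends on the Filebeat version (pick whichever matches your install):

./filebeat -configtest -c filebeat.yml    # Filebeat 5.x
./filebeat test config -c filebeat.yml    # Filebeat 6.x and later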

Kafka startup commands:

Start ZooKeeper:
./bin/zookeeper-server-start.sh ./config/zookeeper.properties &

Start Kafka:
./bin/kafka-server-start.sh ./config/server.properties &

Create a topic:
./bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
List topics:
./bin/kafka-topics.sh --list --zookeeper localhost:2181
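
To confirm a topic's partition and replica layout, kafka-topics.sh also takes --describe:

./bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic test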

Start a console producer and send messages:
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test

In another terminal, start a console consumer to receive the messages:
./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning
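
Once Filebeat is shipping, the console consumer can also spot-check that what arrives on the topic is valid JSON; --max-messages limits the read, and python -m json.tool is just one convenient validator:

./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --max-messages 1 | python -m json.tool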

Original article: https://www.cnblogs.com/jiafeimao-dabai/p/12955036.html