filter-mutate过滤插件

之前的nginx日志使用grok匹配,但是后来发现nginx的日志中每个值之间都使用了分隔符"|",这样就可以使用mutate按分隔符拆分出各个字段,相比grok的正则匹配还减少了运算量。

描述

mutate过滤器允许您对字段执行常规的修改操作。您可以重命名,删除,替换和修改事件中的字段。
常用配置选项:

  • rename:重命名字段
  • update:更新字段值,如果字段不存在,则不执行操作
  • convert:将字段转换成其它类型
  • copy:将字段复制到另一字段
  • join:使用分隔符将数组连接成字符串。仅适用于数组字段
  • lowercase:将字符串转换为小写
  • replace:用新值替换字段的值
  • split:使用分隔符将字段拆分为数组。仅适用于字符串字段
  • uppercase:将字符串转换为大写的等效字符串

官方文档:https://www.elastic.co/guide/en/logstash/current/plugins-filters-mutate.html


如下使用了分隔符进行处理,分隔出来的字段的数组下标是从0开始的。

input {
  # Tail a local nginx access log.
  file {
    # "beginning" makes Logstash read the file from its first line
    # instead of only picking up newly appended lines.
    start_position => "beginning"
    path => "/tmp/nginx.log"
  }
}

filter {
  # Split the pipe-delimited log line into an array, then copy each array
  # element (indexes start at 0) into its own named field.
  mutate {
    split => ["message", "|"]
    add_field => {
      "timestamp" => "%{[message][0]}"
      "remote_addr" => "%{[message][1]}"
      "request_all" => "%{[message][2]}"
      "status" => "%{[message][3]}"
      "body_bytes_sent" => "%{[message][4]}"
      "request_time" => "%{[message][5]}"
      "request_body" => "%{[message][6]}"
      "http_referer" => "%{[message][7]}"
      "http_user_agent" => "%{[message][8]}"
      "http_x_forwarded_for" => "%{[message][9]}"
      "upstream_addr" => "%{[message][10]}"
      "upstream_response_time" => "%{[message][11]}"
      "upstream_cache_status" => "%{[message][12]}"
      "scheme" => "%{[message][13]}"
    }
  }

  # Break the request line ("GET /path HTTP/1.1") into verb, request and
  # httpversion. remove_field only takes effect when the match succeeds,
  # so events that fail to parse keep request_all and message for debugging.
  grok {
    match => {
      "request_all" => "%{WORD:verb} %{URIPATHPARAM:request} HTTP/%{NUMBER:httpversion}"
    }
    remove_field => [ "request_all", "message" ]
  }

  # convert is an option of the mutate plugin and must live inside a
  # mutate block; placed directly under filter it is a config syntax error.
  mutate {
    convert => {
      "body_bytes_sent" => "integer"
      # NOTE(review): assumes this field carries nginx $request_time, which
      # is fractional seconds (e.g. 0.005); "integer" would truncate it to 0.
      "request_time" => "float"
    }
  }
}


output {
  # Send every processed event to Elasticsearch.
  elasticsearch {
    # Target index name for the parsed nginx events.
    index => "logstash-nginx_local"
    hosts => ["http://192.168.20.6:9200"]
  }
}
原文地址:https://www.cnblogs.com/dance-walter/p/10196157.html