Python 查找binlog文件

经常需要在 binlog 中查找一些日志信息,于是写了一个简单的脚本。对于非常巨大的 binlog 文件,该脚本的速度可能会比较慢,毕竟还是用 list 来保存输出行,暂时没想到更好的办法。

详细看代码:

#!/usr/bin/python

#2016-04-12

#search string in the binlogs

#usage:
#put this file into binlog-dir,exec as:
#"python test.py 111 123 update" or 
#"python test.py 111 123 update and insert" or 
#"python test.py 111 123 update or delete"
#the nums are the binlog-num.

import sys
import subprocess
 
def find_str(files):
    """Yield lowercased ``mysqlbinlog`` output lines that match the CLI keywords.

    Every entry of ``files`` is decoded by running ``mysqlbinlog <file>``
    through the shell.  The search terms are taken from ``sys.argv``:

    * 4 arguments: the line must contain ``sys.argv[3] + ' '``.
    * 6 arguments with ``sys.argv[4] == "and"``: the line must contain both
      ``sys.argv[3] + ' '`` and ``sys.argv[5] + ' '``.
    * 6 arguments with ``sys.argv[4] == "or"``: the line must contain at
      least one of them.

    Any other argument layout yields nothing.  Lines are lowercased before
    the comparison while the keywords are used as given, so keywords should
    be passed in lowercase.

    Args:
        files: iterable of binlog file names.

    Yields:
        Each matching output line, lowercased.
    """
    for file in files:
        comm = "mysqlbinlog {0}".format(file)
        proc = subprocess.Popen(comm, shell=True, stdout=subprocess.PIPE)
        try:
            # Iterate over the pipe directly instead of readlines(), so the
            # output of a huge binlog is never held in memory all at once.
            for line in proc.stdout:
                line = line.lower()
                if len(sys.argv) == 4:
                    if sys.argv[3] + ' ' in line:
                        yield line
                elif len(sys.argv) == 6 and sys.argv[4] == "and":
                    if sys.argv[3] + ' ' in line and sys.argv[5] + ' ' in line:
                        yield line
                elif len(sys.argv) == 6 and sys.argv[4] == "or":
                    if sys.argv[3] + ' ' in line or sys.argv[5] + ' ' in line:
                        yield line
        finally:
            # Runs on normal exhaustion and when the consumer stops early:
            # close our end of the pipe and reap the child process.
            proc.stdout.close()
            proc.wait()

    
if __name__ == "__main__":
    # argv[1] and argv[2] are the first and last binlog sequence numbers
    # (both inclusive); the files are named updatelog.NNNNNN.
    first_arg, last_arg = sys.argv[1], sys.argv[2]
    numbers = range(int(first_arg), int(last_arg) + 1)
    files = list(map("updatelog.{0:06d}".format, numbers))

    # Print every matching line as soon as the generator produces it.
    for hit in find_str(files):
        print(hit)

第二次改进版本,由于 py2 不支持 yield from 语句,gen_concatenate() 可能有点绕;详细看代码:

#!/usr/bin/python

#2016-04-12

#search string in the binlogs

#usage:
#put this file into binlog-dir,exec as "python test.py 111 123 update" or "python test.py 111 123 update and insert" or "python test.py 111 123 update or delete"
#the nums are the binlog-num.

import sys
import subprocess

def find_str(files):
    """Yield, for each binlog file, the list of lines printed by ``mysqlbinlog``.

    ``sys.argv`` is printed once when iteration starts.  Each entry of
    ``files`` is then decoded by running ``mysqlbinlog <file>`` through the
    shell, and its complete output is yielded as one list of lines.

    Args:
        files: iterable of binlog file names.

    Yields:
        One list of output lines per file, in the order of ``files``.
    """
    print(sys.argv)
    for file in files:
        comm = "mysqlbinlog {0}".format(file)
        proc = subprocess.Popen(comm, shell=True, stdout=subprocess.PIPE)
        try:
            lines = proc.stdout.readlines()
        finally:
            # Close our end of the pipe and reap the child so that neither
            # lingers while the remaining files are processed.
            proc.stdout.close()
            proc.wait()
        yield lines  # a list holding every output line of this file

def gen_concatenate(lines):
    """Flatten an iterable of line groups into a single stream of lines.

    Args:
        lines: iterable whose items are themselves iterables of lines
            (one group per binlog file).

    Yields:
        Every line of every group, in order.
    """
    # Walk the groups one after another and hand out their items one by one.
    for i in lines:
        for it in i:
            yield it


def gen_grep(lines):
    """Yield the lowercased lines that match the keywords given on the CLI.

    The search terms are taken from ``sys.argv``:

    * 4 arguments: the line must contain ``sys.argv[3] + ' '``.
    * 6 arguments with ``sys.argv[4] == "and"``: the line must contain both
      ``sys.argv[3] + ' '`` and ``sys.argv[5] + ' '``.
    * 6 arguments with ``sys.argv[4] == "or"``: the line must contain at
      least one of them.

    Any other argument layout yields nothing.  Lines are lowercased before
    the comparison while the keywords are used as given, so keywords should
    be passed in lowercase.

    Args:
        lines: iterable of text lines.

    Yields:
        Each matching line, lowercased.
    """
    for line in lines:
        line = line.lower()
        if len(sys.argv) == 4:
            if sys.argv[3] + ' ' in line:
                yield line
        elif len(sys.argv) == 6 and sys.argv[4] == "and":
            if sys.argv[3] + ' ' in line and sys.argv[5] + ' ' in line:
                yield line
        elif len(sys.argv) == 6 and sys.argv[4] == "or":
            if sys.argv[3] + ' ' in line or sys.argv[5] + ' ' in line:
                yield line


if __name__ == "__main__":
    # argv[1] and argv[2] are the first and last binlog sequence numbers
    # (both inclusive); the files are named updatelog.NNNNNN.
    start = sys.argv[1]
    end = sys.argv[2]
    files = ["updatelog.{0:06d}".format(i) for i in range(int(start), int(end)+1)]

    # Pipeline: per-file line lists -> flat line stream -> matching lines.
    f = find_str(files)
    lines = gen_concatenate(f)
    greplines = gen_grep(lines)
    for i in greplines:
        print(i)

脚本 1 理解起来更容易,它将全部功能封装在一个函数体内。

更新:

使用了 re 正则匹配,有时候日志里面记录的表名是带反引号的,比如`user`这样,见代码:

#!/usr/bin/python

#2016-04-27

#search string in the binlogs

#usage:
#put this file into binlog-dir,exec as "python test.py 111 123 update" or "python test.py 111 123 update and insert" or "python test.py 111 123 update or delete"
#the nums are the binlog-num.

import sys
import subprocess
import re

def find_str(files):
    """Yield, for each binlog file, the list of lines printed by ``mysqlbinlog``.

    ``sys.argv`` is printed once when iteration starts.  Each entry of
    ``files`` is then decoded by running ``mysqlbinlog <file>`` through the
    shell, and its complete output is yielded as one list of lines.

    Args:
        files: iterable of binlog file names.

    Yields:
        One list of output lines per file, in the order of ``files``.
    """
    print(sys.argv)
    for file in files:
        comm = "mysqlbinlog {0}".format(file)
        proc = subprocess.Popen(comm, shell=True, stdout=subprocess.PIPE)
        try:
            lines = proc.stdout.readlines()
        finally:
            # Close our end of the pipe and reap the child so that neither
            # lingers while the remaining files are processed.
            proc.stdout.close()
            proc.wait()
        yield lines

def gen_concatenate(lines):
    """Chain an iterable of line groups into one flat stream of lines.

    Args:
        lines: iterable whose items are themselves iterables of lines.

    Yields:
        Every item of every group, preserving the original order.
    """
    # A lazy nested generator expression walks the groups one after another.
    flattened = (entry for group in lines for entry in group)
    for entry in flattened:
        yield entry

def gen_grep(lines):
    """Yield the lowercased lines that match the regex keywords from the CLI.

    The keywords are treated as regular expressions and searched
    case-insensitively, so a table name can be given with or without
    backticks.  They are taken from ``sys.argv``:

    * 4 arguments: the line must match ``sys.argv[3]``.
    * 6 arguments with ``sys.argv[4] == "and"``: the line must match both
      ``sys.argv[3]`` and ``sys.argv[5]``.
    * 6 arguments with ``sys.argv[4] == "or"``: the line must match at
      least one of them.

    Any other argument layout yields nothing.

    Args:
        lines: iterable of text lines.

    Yields:
        Each matching line, lowercased.
    """
    argc = len(sys.argv)
    if argc == 4:
        keywords = [sys.argv[3]]
        combine = all
    elif argc == 6 and sys.argv[4] == "and":
        keywords = [sys.argv[3], sys.argv[5]]
        combine = all
    elif argc == 6 and sys.argv[4] == "or":
        keywords = [sys.argv[3], sys.argv[5]]
        # "or" must accept a line when EITHER keyword matches.
        combine = any
    else:
        return

    # Compile once up front instead of on every line.
    regexes = [re.compile(keyword, re.I) for keyword in keywords]

    for line in lines:
        line = line.lower()
        if combine(regex.search(line) is not None for regex in regexes):
            yield line

if __name__ == "__main__":
    # argv[1] and argv[2] are the first and last binlog sequence numbers
    # (both inclusive); the files are named updatelog.NNNNNN.
    first_arg, last_arg = sys.argv[1], sys.argv[2]
    numbers = range(int(first_arg), int(last_arg) + 1)
    files = list(map("updatelog.{0:06d}".format, numbers))

    # Pipeline: per-file line lists -> flat line stream -> matching lines.
    for hit in gen_grep(gen_concatenate(find_str(files))):
        print(hit)

search.py
search.py
原文地址:https://www.cnblogs.com/bvac/p/5384756.html