删除c,c++,java源文件中全部注释的Python脚本

#!D:\Python32
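# -*- coding: utf-8 -*-
# Strip comments from Java/C/C++ source files (*.java, *.c, *.cpp, *.h).
# Known limitation: comment markers inside string literals may not always be handled correctly.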

import os
import re
import io

# Root directory whose source files are processed -- change this before running!
top_dir = "E:\\work2\\"

# Scanner states used by trim_file, numbered 0..6 in the order listed.
(
  S_INIT,               # ordinary code, outside comments and quoted literals
  S_SLASH,              # just read a '/', which may start a comment
  S_BLOCK_COMMENT,      # inside a /* ... */ comment
  S_BLOCK_COMMENT_DOT,  # inside a block comment, just read a '*'
  S_LINE_COMMENT,       # inside a // comment, up to the end of the line
  S_STR,                # inside a quoted literal
  S_STR_ESCAPE,         # inside a quoted literal, just read a backslash
) = range(7)

def trim_dir(path):
  """Run trim_file on every file found anywhere below *path*.

  The directory name is printed first.  Each file is passed to trim_file
  with its full path (directory joined with the file name).
  """
  print("dir:" + path)
  # os.walk already descends into every sub-directory, so no explicit
  # recursion over the directory names is needed here.
  for folder, _subdirs, filenames in os.walk(path):
    for filename in filenames:
      trim_file(os.path.join(folder, filename))

def _strip_comments(text):
  """Return *text* with ``//`` and ``/* */`` comments removed.

  The text is scanned one character at a time with a small state machine.
  Everything inside a double- or single-quoted literal is copied verbatim,
  so comment markers inside string or character literals are not touched.
  The newline that ends a ``//`` comment is kept.

  Limitations: a removed block comment is not replaced by whitespace (so
  ``a/**/b`` becomes ``ab``), and a backslash at the end of a ``//`` comment
  does not continue the comment onto the next line.
  """
  out = []
  emit = out.append
  state = S_INIT
  quote = ''  # the quote character that opened the current literal

  for c in text:
    if state == S_SLASH:
      if c == '*':
        state = S_BLOCK_COMMENT
        continue
      if c == '/':
        state = S_LINE_COMMENT
        continue
      # The slash was ordinary code (e.g. a division).  Emit it and let the
      # current character be handled as normal code below, so that a quote
      # directly after a slash still opens a literal.
      emit('/')
      state = S_INIT

    if state == S_INIT:
      if c == '/':
        state = S_SLASH  # hold the slash back until we know what follows
      else:
        if c == '"' or c == "'":
          state = S_STR
          quote = c
        emit(c)
    elif state == S_BLOCK_COMMENT:
      if c == '*':
        state = S_BLOCK_COMMENT_DOT
    elif state == S_BLOCK_COMMENT_DOT:
      if c == '/':
        state = S_INIT
      elif c != '*':
        # Any run of '*' keeps us waiting for the closing '/'; anything
        # else means the '*' was just part of the comment text.
        state = S_BLOCK_COMMENT
    elif state == S_LINE_COMMENT:
      if c == '\n':
        state = S_INIT
        emit(c)
    elif state == S_STR:
      if c == '\\':
        state = S_STR_ESCAPE
      elif c == quote:
        state = S_INIT
      emit(c)
    elif state == S_STR_ESCAPE:
      # Only the character right after the backslash is skipped over; that
      # is enough to keep an escaped quote from closing the literal.
      state = S_STR
      emit(c)

  if state == S_SLASH:
    # The input ended right after a '/' that never became a comment.
    emit('/')
  return ''.join(out)


def trim_file(path):
  """Strip comments from one source file in place, keeping a ``.bak`` copy.

  Files whose name does not end in ``.java``, ``.c``, ``.cpp`` or ``.h`` are
  reported as ignored and left alone.  Otherwise the file is renamed to
  ``path + ".bak"`` and a comment-free version is written back to *path*.
  The backup is never deleted here; remove it once the result is verified.

  If the rename fails, a message is printed.  When a backup from an earlier
  run already exists it is used as the source; when there is no backup at
  all the file is skipped instead of being overwritten.
  """
  print("file:" + path)
  if not re.match(r".*?\.(java|c|cpp|h)$", path):
    print("ignore")
    return
  print("process")

  bak_file = path + ".bak"
  try:
    os.rename(path, bak_file)
  except OSError:
    print("bak except " + bak_file)
    if not os.path.exists(bak_file):
      # Nothing to read from and no safety copy: leave the file untouched.
      return

  # Read everything before opening the destination, so a read failure
  # cannot leave a truncated file behind.
  with open(bak_file) as fp_src:
    stripped = _strip_comments(fp_src.read())
  with open(path, 'w') as fp_dst:
    fp_dst.write(stripped)
# Entry point: process every matching source file under top_dir.
trim_dir(top_dir)

  需要说明的是这段程序来自:http://blog.csdn.net/codearhat/article/details/6852483#comments

但是原程序里面有两个问题,会引起错误;现在这个版本经验证可以在我的项目中使用,但是不保证完全没有错误。如发现错误,希望和我联系,也可以和原作者联系。

原文地址:https://www.cnblogs.com/baizx/p/2850633.html