代码行统计

Python代码:

 1 #encoding:utf-8
 2 
 3 import os
 4 import time
 5 import traceback
 6 
 7 totalFiles = 0  # regular files visited by CountFiles, whatever their extension
 8 totalStatics = 0  # visited files whose extension is in supportedTypes (the ones actually counted)
 9 totalError = 0  # files that CountOneFile failed to open or read
10 totalCount = 0  # running sum of the line counts returned by CountOneFile
11 supportedTypes = (".c", ".cpp", ".h", ".cc", ".cxx", ".hpp")  # compared against the lower-cased file extension
12 
def CountOneFile(filePath):
    """Return the number of newline characters in the file at *filePath*.

    A final line without a trailing newline is therefore not counted.

    If the file cannot be opened, read or decoded, the module-level
    ``totalError`` counter is incremented, the traceback is printed and 0 is
    returned, so one bad file never aborts the whole scan.
    """
    global totalError
    fileCount = 0
    try:
        # The context manager closes the handle even when read() fails.
        with open(filePath, "rt") as fin:
            fileCount = fin.read().count("\n")
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding failures are tolerated; KeyboardInterrupt
        # and programming errors still propagate.
        totalError += 1
        traceback.print_exc()
    return fileCount
25 
def CountFiles(rootPath):
    """Recursively tally files and source lines under *rootPath*.

    For a regular file, ``totalFiles`` is incremented; if its lower-cased
    extension is in ``supportedTypes``, ``totalStatics`` is incremented too
    and the file's line count is added to ``totalCount``.  For a directory,
    every entry is processed recursively.  Anything else (for example a path
    that does not exist) and any directory that cannot be listed is reported
    as ``[error] <path>`` on standard output and skipped.
    """
    global totalCount, totalFiles, totalStatics

    if os.path.isfile(rootPath):
        totalFiles += 1
        fileExten = os.path.splitext(rootPath)[1].lower()
        if fileExten in supportedTypes:
            totalStatics += 1
            totalCount += CountOneFile(rootPath)
    elif os.path.isdir(rootPath):
        try:
            childNames = os.listdir(rootPath)
        except OSError:
            # An unreadable directory must not abort the rest of the scan.
            print("[error] %s" % rootPath)
            return
        for childName in childNames:
            CountFiles(os.path.join(rootPath, childName))
    else:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("[error] %s" % rootPath)
42 
# Directory to scan.  Raw string: the backslashes are path separators and
# must never be interpreted as escape sequences.
root = r"D:\Codes\cmac_1225\pub"

# time.clock() was removed in Python 3.8; use perf_counter() where it exists
# and fall back to clock() on interpreters that predate it.
_timer = getattr(time, "perf_counter", None) or time.clock

start = _timer()
CountFiles(root)
end = _timer()
# %d truncates the elapsed time to whole seconds, as in the original output.
print("Files: %d, Used: %d" % (totalFiles, totalStatics))
print("Lines: %d, Time: %d" % (totalCount, end - start))

C语言代码:

 1 /* SourceCount.h */
 2 #ifndef _SOURCE_COUNT_H
 3 #define _SOURCE_COUNT_H
 4 
 5 #ifndef NULL
 6 #define NULL 0
 7 #endif
 8 
 9 #define MAX_CMD_LEN 2048
10 #define MAX_PATH_LEN 256
11 #define MAX_THREAD_NUM 20
12 #define DEFAULT_THREAD_NUM 2
13 #define EXTEN_NUM 6
14 #define READ_BUFF_SIZE 1024
15 
16 #define CHECK_NULL_RET(var, ret) if(NULL == var) return ret;
17 #define PROC_THREAD_NUM(threadNum) ((threadNum > 0 && threadNum <= MAX_THREAD_NUM) ? threadNum : DEFAULT_THREAD_NUM)
18 #define RM_LINE_BREAK(path) if(strlen(path)>0 && path[strlen(path)-1] == '
') path[strlen(path)-1] = '';
19 #define STR_LOWER(str) strlwr(str)
20 
21 const char extensions[EXTEN_NUM][MAX_PATH_LEN] = {".c", ".cc", ".cxx", ".cpp", ".h", ".hpp"};
22 
23 #endif
  1 /* SourceCount.cpp */
  2 #include <cstdlib>
  3 #include <Windows.h>
  4 #include <stdio.h>
  5 #include <time.h>
  6 #include "SourceCount.h"
  7 
  8 HANDLE hMutex4File; // held by a worker thread while it reads the next path from `pipe`
  9 HANDLE hMutex4Stat; // held by a worker thread while it adds a file's lines to totalCount
 10 
 11 FILE *pipe = NULL; // output of the `dir /s /b` command opened by CountFiles; read line by line in ThreadProc
 12 int totalCount = 0; // sum of the line counts of every file with a supported extension
 13 
 14 int CountFiles(const char *root, const int threadNum); // lists `root` via the shell, runs the worker threads, returns totalCount
 15 DWORD WINAPI ThreadProc(LPVOID lpParameter); // worker thread entry point: takes paths from `pipe` and counts matching files
 16 int CountOneFile(const char *filePath); // number of 0x0a bytes in the file, 0 if it cannot be opened
 17 int CheckFileExten(const char *exten); // 1 if `exten` equals an entry of extensions[], otherwise 0
 18 
 19 int main()
 20 {
 21     char testRoot[] = "D:\Codes\cmac_1225";
 22     char testThreadNum = 12;
 23 
 24     clock_t start = clock();
 25     CountFiles(testRoot, testThreadNum);
 26     printf("Total: %d, time: %d
", totalCount, clock()-start);
 27     system("pause");
 28     return 0;
 29 }
 30 
 31 int CountFiles(const char *root, const int threadNum)
 32 {
 33     HANDLE pHandles[MAX_THREAD_NUM];
 34     char command[MAX_CMD_LEN];
 35     int handleNum;
 36     int loop;
 37     int ret;
 38 
 39     CHECK_NULL_RET(root, 0);
 40     
 41     sprintf(command, "dir "%s" /s /b", root);
 42     pipe = _popen(command, "rt"); // 执行命令,生成管道
 43     CHECK_NULL_RET(pipe, 0);
 44 
 45     // 创建互斥量
 46     hMutex4File = CreateMutex(NULL, false, NULL);
 47     hMutex4Stat = CreateMutex(NULL, false, NULL);
 48 
 49     handleNum = PROC_THREAD_NUM(threadNum); // 确定Thread数
 50     for(loop = 0; loop < handleNum; loop++)
 51     {
 52         pHandles[loop] = CreateThread(NULL, 0, ThreadProc, NULL, 0, NULL); // 创建线程
 53     }
 54     WaitForMultipleObjects(handleNum, pHandles, true, INFINITE); // 等待所有线程执行完毕
 55     for(loop = 0; loop < handleNum; loop++)
 56     {
 57         CloseHandle(pHandles[loop]); // 释放HANDLE资源
 58     }
 59     _pclose(pipe); // 关闭管道
 60     return totalCount;
 61 }
 62 
 63 /// 进程函数
 64 DWORD WINAPI ThreadProc(LPVOID lpParameter)
 65 {
 66     char filePath[MAX_PATH_LEN];
 67     char fileExten[MAX_PATH_LEN];
 68     int ret;
 69     int lines;
 70 
 71     while(true)
 72     {
 73         WaitForSingleObject(hMutex4File, INFINITE); // 获取pipe互斥权限
 74         if(feof(pipe))
 75         {
 76             ReleaseMutex(hMutex4File);
 77             return 0;
 78         }
 79         fgets(filePath, MAX_PATH_LEN, pipe);
 80         ReleaseMutex(hMutex4File);
 81 
 82         RM_LINE_BREAK(filePath);
 83         _splitpath(filePath, NULL, NULL, NULL, fileExten);
 84         STR_LOWER(fileExten);
 85         ret = CheckFileExten(fileExten);
 86         if(ret == 1)
 87         {
 88             lines = CountOneFile(filePath);
 89             WaitForSingleObject(hMutex4Stat, INFINITE); // 获取stat互斥权限
 90             totalCount += lines;
 91             ReleaseMutex(hMutex4Stat);
 92         }
 93     }
 94     return 0;
 95 }
 96 
 97 int CountOneFile(const char *filePath)
 98 {
 99     FILE *fp = fopen(filePath, "rt");
100     int lineCount = 0;
101     int readBytes;
102     int loop;
103     unsigned char buff[READ_BUFF_SIZE];
104 
105     CHECK_NULL_RET(fp, 0);
106     while(!feof(fp)) 
107     {
108         readBytes = fread(buff, 1, READ_BUFF_SIZE, fp);
109         for(loop=0; loop<readBytes; loop++)
110         {
111             if(buff[loop] == 0x0a)
112             {
113                 lineCount++;
114             }
115         }
116     }
117     fclose(fp);
118     return lineCount;
119 }
120 
121 int CheckFileExten(const char *exten)
122 {
123     int loop;
124     for(loop=0; loop<EXTEN_NUM; loop++)
125     {
126         if(strcmp(exten, extensions[loop]) == 0)
127         {
128             return 1;
129         }
130     }
131     return 0;
132 }

 测试:

文件总数:21984

大小:991MB

满足条件文件总数:20274

代码总行数:6575561

Python用时:949s

C语言:

 4 线程,读取缓冲区 1024,用时80s
 6 线程,读取缓冲区 1024,用时66.7s
 8 线程,读取缓冲区 1024,用时53.5s
10 线程,读取缓冲区 1024,用时41.8s
10 线程,读取缓冲区 2048,用时16s
10 线程,读取缓冲区 4096,用时16s
12 线程,读取缓冲区 2048,用时14.9s

最佳:12线程,缓冲区2048

参数处理:

/* Print the command-line usage text, then wait for a key press.
 *
 * NOTE(review): in the transcribed listing every help line ended in a space
 * and the blank separators were printf(" "), so the whole text was emitted
 * on one line.  That looks like "\n" escapes lost in transcription; they are
 * restored here -- confirm against the original program.
 */
void PrintHelpInfo()
{
    printf("代码行统计工具\n");
    printf("SourceCount [drive:][path][filename] [ ...] [/E extension [ ...]] [/T threadnum]");
    printf(" [/S buffsize] [/P [filename]]\n");
    printf("\n");
    printf(" [drive:][path][filename]\n");
    printf(" 指定要统计的路径和/或文件。\n");
    printf("\n");
    printf(" /E extension 指定文件类型。\n");
    printf(" /T threadnum 指定使用的线程数量。\n");
    printf(" /S buffsize 指定文件读取缓冲区的大小。\n");
    printf(" /P [filename] 输出文件统计结果[到文件]。\n");
    printf("\n");
    system("pause");
}

原文地址:https://www.cnblogs.com/rmthy/p/6376009.html