重复造轮子系列--字符串处理(C语言)

这些字符串处理代码是以前写的,源于很久很久以前的一个VC++项目。在当时的部门编程比赛里,我用了项目代码中的xsplit函数,万万没想到,那个做了几年的项目里面居然有坑:xsplit函数居然不能正确split连续2个空格,囧。领导说,你要是用Ruby就满分了。就这样我与满分失之交臂,而当时没有人得满分,因此记忆深刻;

后来Boost C++库开始流行,这个我就不多说了,用过的都说好,但是也有些小麻烦:用得多了,编译就特别慢。那个时候还不知道用IncrediBuild,于是乎就在代码上下功夫了。

做了一些常用的字符串操作,Python字符串的常用函数基本都实现了,放在博客里,也可以温故知新。

xstring.h

 1 #ifndef XSTRING
 2 #define XSTRING
 3 
 4 typedef struct xstring {
 5     char *str;
 6     struct xstring *next;
 7 } xstring; 
 8 
 9 
10 //////////////////////////////////////////////////////////////////////////
11 void*    allocate(size_t size);
12 
13 #ifdef USE_STD_MALLOC
14 #define dellocate(ptr) free(ptr);
15 #else
16 #define dellocate(ptr) mem_ret(ptr);
17 #endif
18 
19 //////////////////////////////////////////////////////////////////////////
20 xstring* xstring_new(size_t size);
21 void     xstring_delete(xstring **head);
22 int      xstring_size(xstring *head);
23 
24 //////////////////////////////////////////////////////////////////////////
25 size_t   count(char* src, char* dst);
26 char*    replace(char *src, char *old_val, char *new_val);
27 xstring* split(char *str, char *delimter);
28 char*    strip(char *str);
29 char*    lstrip(char *str);
30 char*    rstrip(char *str);
31 int      start_with(char *str, char *sym);
32 int      end_with(char *str, char *sym);
33 char*    uniq_seq_repeat_chars(char *str);
34 
35 #endif

xstring.c

  1 #include <stdlib.h>
  2 #include <string.h>
  3 #include <ctype.h>
  4 #include <assert.h>
  5 #include "xstring.h"
  6 #include "mem_pool.h"
  7 
  8 void* allocate(size_t size) {
  9 #ifdef USE_STD_MALLOC
 10     return malloc(size);
 11 #else
 12     return mem_get(size);
 13 #endif
 14 }
 15 
 16 xstring* xstring_new(size_t size) {
 17     xstring *s = (xstring *)allocate(sizeof(xstring));
 18     if (!s) return NULL;
 19 
 20     s->str = (char *)allocate(size+1);
 21     if (!s->str) {
 22         dellocate(s);
 23         return NULL;
 24     }
 25 
 26     s->next = NULL;
 27     return s;
 28 }
 29 
 30 void xstring_delete(xstring** head) {
 31     xstring *curr = *head;
 32     xstring *next;
 33 
 34     while(curr) {
 35         next = curr->next;
 36         if (curr->str) dellocate(curr->str);
 37         dellocate(curr);
 38         curr = next;
 39     }
 40     *head = NULL;
 41     return;
 42 }
 43 
 44 int xstring_size(xstring* head) {
 45     int size = 0;
 46     while (head) {
 47         size++;
 48         head = head->next;
 49     }
 50     return size;
 51 }
 52 
 53 static void string_copy(char *dst, char *src, int len) {
 54     if (!dst || !src) return;
 55     strncpy(dst, src, len);
 56     dst[len] = '';
 57     return;
 58 }
 59 
 60 static void substring_copy(xstring **curr, xstring *next, char *str, int len) {
 61     string_copy(next->str, str, len);
 62     (*curr)->next = next;
 63     (*curr) = (*curr)->next;
 64 }
 65 
 66 xstring* split(char* str, char *delimter) {
 67     char *delimt, *buffer;
 68     int i = 0;
 69     int len, match, cnt;
 70     xstring *head = NULL;
 71     xstring *next, *curr;
 72     
 73     if (NULL == str || NULL == delimter) return NULL;
 74 
 75     delimt = delimter;
 76     len    = strlen(delimter);
 77     buffer = str;
 78     match  = 0;
 79     cnt    = 0;
 80 
 81     while (*buffer != '') {
 82         if (*buffer == *delimt) {
 83             delimt++;
 84             match++; 
 85         } else if (*buffer != *delimt) {
 86             delimt = delimter;
 87         }
 88 
 89         if (match == len) {
 90             if (NULL == head && cnt > 0) {
 91                 head = xstring_new(cnt);
 92                 if (!head) return NULL;
 93                 string_copy(head->str, str+i-cnt, cnt+1-len);
 94                 curr = head;
 95             } else if (cnt > 0){
 96                 next = xstring_new(cnt);
 97                 if (!next) return NULL;
 98                 substring_copy(&curr, next, str+i-cnt, cnt+1-len);
 99             }
100             cnt    = 0;
101             match  = 0;
102             delimt = delimter;
103         } else {
104             cnt++;
105         } 
106         i++;
107         buffer++;
108     }
109 
110     if (cnt > 0) {
111         next = xstring_new(cnt);
112         if (!next) return NULL;
113         substring_copy(&curr, next, str+i-cnt, cnt);
114     }
115     return head;
116 }
117 
/*
 * Count the non-overlapping occurrences of the string `dst` inside `src`,
 * scanning left to right.
 *
 * Returns 0 when either argument is NULL or when `dst` is empty.
 */
size_t count(char* src, char* dst) {
    size_t cnt = 0;
    size_t len;
    char *hit;

    if (!src || !dst) return 0;

    len = strlen(dst);
    if (0 == len) return 0;

    /* strstr restarts the comparison correctly after a partial match,
     * so "ab" is found in "aab". */
    for (hit = strstr(src, dst); hit != NULL; hit = strstr(hit + len, dst)) {
        cnt++;
    }
    return cnt;
}
143 
/*
 * Build a new string in which every non-overlapping occurrence of
 * `old_val` in `src` is replaced by `new_val`.
 *
 * Returns a buffer obtained from allocate(), which the caller must release.
 * Returns NULL when an argument is NULL, when `old_val` is empty, when
 * `old_val` does not occur in `src`, or when the allocation fails.
 */
char* replace(char *src, char *old_val, char *new_val) {
    if (!src || !old_val || !new_val) return NULL;

    size_t o_len = strlen(old_val);
    if (0 == o_len) return NULL;
    size_t n_len = strlen(new_val);

    /* First pass: count matches so the result can be sized exactly. */
    size_t cnt = 0;
    for (char *hit = strstr(src, old_val); hit != NULL;
         hit = strstr(hit + o_len, old_val)) {
        cnt++;
    }
    if (0 == cnt) return NULL;

    size_t len = strlen(src) - cnt * o_len + cnt * n_len;
    char *new_str = (char *)allocate(len + 1);
    if (!new_str) return NULL;

    /* Second pass: copy the text between matches, then the replacement. */
    char *out = new_str;
    char *in  = src;
    char *hit;
    while ((hit = strstr(in, old_val)) != NULL) {
        size_t keep = (size_t)(hit - in);
        memcpy(out, in, keep);
        out += keep;
        memcpy(out, new_val, n_len);
        out += n_len;
        in = hit + o_len;
    }
    /* Remaining tail, including the terminating NUL. */
    strcpy(out, in);

    return new_str;
}
184 
/*
 * Return a newly allocated copy of `str` with leading whitespace (as
 * classified by isspace) removed.
 *
 * Returns NULL when `str` is NULL or the allocation fails. The result
 * comes from allocate() and must be released by the caller.
 */
char* lstrip(char *str) {
    if (NULL == str) return NULL;

    char *start = str;
    /* <ctype.h> functions require a value representable as unsigned char. */
    while (isspace((unsigned char)*start)) start++;

    size_t len = strlen(start);
    char *s = (char *)allocate(len + 1);
    if (NULL == s) return NULL;

    memcpy(s, start, len + 1); /* includes the terminating NUL */
    return s;
}
200 
/*
 * Return a newly allocated copy of `str` with trailing whitespace (as
 * classified by isspace) removed.
 *
 * An empty or all-whitespace input yields an empty string. Returns NULL
 * when `str` is NULL or the allocation fails. The result comes from
 * allocate() and must be released by the caller.
 */
char* rstrip(char *str) {
    if (NULL == str) return NULL;

    size_t len = strlen(str);
    /* Stop at len == 0 so the scan never reads before the buffer. */
    while (len > 0 && isspace((unsigned char)str[len - 1])) len--;

    char *s = (char *)allocate(len + 1);
    if (NULL == s) return NULL;

    memcpy(s, str, len);
    s[len] = '\0';
    return s;
}
214 
/*
 * Return a newly allocated copy of `str` with whitespace (as classified
 * by isspace) removed from both ends.
 *
 * An empty or all-whitespace input yields an empty string. Returns NULL
 * when `str` is NULL or the allocation fails. The result comes from
 * allocate() and must be released by the caller.
 */
char* strip(char *str) {
    if (NULL == str) return NULL;

    char *start = str;
    while (isspace((unsigned char)*start)) start++;

    size_t len = strlen(start);
    /* Bounded by len > 0, so an all-whitespace string cannot underflow. */
    while (len > 0 && isspace((unsigned char)start[len - 1])) len--;

    char *s = (char *)allocate(len + 1);
    if (NULL == s) return NULL;

    memcpy(s, start, len);
    s[len] = '\0';
    return s;
}
231 
/*
 * Compare the beginning of `str` with `sym`.
 *
 * Returns the strncmp() result over strlen(sym) characters, i.e. 0 when
 * `str` starts with `sym`. Returns -1 when either pointer is NULL or `sym`
 * is empty.
 */
int start_with(char *str, char *sym) {
    size_t prefix_len;

    if (NULL == str || NULL == sym) return -1;

    prefix_len = strlen(sym);
    if (0 == prefix_len) return -1;

    return strncmp(str, sym, prefix_len);
}
236 
/*
 * Compare the end of `str` with `sym`.
 *
 * Returns the strncmp() result of the last strlen(sym) characters of `str`
 * against `sym`, i.e. 0 when `str` ends with `sym`. Returns -1 when either
 * pointer is NULL or `sym` is longer than `str`.
 */
int end_with(char *str, char *sym) {
    int str_len;
    int suffix_len;
    char *tail;

    if (NULL == str || NULL == sym) return -1;

    str_len    = strlen(str);
    suffix_len = strlen(sym);
    if (suffix_len > str_len) return -1;

    /* Start of the region that must equal sym. */
    tail = str + str_len - suffix_len;
    return strncmp(tail, sym, suffix_len);
}
245 
/*
 * Return the largest of the first `len` elements of `in_arr`.
 * Asserts that `in_arr` is not NULL; in_arr[0] is always read.
 */
int max(int *in_arr, int len) {
    assert(NULL != in_arr);

    int best = in_arr[0];
    int idx = 1; /* element 0 is already the running maximum */

    while (idx < len) {
        if (in_arr[idx] > best) {
            best = in_arr[idx];
        }
        idx++;
    }
    return best;
}
253 
/*
 * Return a newly allocated copy of `str` in which every run of the same
 * character is collapsed to a single character ("aabbbc" -> "abc").
 * Non-adjacent repeats are kept ("aba" stays "aba").
 *
 * Returns NULL when `str` is NULL or the allocation fails. The result
 * comes from allocate() and must be released by the caller.
 */
char *uniq_seq_repeat_chars(char *str) {
    if (NULL == str) return NULL;

    size_t n = strlen(str);
    char *s = (char *)allocate(n + 1);
    if (NULL == s) return NULL;

    char *out = s;
    for (size_t i = 0; i < n; i++) {
        /* A character is kept unless it repeats its immediate predecessor;
         * no lookup table is needed, so negative char values are safe. */
        if (i == 0 || str[i] != str[i - 1]) {
            *out++ = str[i];
        }
    }
    *out = '\0';
    return s;
}
原文地址:https://www.cnblogs.com/danxi/p/6369990.html