合并两个有序单链表

在归并排序中，对顺序存储的且为升序的两个列表a和b进行合并,合并后的列表为c，实现如下：

 1 /**
 2  * Merge two sorted src array a[] and b[] to dst array c[]
 3  */
 4 void merge0(int c[],  size_t nc, int a[], size_t na, int b[], size_t nb)
 5 {
 6         int i = 0; /* walk array a : read  */
 7         int j = 0; /* walk array b : read  */
 8         int k = 0; /* walk array c : write */
 9 
10         while (i < na && j < nb) {
11                 int t = 0;
12 
13                 if (a[i] < b[j]) {
14                         t = a[i];    /* save a[i] to t */
15                         i++;         /* move index of a[] forward */
16                 } else {
17                         t = b[j];    /* save b[j] to t */
18                         j++;         /* move index of b[] forward */
19                 }
20 
21                 c[k] = t;            /* now save x to c[k] */
22                 k++;                 /* move index of c[] forward */
23         }
24 
25         /* copy the left of a[] to c[] */
26         while (i < na)
27                 c[k++] = a[i++];
28 
29         /* copy the left of b[] to c[] */
30         while (j < nb)
31                 c[k++] = b[j++];
32 }

那么，如何合并两个有序的按升序排列的单链表呢？方法有三：

方法一: 将链表a和链表b的每一个结点的地址都dump出来，转化为顺序存储处理（设存入A[]和B[]），然后使用上面的merge0()算法进行合并，设合并后的存储数组为C[]，最后将C[]中的结点地址重新组织为一个单链表。这个方法实现起来比较容易，但是时间复杂度和空间复杂度都比较高。
方法二: 使用链式插入排序，假设链表a的头结点的数据域较小，那么可以遍历链表b的每一个结点，将结点逐个插入到链表a中。这一方法实现起来不是很容易，因为链式插入排序的实现相对复杂。另外，这一方法的时间效率也不是很高。
方法三: 仿照顺序存储列表的合并方法对单链表a和b进行合并，时间复杂度很不错，只是实现起来不是很直观，也不是很容易。

方法一

  1 static int
  2 get_list_length(list_t *head)
  3 {
  4         int len = 0;
  5         for (list_t *p = head; p != NULL; p = p->next)
  6                 len++;
  7         return len;
  8 }
  9 
 10 static void
 11 dump_list_node_addr(list_t *head, uintptr_t **saveto, int *saveto_sz)
 12 {
 13         int len = get_list_length(head);
 14 
 15         uintptr_t *aux = (uintptr_t *)malloc(sizeof (uintptr_t) * len);
 16         if (aux == NULL) {
 17                 *saveto = NULL;
 18                 *saveto_sz = 0;
 19                 return;
 20         }
 21 
 22         int index = 0;
 23         for (list_t *p = head; p != NULL; p = p->next)
 24                 aux[index++] = (uintptr_t)p;
 25 
 26         *saveto = aux;
 27         *saveto_sz = len;
 28 }
 29 
 30 static void
 31 merge0(uintptr_t *c, int nc, uintptr_t *a, int na, uintptr_t *b, int nb)
 32 {
 33         int i = 0;
 34         int j = 0;
 35         int k = 0;
 36 
 37         while (i < na && j < nb) {
 38                 if (((list_t *)a[i])->data < ((list_t *)b[j])->data)
 39                         c[k++] = a[i++];
 40                 else
 41                         c[k++] = b[j++];
 42         }
 43 
 44         while (i < na)
 45                 c[k++] = a[i++];
 46 
 47         while (j < nb)
 48                 c[k++] = b[j++];
 49 }
 50 
 51 /**
 52  * Merge two sorted single linked lists (dst and src).
 53  */
 54 list_t *
 55 merge1(list_t *head1, list_t *head2)
 56 {
 57         if (head1 == NULL)
 58                 return head2;
 59 
 60         if (head2 == NULL)
 61                 return head1;
 62 
 63         list_t *out = NULL;
 64 
 65         uintptr_t *a = NULL;
 66         uintptr_t *b = NULL;
 67         uintptr_t *c = NULL;
 68         int na = 0;
 69         int nb = 0;
 70         int nc = 0;
 71 
 72         /* 1. dump the address of per node of list 1 to a[] */
 73         dump_list_node_addr(head1, &a, &na);
 74         if (a == NULL)
 75                 goto done;
 76 
 77         /* 2. dump the address of per node of list 2 to a[] */
 78         dump_list_node_addr(head2, &b, &nb);
 79         if (b == NULL)
 80                 goto done;
 81 
 82         /* 3. alloc memory for c[] */
 83         nc = na + nb;
 84         c = (uintptr_t *)malloc(sizeof (uintptr_t) * nc);
 85         if (c == NULL)
 86                 goto done;
 87         memset(c, 0, nc);
 88 
 89         /* 4. merge a[] and b[] to c[] */
 90         merge0(c, nc, a, na, b, nb);
 91 
 92         /* 5. rebuild dst single linked list according to c[] */
 93         for (int i = 0; i < nc - 1; i++)
 94                 ((list_t *)c[i])->next = (list_t *)c[i+1];
 95         ((list_t *)c[nc-1])->next = NULL;
 96         out = (list_t *)c[0];
 97 
 98 done:
 99         if (c != NULL) free(c);
100         if (b != NULL) free(b);
101         if (a != NULL) free(a);
102 
103         return out;
104 }

在上面的方法中，假设链表a的长度为na，链表b的长度为nb，一个指针的大小为8个字节(64位处理器上)，那么我们使用的辅助存储为 8 * (na + nb) * 2 个字节。而时间复杂度，大约是O(4*(na+nb))。方法虽然比较笨，但是要写出上面的代码，需要对指针的本质有深刻的理解。

方法二

 1 /**
 2  * Insertion Sort on a Single Linked List : insert a node to the sorted list
 3  */
 4 static void
 5 list_insert(list_t **head, list_t *node)
 6 {
 7         if (*head == NULL) {
 8                 *head = node;
 9                 return;
10         }
11 
12         /* get both prev and next of the node to insert */
13         list_t *node_prev = *head;
14         list_t *node_next = NULL;
15         for (list_t *p = *head; p != NULL; p = p->next) {
16                 if (p->data <= node->data) {
17                         node_prev = p;
18                         continue;
19                 }
20 
21                 node_next = p;
22                 break;
23         }
24 
25         if (node_next == NULL) { /* append node to the tail */
26                 node_prev->next = node;
27         } else {
28                 if (node_next == node_prev) { /* == *head */
29                         node->next = *head;
30                         *head = node;
31                         return;
32                 }
33 
34                 /* node_prev -> node -> node_next */
35                 node_prev->next = node;
36                 node->next = node_next;
37         }
38 }
39 
40 /**
41  * Merge two sorted single linked lists (dst and src).
42  */
43 list_t *
44 merge2(list_t *head1, list_t *head2)
45 {
46         if (head1 == NULL)
47                 return head2;
48 
49         if (head2 == NULL)
50                 return head1;
51 
52         /* now merge the two lists */
53         list_t *out = NULL;
54         list_t *p = NULL;
55         if (head1->data < head2->data) {
56                 out = head1;
57                 p = head2;
58         } else {
59                 out = head2;
60                 p = head1;
61         }
62 
63         /*
64          * insert per node of list 'p' to the dst list one by one, and always
65          * pick up the previous node inserted as the new head for getting good
66          * time complexity once list_insert() is called
67          */
68         list_t *head = out;
69         while (p != NULL) {
70                 list_t *this = p;
71                 p = p->next;
72                 this->next = NULL;
73                 list_insert(&head, this);
74                 head = this;
75         }
76 
77         return out;
78 }

本方法最关键的是需要实现链式插入排序的核心函数list_insert()。时间复杂度大约为O(na+nb)，但实现的主体函数merge2()非常容易理解。之所以说这个方法的时间效率还不够高，是因为需要遍历开始结点数据域较大的那个链表的每一个结点。（对照方法三的实现，你就会发现此言不虚:-))

方法三

 1 static void
 2 list_insert_node_tail(list_t **head, list_t *tail, list_t *node)
 3 {
 4         if (tail == NULL)
 5                 *head = node;
 6         else
 7                 tail->next = node;
 8 }
 9 
10 list_t *
11 merge(list_t *head1, list_t *head2)
12 {
13         list_t *out = NULL;
14         list_t *tail = out;
15         list_t *p1 = head1;
16         list_t *p2 = head2;
17 
18         while (p1 != NULL && p2 != NULL) {
19                 list_t *node = NULL;
20 
21                 if (p1->data < p2->data) {
22                         node = p1;      /* 1. save p1 to node */
23                         p1 = p1->next;  /* 2. move p1 forward */
24                 } else {
25                         node = p2;      /* 1. save p2 to node */
26                         p2 = p2->next;  /* 2. move p2 forward */
27                 }
28 
29                 node->next = NULL;      /* 3. cut node's next off */
30                                         /* 4. append node to out */
31                 list_insert_node_tail(&out, tail, node);
32                 tail = node;            /* 5. update the tail */
33         }
34 
35         if (p1 != NULL) /* link the left of list 1 to the tail of out */
36                 list_insert_node_tail(&out, tail, p1);
37 
38         if (p2 != NULL) /* link the left of list 2 to the tail of out */
39                 list_insert_node_tail(&out, tail, p2);
40 
41         return out;
42 }

这才是一个高效的实现方法，因为时间复杂度为O(na+nb), 空间复杂度为O(1)。

小结： 链表操作体现的是工程师的编程功底，如果你在面试中遇到这样的问题，方法三通常是面试官所期待的。但是，实在没有办法在短时间内想明白的话，方法一和方法二也是可以的，至少表明你是有想法的程序员。完整代码实现戳这里。