台电k6你立功了

等了一年，固件还是停留在 v2.11，英文排版依旧很渣，只能自己动手了。

写了个英文排版的程序，提供两种排版方式。一种是按最小破损度进行排版：最小破损度追求的是文章排版的整齐性，也就是让各行行尾的空格数目尽可能相等（程序里用行尾空格数的平方和作为代价）。这样外观上就会比较平整；当然，行尾的空格也可以移到行内，使得每行刚好满足给定宽度。另一种是按最小行数进行排版：这种排版相当直观，就是把原文一个词一个词地放入一行中，当不能再放入时，就进行换行。

源码：

#!/usr/bin/env python
#encoding=utf8

#最小破损度排版
import sys

# Memo table shared by the memoised functions of this script.  It is looked
# up by name on every call, so rebinding the module-level name swaps the table.
cache = {}
def cache_wrapper(func):
    """Memoise ``func`` on its positional arguments.

    Results are stored in the module-level ``cache`` dict, keyed by the
    tuple of positional arguments, so all arguments must be hashable.

    Returns:
        A wrapper that calls ``func`` only on a cache miss.
    """
    def cache_loader(*args):
        try:
            return cache[args]
        except KeyError:
            pass
        # Miss: compute outside the ``try`` so a KeyError raised by ``func``
        # itself is not mistaken for a cache miss.
        value = func(*args)
        cache[args] = value
        return value
    return cache_loader

def min_cost_cache_wrapper(func):
    """Memoise a two-argument function on its second argument only.

    The first argument (a list, hence unhashable) is ignored for caching
    purposes; the result is stored in the module-level ``cache`` under the
    bare ``jth`` key.

    Returns:
        A wrapper ``loader(alist, jth)`` that calls ``func`` only when
        ``jth`` has not been seen before.
    """
    def loader(alist, jth):
        if jth not in cache:
            cache[jth] = func(alist, jth)
        return cache[jth]
    return loader

@cache_wrapper
def total_cost(jth_word):
    """Return the minimum total cost of laying out words 0..jth_word.

    If all of those words fit on a single line, that line's cost is the
    answer.  Otherwise every split point ``k`` is tried: the best layout of
    words 0..k plus the cost of one line holding words k+1..jth_word.  The
    winning split is recorded by ``min_cost``.
    """
    single_line = cost(0, jth_word)
    # 2**32 is the sentinel ``cost`` returns for a line that does not fit.
    if single_line != 2**32:
        return single_line
    # Split points are evaluated in ascending order, so each recursive call
    # finds the smaller sub-problems already memoised.
    candidates = [total_cost(split) + cost(split + 1, jth_word)
                  for split in range(jth_word)]
    return min_cost(candidates, jth_word)

@cache_wrapper
def cost(from_ith, to_jth):
    """Return the cost of putting words from_ith..to_jth on a single line.

    The cost is the square of the unused width: ``linewidth`` minus the word
    lengths (from the module-level ``len_words``) minus one separating space
    between each pair of adjacent words.

    Returns:
        The squared slack when the words fit; ``0`` when a single word on
        its own is wider than the line; ``2**32`` (the "does not fit"
        sentinel) when several words together overflow the line.
    """
    global linewidth
    global len_words
    gaps = to_jth - from_ith
    slack = linewidth - gaps - sum(len_words[from_ith:to_jth + 1])
    if slack < 0:
        if from_ith == to_jth:
            # A lone word wider than the line cannot be placed any better
            # than by itself.  Treating it as infeasible would make every
            # layout of the paragraph infeasible and collapse the output
            # onto one line, so accept it at no penalty.
            return 0
        return 2**32
    return slack * slack

@min_cost_cache_wrapper
def min_cost(cost_list, jth_word):
    """Pick the cheapest entry of ``cost_list`` and record where it was.

    The index of the first entry that is strictly below ``2**32`` and minimal
    is stored in the module-level ``choose`` dict under ``jth_word``; if no
    entry is below ``2**32`` (or the list is empty) the index stored is -1.

    Returns:
        The minimal value, or ``2**32`` when no entry is below that bound.
    """
    global choose
    min_val, min_index = 2**32, -1
    if cost_list:
        smallest = min(cost_list)
        if smallest < min_val:
            min_val = smallest
            # ``index`` yields the first occurrence, i.e. ties go to the
            # earliest split point.
            min_index = cost_list.index(smallest)
    choose[jth_word] = min_index
    return min_val

def print_paragraph(choose_dict):
    """Print the current paragraph using the split points in ``choose_dict``.

    The split-point mapping is first turned into the ordered list of
    line-ending word indexes, which is then handed to ``print_lines``.
    """
    print_lines(get_divided_point(choose_dict))

def get_divided_point(choose_dict):
    """Return the line-ending word indexes in ascending order.

    Starting from the last word of the module-level ``words`` list, follow
    ``choose_dict`` (word index -> chosen split index) until an index with
    no entry is reached, collecting every index visited on the way.
    """
    global words
    cursor = len(words) - 1
    trail = [cursor]
    while cursor in choose_dict:
        cursor = choose_dict[cursor]
        trail.append(cursor)
    # The walk ran from the last line back to the first; flip it.
    trail.reverse()
    return trail

def print_lines(indexes):
    """Print one output line per entry of ``indexes``.

    ``indexes`` is the list of last-word indexes of each line; every line
    starts at word 0 or right after the previous line's last word.
    """
    starts = [0] + [last + 1 for last in indexes[:-1]]
    for first, last in zip(starts, indexes):
        print_words(first, last)

def print_words(ith, jth):
    """Write words ith..jth (inclusive) as one space-separated line.

    Words are taken from the module-level ``words`` list.  Nothing at all is
    written when the range is empty (``jth < ith``).

    The line goes through ``sys.stdout.write`` rather than the ``print``
    statement so the function is valid on both Python 2 and Python 3 while
    producing the same bytes.
    """
    global words
    line = [words[k] for k in range(ith, jth + 1)]
    if line:
        sys.stdout.write(" ".join(line) + "\n")

if __name__ == '__main__':
    # Usage: wordwrap WIDTH < content.  Each input line is treated as one
    # paragraph and wrapped independently.
    if len(sys.argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        # sys.exit instead of the bare ``exit`` helper, which only exists
        # when the site module has been loaded.
        sys.exit(-1)
    linewidth = int(sys.argv[1])
    lines = sys.stdin.readlines()
    for content in lines:
        words = content.split()
        # readlines() keeps the trailing newline, so a blank line is never
        # the empty string; test for "no words" instead.
        if not words:
            continue
        # The memo table and split-point map are per paragraph.
        cache = {}
        len_words = [len(word) for word in words]
        choose = {}
        # Called for its side effect of filling ``choose``.
        penalty = total_cost(len(words) - 1)
        print_paragraph(choose)

上述代码如果改写成自底向上的动态规划，应该还能进一步提升效率。现在只是用了记忆化搜索，仔细想来，函数调用的次数还是会比动态规划多得多，而且函数调用本身的代价也比较高。另外一个问题是无法直接对全文进行排版——估计是效率太低，一直没有输出；我是把文本按段切开再分别排版的，因此有可能出现上下两段宽度不一致的情况。

#!/usr/bin/env python
#encoding=utf8

#贪心法求最小行数排版，看来还是greedy is good啊
import sys

def print_lines(content):
    """Greedily wrap ``content`` to the module-level ``linewidth``.

    The text is split on whitespace and each word is appended to the current
    line while it still fits (one space between words); otherwise a newline
    is started.  Output goes to ``sys.stdout`` with no trailing newline.

    The first word is written as-is, with no leading space and no leading
    newline, even if it is wider than the line.
    """
    global linewidth
    space_width = 1
    space_left = linewidth
    at_start = True
    for word in content.split():
        if at_start:
            # Nothing precedes the first word, so no separator is emitted
            # and the full line width is available to it.
            sys.stdout.write(word)
            space_left = linewidth - len(word)
            at_start = False
        elif len(word) + space_width > space_left:
            sys.stdout.write("\n")
            sys.stdout.write(word)
            space_left = linewidth - len(word)
        else:
            sys.stdout.write(" " + word)
            space_left -= len(word) + space_width

if __name__ == '__main__':
    # Usage: wordwrap WIDTH < content.  The whole of stdin is wrapped as a
    # single stream of words.
    if len(sys.argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        # sys.exit instead of the bare ``exit`` helper, which only exists
        # when the site module has been loaded.
        sys.exit(-1)
    linewidth = int(sys.argv[1])
    content = sys.stdin.read()
    print_lines(content)
    sys.stdout.write("\n\n")