台电k6你立功了

等了一年,固件还是停留在 v2.11,英文排版依然很糟糕,只能自己动手了。

写了个英文排版的,两种排版方式,一是按最小破损度进行排版,最小破损度追求的是文章排版的整齐性,也就是行尾空格数目尽可能相等。这样外观上就会比较平整,当然,行尾的空格可以移到行内,使得每行刚好满足给定宽度。另一种是按最小行数进行排版。这种排版相当直观,就是把原文一个词一个词地放入一行中,当不能再放入时,就进行换行。

源码:

#!/usr/bin/env python
# encoding=utf8

#最小破损度排版
import sys

# Memo table shared by every memoized function in this script.  The main
# loop rebinds this name to a fresh dict for each paragraph, so the wrappers
# must look the name up at call time rather than capture the dict object.
cache = {}


def cache_wrapper(func):
    """Memoize *func* in the module-level ``cache`` dict.

    The key is the tuple of positional arguments, so all arguments must be
    hashable.  Every function decorated with this wrapper shares the same
    table; entries are told apart only by their argument tuples.

    Returns a wrapper with the same name and docstring as *func*.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def cache_loader(*args):
        # ``cache`` is resolved as a global on every call, so rebinding the
        # global to a new dict effectively clears the memo.
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]

    return cache_loader

def min_cost_cache_wrapper(func):
    """Memoize a ``func(alist, jth)`` callable on ``jth`` alone.

    ``alist`` is a list and therefore cannot be part of a dict key, so the
    result is stored in the module-level ``cache`` dict under ``jth`` only.
    A second call with the same ``jth`` returns the stored value without
    calling *func* again, whatever ``alist`` contains.

    Returns a wrapper with the same name and docstring as *func*.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def loader(alist, jth):
        # ``cache`` is looked up as a global on every call, so a rebound
        # (fresh) dict is picked up automatically.
        if jth not in cache:
            cache[jth] = func(alist, jth)
        return cache[jth]

    return loader

@cache_wrapper
def total_cost(jth_word):
    """Return the minimum total cost of laying out words ``0..jth_word``.

    If all of those words fit on a single line, that line's cost is returned
    directly.  Otherwise every split point ``k`` is tried: words ``0..k`` are
    laid out recursively and words ``k+1..jth_word`` form the last line.  The
    cheapest split is selected (and recorded) by ``min_cost``.
    """
    infeasible = 2**32  # sentinel that cost() returns for an over-full line

    single_line = cost(0, jth_word)
    if single_line != infeasible:
        return single_line

    # Position k in this list corresponds to breaking the line after word k;
    # min_cost() relies on that alignment when it records the chosen index.
    candidates = [
        total_cost(k) + cost(k + 1, jth_word)
        for k in range(jth_word)
    ]
    return min_cost(candidates, jth_word)

@cache_wrapper
def cost(from_ith, to_jth):
    """Return the penalty for putting words ``from_ith..to_jth`` on one line.

    The penalty is the square of the trailing slack: the global ``linewidth``
    minus the word lengths (taken from the global ``len_words``) and minus one
    space between each pair of adjacent words.

    Returns ``2**32`` as an "infeasible" sentinel when several words together
    overflow the line.  A single word that is wider than the line cannot be
    split, so it is accepted on a line of its own at zero penalty; without
    this, one overlong word makes every layout infeasible.
    """
    gaps = to_jth - from_ith  # one space between each pair of adjacent words
    slack = linewidth - gaps - sum(len_words[from_ith:to_jth + 1])
    if slack < 0:
        if from_ith == to_jth:
            # Unbreakable overlong word: a line of its own is the only option.
            return 0
        return 2**32
    return slack * slack

@min_cost_cache_wrapper
def min_cost(cost_list, jth_word):
    """Pick the cheapest entry of *cost_list* and record where it was found.

    The position of the first entry strictly below ``2**32`` with the lowest
    value is stored in the global ``choose`` dict under ``jth_word``; ``-1``
    is stored when no entry is below ``2**32``.

    Returns the selected value, or ``2**32`` when nothing was selected.
    """
    best_index, best_value = -1, 2**32
    for position, candidate in enumerate(cost_list):
        # Strict comparison keeps the earliest position on ties.
        if candidate < best_value:
            best_index, best_value = position, candidate
    choose[jth_word] = best_index
    return best_value

def print_paragraph(choose_dict):
    """Print the current paragraph using the break choices in *choose_dict*.

    The choices are first turned into a list of line-ending word indexes,
    which is then handed to ``print_lines``.
    """
    print_lines(get_divided_point(choose_dict))

def get_divided_point(choose_dict):
    """Return the line-ending word indexes in ascending order.

    Starting from the last index of the global ``words`` list, the chain
    ``index -> choose_dict[index]`` is followed until an index has no entry
    in *choose_dict*.  Every index visited is collected, and the collected
    list is returned reversed, so it ends with the last word's index.
    """
    current = len(words) - 1
    line_ends = [current]
    while current in choose_dict:
        current = choose_dict[current]
        line_ends.append(current)
    line_ends.reverse()
    return line_ends

def print_lines(indexes):
    """Print one line per entry of *indexes*.

    Each entry is the index of the last word on a line.  The first line
    starts at word 0; every later line starts right after the previous
    line's last word.
    """
    line_starts = [0] + [end + 1 for end in indexes[:-1]]
    for start, end in zip(line_starts, indexes):
        print_words(start, end)

def print_words(ith, jth):
    """Write words ``ith..jth`` (inclusive) of the global ``words`` list.

    The words are separated by single spaces and followed by a newline.
    Nothing is written, not even a newline, when ``jth < ith``.
    """
    if jth < ith:
        return
    # sys.stdout.write works on both Python 2 and Python 3, unlike the
    # trailing-comma print statement.
    sys.stdout.write(" ".join(words[ith:jth + 1]) + "\n")

if __name__ == '__main__':
    # Usage: wordwrap WIDTH < content
    # Each input line is treated as one paragraph and wrapped independently.
    if len(sys.argv) != 2:
        print("usage: ./wordwrap width 1<content")
        sys.exit(-1)

    # These names are module globals read by cost(), min_cost(),
    # get_divided_point() and print_words().
    linewidth = int(sys.argv[1])
    for content in sys.stdin:
        words = content.split()
        if not words:
            # Lines read from stdin keep their trailing newline, so a length
            # check never detects a blank line; test for "no words" instead.
            continue
        cache = {}    # fresh memo table for this paragraph
        choose = {}   # chosen break point per word index, filled by min_cost()
        len_words = [len(word) for word in words]
        # Called for its side effect of filling ``choose``; the returned
        # total penalty itself is not needed.
        total_cost(len(words) - 1)
        print_paragraph(choose)

上述代码按动态规划做应该能够进一步提升效率,现在只是用了记忆化搜索,仔细想来调用函数的次数还是会比动态规划要多得多,而且函数调用的代价还是比较高的。另外一个问题,是无法直接对全文进行排版,估计是效率太低,一直没有输出,我是分段切开再行排版的,有可能出现上下两段间宽度不一致的情况。

#!/usr/bin/env python
# encoding=utf8

#贪心法求最小行数排版,看来还是greedy is good啊
import sys

def print_lines(content):
    """Greedily wrap *content* to the global ``linewidth`` and write it out.

    Words (whitespace-separated) are appended to the current line while they
    fit, counting one space before each word after the first; a word that
    does not fit starts a new line.  A word wider than ``linewidth`` is
    written on a line of its own.  The output is written to ``sys.stdout``
    with no leading space or newline and no trailing newline.
    """
    space_width = 1
    space_left = linewidth
    at_start = True  # the very first word needs no separator before it
    for word in content.split():
        if at_start:
            sys.stdout.write(word)
            space_left = linewidth - len(word)
            at_start = False
        elif len(word) + space_width > space_left:
            # Does not fit: break the line before this word.
            sys.stdout.write("\n" + word)
            space_left = linewidth - len(word)
        else:
            sys.stdout.write(" " + word)
            space_left -= len(word) + space_width

if __name__ == '__main__':
    # Usage: wordwrap WIDTH < content
    # The whole of stdin is wrapped as a single run of words.
    if len(sys.argv) != 2:
        print("usage: ./wordwrap width 1<content")
        sys.exit(-1)

    linewidth = int(sys.argv[1])  # module global read by print_lines()
    content = sys.stdin.read()
    print_lines(content)
    # print_lines() does not terminate its last line; end it and add one
    # blank line after the text.
    sys.stdout.write("\n\n")




原文地址:https://www.cnblogs.com/Lifehacker/p/word_wrap_in_two_ways_with_python.html