Python常用的文件读写操作和字符串操作

文件读写操作

fileUtils.py

# -*- coding: utf-8 -*-

import os

def getFileList(dir, fileList=None):
    """Recursively collect the paths of all files under a path.

    :param dir: a file or directory path; a file path is appended as-is,
        a directory is walked recursively, anything else is ignored
    :param fileList: optional list to append results to; a new list is
        created for each top-level call when omitted
    :return: the list holding every file path found
    """
    # A fresh list per call: a mutable default would be shared between calls
    # and silently accumulate results from earlier invocations.
    if fileList is None:
        fileList = []
    if os.path.isfile(dir):
        fileList.append(dir)
    elif os.path.isdir(dir):
        for s in os.listdir(dir):
            # To skip specific entries, test ``s`` here and ``continue``.
            getFileList(os.path.join(dir, s), fileList)
    return fileList

def readStrFromFile(filePath):
    """Read the whole content of a file opened in binary mode.

    :param filePath: path of the file to read
    :return: the raw file content exactly as returned by ``read()``
    """
    with open(filePath, "rb") as handle:
        return handle.read()

def readLinesFromFile(filePath):
    """Read a file opened in binary mode and return its lines as a list.

    :param filePath: path of the file to read
    :return: list of lines as returned by ``readlines()`` (line endings kept)
    """
    with open(filePath, "rb") as handle:
        return handle.readlines()

def writeStrToFile(filePath, string):
    """Write data to a file, replacing any existing content.

    The file is opened in binary write mode (``"wb"``).

    :param filePath: path of the file to write
    :param string: the data to write
    """
    with open(filePath, "wb") as sink:
        sink.write(string)

def appendStrToFile(filePath, string):
    """Append data to the end of a file.

    The file is opened in binary append mode (``"ab"``).

    :param filePath: path of the file to append to
    :param string: the data to append
    """
    with open(filePath, "ab") as sink:
        sink.write(string)

def dumpToFile(filePath, content):
    """Serialize an object with ``pickle`` and store it in a local file.

    :param filePath: path of the file to write (opened in ``"wb"`` mode)
    :param content: the object to save (list, dict, tuple, ...)
    """
    import pickle

    with open(filePath, "wb") as sink:
        pickle.dump(content, sink)

def loadFromFile(filePath):
    """Load a pickled object from a local file.

    :param filePath: path of a file containing pickled data
    :return: the deserialized object (e.g. list, dict, tuple, ...)
    """
    import pickle

    # Pickle data is binary, so the file must be opened in "rb" mode: text
    # mode fails on Python 3 and can corrupt the stream on Windows.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filePath, "rb") as f:
        return pickle.load(f)

字符串操作

zhuanma.py

# -*- coding: utf-8 -*-
import os
import sys

# Pick a resource opener: prefer pkg_resources when it can be imported,
# otherwise fall back to opening the file relative to this module's directory.
try:
    import pkg_resources
except ImportError:
    def get_module_res(*res):
        """Open a resource file located relative to this module, in binary mode."""
        return open(os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__), *res)), 'rb')
else:
    def get_module_res(*res):
        """Return a pkg_resources stream for a resource path inside this module."""
        return pkg_resources.resource_stream(__name__, os.path.join(*res))

# True when running under Python 2.
PY2 = sys.version_info[0] == 2

default_encoding = sys.getfilesystemencoding()

# Version-neutral aliases for the text type and for dict iteration helpers.
if not PY2:
    text_type = str
    string_types = (str,)
    xrange = range

    def iterkeys(d):
        """Return an iterator over the keys of *d*."""
        return iter(d.keys())

    def itervalues(d):
        """Return an iterator over the values of *d*."""
        return iter(d.values())

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of *d*."""
        return iter(d.items())

else:
    text_type = unicode
    string_types = (str, unicode)

    def iterkeys(d):
        """Return an iterator over the keys of *d*."""
        return d.iterkeys()

    def itervalues(d):
        """Return an iterator over the values of *d*."""
        return d.itervalues()

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of *d*."""
        return d.iteritems()

def strdecode(sentence):
    """Return *sentence* as text, decoding it when it is not already text.

    Values that are already ``text_type`` are returned unchanged. Anything
    else is decoded as UTF-8; if that raises ``UnicodeDecodeError`` it is
    decoded as GBK with undecodable bytes ignored.

    :param sentence: the value to convert
    :return: the decoded text
    """
    if isinstance(sentence, text_type):
        return sentence
    try:
        return sentence.decode('utf-8')
    except UnicodeDecodeError:
        return sentence.decode('gbk', 'ignore')

def resolve_filename(f):
    """Return ``f.name`` when *f* has that attribute, otherwise ``repr(f)``.

    :param f: any object, typically a file-like object
    :return: the value of ``f.name``, or the ``repr`` of *f* as a fallback
    """
    # A private sentinel distinguishes "no attribute" from a name that is None.
    missing = object()
    name = getattr(f, "name", missing)
    if name is missing:
        return repr(f)
    return name

stringUtils.py

# -*- coding: utf-8 -*-

import zhuanma

def jiema(string):
    """Decode a string to unicode text by delegating to ``zhuanma.strdecode``.

    :param string: the string to decode
    :return: the value returned by ``zhuanma.strdecode``
    """
    return zhuanma.strdecode(string)

def filterReturnChar(string):
    """Remove every ``" "`` (space) character from a string.

    :param string: the string to filter
    :return: the string with all ``" "`` characters removed
    """
    # NOTE(review): the function name suggests a carriage return ("\r") may
    # have been intended instead of a space -- confirm against callers.
    unwanted, replacement = " ", ""
    return string.replace(unwanted, replacement)

def encodeUTF8(string):
    """Convert a string to its UTF-8 encoded form.

    The input is first passed through ``jiema`` and the result is then
    encoded as UTF-8.

    :param string: the string to convert
    :return: the UTF-8 encoded string
    """
    decoded = jiema(string)
    return decoded.encode("utf-8")

def filterCChar(string):
    """Extract the Chinese characters from a string.

    :param string: the text to filter
    :return: a string made of only the characters in the range
        U+4E00..U+9FA5, in their original order
    """
    import re

    # The range needs real \u escapes; without the backslashes the character
    # class matches ASCII "0".."u" instead of Chinese characters.
    hanzi = re.compile(u"[\u4e00-\u9fa5]+", re.U)
    return "".join(hanzi.findall(string))