Python 自定义去除特殊字符串的三种方法

方法1. 用 str.replace 逐个去掉列表中指定的字符串

def get_all_news(self, response, file):
    """Extract text fragments from ``response`` and write them to ``file``.

    The fragments come from an XPath query selecting ``<p>`` elements that
    contain neither ``<img>`` nor ``<a>`` children (position > 5), plus any
    ``<div>`` whose class contains ``WB_text``. Every substring listed in
    ``rm_list`` is removed from each fragment; each fragment is then written
    followed by ``'\\t\\n'``.

    Args:
        response: Object exposing ``xpath(query).extract()`` that returns an
            iterable of strings.
        file: Writable text stream; only ``file.write`` is used.
    """
    # Substrings to strip from every fragment. All entries are disabled, so
    # the cleaning loop below does nothing until one is enabled.
    rm_list = [
        # r'</',
        # r'p>',
        # r'<',
    ]
    query = '//p[not (img)][not (a)][position()>5] | //div[contains(@class,"WB_text")]'
    fragments = response.xpath(query).extract()
    for fragment in fragments:
        text = fragment
        for unwanted in rm_list:
            # A fragment that is blank once its last character is dropped
            # is collapsed to the empty string.
            if not text[:-1].strip():
                text = ''
                continue
            if unwanted in text:
                text = text.replace(unwanted, '').strip()
        file.write(text + '\t\n')

方法2. 用正则表达式按映射表替换特殊字符


# Ordered (regex pattern, replacement text) pairs used by processSpeakingTxt.
# Raw strings keep the backslashes literal for the regex engine and avoid
# the invalid-escape-sequence warning that '\(' and '\)' trigger in normal
# string literals.
PRE_PROCESSING_EXPRESSION = (
    (r'[-]', '负'),
    (r'[\(]', '左括号'),
    (r'[\)]', '右括号'),
    # ...... (append further pairs here)
)

def processSpeakingTxt(txtStr):
    """Replace special characters in ``txtStr`` using the substitution table.

    Every ``(pattern, replacement)`` pair of the module-level
    ``PRE_PROCESSING_EXPRESSION`` is applied in order with ``re.sub``.

    Args:
        txtStr: Text to pre-process.

    Returns:
        The text with all matches of every pattern replaced.
    """
    # Local import so the snippet works without a file-level ``import re``.
    import re

    # No ``global`` statement is needed because the table is only read.
    # ``re.sub`` returns the input unchanged when nothing matches, so a
    # preliminary ``re.search`` would only scan the text a second time.
    for pattern, replacement in PRE_PROCESSING_EXPRESSION:
        txtStr = re.sub(pattern, replacement, txtStr)
    return txtStr

方法3. 修改文件名（去掉文件名中的指定字符串）


def change_filename2(path, str):
    """Remove the substring ``str`` from the names of entries in ``path``.

    Every directory entry whose name contains ``str`` is renamed to the same
    name with all occurrences of ``str`` removed. An entry is left untouched
    when the target name already exists or would be empty. Rename errors are
    printed and do not abort the remaining renames.

    Args:
        path: Directory whose entries are renamed.
        str: Substring to remove. The name shadows the builtin but is kept
            so existing keyword callers continue to work.
    """
    # Local import so the snippet works without a file-level ``import os``.
    import os

    files = [f for f in os.listdir(path) if str in f]
    print(files)
    for f in files:
        g = f.replace(str, '')
        if not g:
            # The whole name consisted of ``str``; there is nothing to
            # rename it to.
            print('skip %r: new name would be empty' % f)
            continue
        # Build full paths instead of calling os.chdir(path): changing the
        # working directory leaks out of the function and breaks on the
        # second iteration when ``path`` is relative.
        src = os.path.join(path, f)
        dst = os.path.join(path, g)
        try:
            if not os.path.exists(dst):
                os.rename(src, dst)
        except OSError as e:
            print(e)
原文地址:https://www.cnblogs.com/amize/p/13888832.html