Python3 replace()方法
实例1
def main():
    """Replace the 'python3' prefix of a sample string and print the result."""
    text = 'python3, word!'
    # str.replace returns a new string; `text` itself is left unchanged.
    text1 = text.replace('python3', 'Hello')
    print(text1)


if __name__ == '__main__':
    main()
以上实例输出结果如下:
Hello, word!
实例2
#!/usr/bin/python3
# The variables are deliberately not named `str`: that would shadow the
# built-in type for the rest of the module.
old_url = "www.w3cschool.cc"
print("菜鸟教程旧地址:", old_url)
print("菜鸟教程新地址:", old_url.replace("w3cschool.cc", "runoob.com"))

sentence = "this is string example....wow!!!"
# The third argument caps the number of replacements at 3; the sentence only
# contains two "is" substrings, so both of them are replaced.
print(sentence.replace("is", "was", 3))
以上实例输出结果如下:
菜鸟教程旧地址: www.w3cschool.cc
菜鸟教程新地址: www.runoob.com
thwas was string example....wow!!!
实例3
# Sample record. The description value carries literal double quotes (escaped
# here), which the loop below strips out together with whitespace controls.
obj = {
    "name": "Blush Colour Infusion ",
    "description": "\"Long lasting, sheer powder blush provides 10 hours of buildable, natural-looking cheek color for all skin tones.\"",
    "upc": "736150159878",
    "page_id_variant": "12702098",
}

for k, v in obj.items():
    # Remove carriage returns, newlines, tabs and double quotes from the value.
    sku_var = v.replace("\r", "").replace("\n", "").replace("\t", "").replace('"', '')
    print(sku_var)
以上实例输出结果如下:
Blush Colour Infusion
Long lasting, sheer powder blush provides 10 hours of buildable, natural-looking cheek color for all skin tones.
736150159878
12702098
re.sub()表示替换
实例1
import re
def main():
    """Demonstrate re.findall and re.sub on a string with embedded numbers."""
    content = 'abc124hello46goodbye67shit'
    # \d+ matches each maximal run of digits.
    list1 = re.findall(r'\d+', content)
    print(list1)
    mylist = list(map(int, list1))
    print(mylist)
    print(sum(mylist))
    # Replace a digit run together with the 'h' or 'g' that follows it.
    print(re.sub(r'\d+[hg]', 'foo1', content))
    print()
    # Replace every digit run with a fixed string.
    print(re.sub(r'\d+', '456654', content))


if __name__ == '__main__':
    main()
以上实例输出结果如下:
# ['124', '46', '67']
# [124, 46, 67]
# 237
# abcfoo1ellofoo1oodbye67shit
# abc456654hello456654goodbye456654shit
python3 json数据转换之demjson模块
实例1
# JSON-like text whose keys are unquoted. A raw string is used so that the
# backslash-escaped quotes around the description reach the decoder intact.
obj = r'{name: "Blush Colour Infusion ",description: "\"Long lasting, sheer powder blush provides 10 hours of buildable, natural-looking cheek color for all skin tones.\"",upc: "736150159878",page_id_variant: "12702098"}'
上面数据发现是有问题的,虽然是json格式,但是,key值都缺少引号。此时用json模块无法解析。
需要一个新的模块:demjson模块
import demjson

# Strip carriage returns, newlines, tabs and double quotes from the raw text.
# (The cleaned copy is not used by the decode/encode calls below.)
sku_var = obj.replace("\r", "").replace("\n", "").replace("\t", "").replace('"', '')
# decode() parses the lenient JSON text into Python objects.
data1 = demjson.decode(obj)
print("data1",type(data1))
print("data1", data1)
# encode() is given the original str here, so the result is that text
# serialised as one quoted JSON string, not an encoded dict.
data2 = demjson.encode(obj)
print("data2",type(data2))
print("data2",data2)
以上实例输出结果如下:
data1 = <class 'dict'>
data1 = {'name': 'Blush Colour Infusion ', 'description': '"Long lasting, sheer powder blush provides 10 hours of buildable, natural-looking cheek color for all skin tones."', 'upc': '736150159878', 'page_id_variant': '12702098'}
data2 = <class 'str'>
data2 = "{name: \"Blush Colour Infusion \",description: \"\\\"Long lasting, sheer powder blush provides 10 hours of buildable, natural-looking cheek color for all skin tones.\\\"\",upc: \"736150159878\",page_id_variant: \"12702098\"}"
实例2
# -*- coding: utf-8 -*-
import demjson
# JSON-like text with unquoted keys. The apostrophe in "What's" is escaped
# because the literal itself is delimited by single quotes.
s = '{a:"000001_Unit_1. Hi,Birdie.mp3",b:"000005_Unit_2. Good morning,Miss Wang..mp3",c:"000008_Unit_3. What\'s your name_.mp3"}'
# decode(): lenient JSON text -> Python object.
data1 = demjson.decode(s)
print(data1)
print(type(data1))
# encode(): Python object -> JSON text.
data2 = demjson.encode(data1)
print(data2)
print(type(data2))
以上实例输出结果如下:
{'a': '000001_Unit_1. Hi,Birdie.mp3', 'b': '000005_Unit_2. Good morning,Miss Wang..mp3', 'c': "000008_Unit_3. What's your name_.mp3"}
<class 'dict'>
{"a":"000001_Unit_1. Hi,Birdie.mp3","b":"000005_Unit_2. Good morning,Miss Wang..mp3","c":"000008_Unit_3. What's your name_.mp3"}
<class 'str'>
re.match函数
re.match尝试从字符串的起始位置匹配一个模式,如果在起始位置匹配不成功,match()就返回None。
函数语法:re.match(pattern, string, flags=0) flags是标志位
懒惰匹配
表达式 .* 的意思很好理解,就是任意单个字符重复任意次,并且尽可能多地匹配,即贪婪匹配。
表达式 .*? 是在满足整体匹配条件的前提下尽可能少地匹配,即懒惰匹配。
re.search方法
re.search扫描整个字符串并返回第一个成功的匹配。
函数语法:re.search(pattern, string, flags=0)
re.match与re.search的区别
re.match只匹配字符串的开始,如果字符串开始不符合正则表达式,则匹配失败,函数返回None;而re.search匹配整个字符串,直到找到一个匹配。
检索和替换
re.sub(pattern, repl, string, count=0)
pattern : 正则中的模式字符串。
repl : 替换的字符串,也可为一个函数。
string : 要被查找替换的原始字符串。
count : 模式匹配后替换的最大次数,默认 0 表示替换所有的匹配。
compile 函数
compile 函数用于编译正则表达式,生成一个正则表达式( Pattern )对象,供 match() 和 search() 这两个函数使用。
函数语法:re.compile(pattern[, flags])
findall
*match和 search是匹配一次 ,findall匹配所有
在字符串中找到正则表达式所匹配的所有子串,并返回一个列表,如果没有找到匹配的,则返回空列表。
函数语法:findall(string[, pos[, endpos]])
string 待匹配的字符串。
pos 可选参数,指定字符串的起始位置,默认为0。
endpos 可选参数,指定字符串的结束位置,默认为字符串的长度。
实例1
切克app
#!/usr/bin/python3
# -*-coding:utf-8 -*- #有中文一定要加上
```python
import re

# JSONP-style response text: a JSON-like payload wrapped in a `__jp1(...)` call.
resp = '__jp1({"respCode":0,"respData":{"id":"1176485632356237323","spuId":null,"content":"百搭小黄鞋 日常穿着频率top3n长裤 短裤 裙子 都好搭 最爱","mediaUrls":["n_v2ec8c3d36c2774874b4e503bda5e2e84d.jpg","n_v2b276f5b1fc6b4dfcb57f757835b6e6a4.jpg"],"channelVOs":[{"id":"1163399738536493056","name":"CHECK GIRL","description":"切克女孩们!参与话题 #CHECK GIRL#,秀美照,即有机会上切克开屏!现在在切克社区,发布各话题的相关图文,必得红包,最高1500元!详情见APP首页【发帖子拿红包】活动。","mediaUrl":"https://pic6.zhuanstatic.com/zhuanzh/n_v28f2bfbad22fc4bcb8b7edae8e03b5b03.png","score":10,"createTime":"1566211087246","state":1,"shareable":true,"countInfo":null,"jumpUrl":null}],"score":11948,"title":"Converse 1970 s 姜黄色","createTime":"1569331007452","status":2,"author":{"userId":"35076540909323","nickName":"闲置的小姑娘","avatar":"https://pic2.zhuanstatic.com/zhuanzh/n_v29fb79704b7eb4351b5b39b6910fa3fb9.png"},"likeUsers":[{"userId":"40533402881300","nickName":"黄明月","labelInfo":null,"spuInfo":null},"errorMsg":"null","errMsg":""})'
# Skip everything up to the first literal "(" and capture the {...} payload
# that is terminated by "})". The parentheses and braces are escaped so they
# match literally; re.S lets "." also match newlines.
respData = re.findall(r'.*?\((\{.*?\})\)', resp, re.S)[0]
print(respData)
以上实例输出结果如下:
{"respCode":0,"respData":{"id":"1176485632356237323","spuId":null,"content":"百搭小黄鞋 日常穿着频率top3n长裤 短裤 裙子 都好搭 最爱","mediaUrls":["n_v2ec8c3d36c2774874b4e503bda5e2e84d.jpg","n_v2b276f5b1fc6b4dfcb57f757835b6e6a4.jpg"],"channelVOs":[{"id":"1163399738536493056","name":"CHECK GIRL","description":"切克女孩们!参与话题 #CHECK GIRL#,秀美照,即有机会上切克开屏!现在在切克社区,发布各话题的相关图文,必得红包,最高1500元!详情见APP首页【发帖子拿红包】活动。","mediaUrl":"https://pic6.zhuanstatic.com/zhuanzh/n_v28f2bfbad22fc4bcb8b7edae8e03b5b03.png","score":10,"createTime":"1566211087246","state":1,"shareable":true,"countInfo":null,"jumpUrl":null}],"score":11948,"title":"Converse 1970 s 姜黄色","createTime":"1569331007452","status":2,"author":{"userId":"35076540909323","nickName":"闲置的小姑娘","avatar":"https://pic2.zhuanstatic.com/zhuanzh/n_v29fb79704b7eb4351b5b39b6910fa3fb9.png"},"likeUsers":[{"userId":"40533402881300","nickName":"黄明月","labelInfo":null,"spuInfo":null},"errorMsg":"null","errMsg":""}
re.split
*注:正则表达式[\w]+,\w+,[\w+] 三者有何区别:
[\w]+和\w+没有区别,都是匹配由数字、字母、下划线组成的一个或多个字符;
[\w+]表示匹配单个字符,该字符可以是数字、字母、下划线或加号本身;
函数语法:re.split(pattern, string[, maxsplit=0, flags=0])
pattern 匹配的正则表达式
string 要匹配的字符串。
maxsplit 分隔次数,maxsplit=1 分隔一次,默认为 0,不限制次数。
flags 标志位,用于控制正则表达式的匹配方式,如:是否区分大小写,多行匹配等等
第一种方法:文件夹中读取所有内容,正则之后写入到文件
import os
import sys
import re
import csv
import json
# Raw string: in a normal literal "\a" would be read as the bell character.
path = r'D:\auto_test'  # directory to scan
files = os.listdir(path)  # names of every entry in that directory
dic = {}
for file in files:  # walk the directory entries
    # os.listdir yields bare names, so join with the directory before testing.
    full_path = os.path.join(path, file)
    if os.path.isdir(full_path):
        continue  # only regular entries are opened
    # The context manager closes the handle even if reading fails.
    with open(full_path) as f:
        for line in f:  # read the file line by line
            # Capture the first two space-delimited arguments after ".py ".
            resp = re.findall(r'\.py (.*?) (.*?) ', line, re.M)
            if resp:
                key = resp[0][0]
                # NOTE(review): the first hit stores 1 and later hits add 2;
                # kept exactly as in the original - confirm this is intended.
                if not dic.get(key):
                    dic[key] = 1
                else:
                    dic[key] += 2
    print(dic)
# Persist the accumulated counts as JSON.
with open('test.txt', 'w', encoding='utf8') as fw:
    fw.write(json.dumps(dic))
第二种方法:文件夹中读取所有内容,正则之后写入到文件
import os
import sys
import re
path = os.getcwd()
print(path)
# os.path.join inserts the separator; the original literal r'auto_test\' was
# an unterminated raw string and also lacked a separator after `path`.
auto_bat = os.path.join(path, 'auto_test')
auto_bat_paths = os.listdir(auto_bat)
dic = {}
for auto_bat_path in auto_bat_paths:
    path_1 = os.path.join(auto_bat, auto_bat_path)
    # The unused `split('_')[1]` lookup was dropped: it raised IndexError for
    # file names that contain no underscore.
    with open(path_1, 'r', encoding='gbk') as fr:
        data = fr.read()
    # Capture the first two space-delimited arguments after ".py ".
    a = re.findall(r'\.py (.*?) (.*?) ', data, re.M)
    if a:
        key = a[0][0]
        # NOTE(review): the first hit stores 1 and later hits add 2; kept
        # exactly as in the original - confirm this is intended.
        if not dic.get(key):
            dic[key] = 1
        else:
            dic[key] += 2
# Persist the accumulated counts using the dict's repr.
with open('1.txt', 'w', encoding='utf8') as fw:
    fw.write(str(dic))
1366666666.bat 文件
@echo off
start cmd /c "python C:\Users\Administrator\Desktop\register_test/aritest_script/register_script.py 172.31.38.13:4729 1366666666 xxxxx 161967 尹 军 10291984 2407582522@qq.com 男"
1388888888.bat 文件
@echo off
start cmd /c "python C:\Users\Administrator\Desktop\register_test/aritest_script/register_script.py 172.31.218.41:4655 1388888888 Zjy123456 716996 周左右 01251985 1786449221@qq.com 女"
#coding:utf-8
import re
import json
def extract_spu_ids(text):
    """Return the raw spuId values found in the "dataSource" array of *text*.

    The array is located with a regular expression rather than a JSON parser,
    so each value is returned exactly as it appears in the text (a quoted
    string keeps its quotes). An empty list is returned when *text* contains
    no `"dataSource":[...],"type"` section.
    """
    # The brackets are escaped so that they match literally instead of forming
    # a character class; re.S lets "." span line breaks inside the array.
    matches = re.findall(r'"dataSource":(\[.*?\]),"type"', text, re.S)
    if not matches:
        return []
    return re.findall(r'"spuId":(.*?),', matches[0], re.S)


# The response body is not included here; place the raw response text between
# the triple quotes.
resp = '''
'''
res = extract_spu_ids(resp)
for spu_id in res:
    print(spu_id)
返回结果: