正则表达式使用


import re

# re.match() tries the pattern only at the very start of the text
# (arguments: the pattern first, then the text to examine); if the text does
# not begin with the pattern, the match fails.
# Writing patterns as raw strings (r'...') is strongly recommended.
# A successful call returns a match object; .group() yields the matched text.
res = re.match(r'ello', 'ello meizi hello')

# A match object is truthy and a failed match is None, so a conditional
# expression is enough to choose what gets printed.
print(res.group() if res else "没有匹配结果")

# re.search() does not give up when the start of the text fails to match:
# it keeps scanning forward and returns the first place the pattern fits.
res = re.search(r'hello', 'ello meizi hello')

# Same success/failure reporting as for match() above.
print(res.group() if res else "没有匹配结果")

# Review prompt from the original notes, kept as a no-op string statement.
"""match函数和search函数的功能 区别和联系"""
In [1]: import re
In [2]: re.match(r"hello","hello").group()
# ----------1. .匹配一个任意字符(除去\n)-----------------------------------------
In [3]: re.match(r".ello","hello").group()
In [4]: re.match(r".ello","Hello").group()
In [5]: re.match(r".ello","1ello").group()
In [6]: re.match(r".ello","&ello").group()
In [7]: re.match(r".ello","\nello").group()  # 出错
In [14]: re.match(r".ello",".ello").group()
In [15]: re.match(r"\.ello",".ello").group()

------------2. []匹配集合中任意一个字符----------------------------------------
In [8]: re.match(r"[Hh]ello","Hello").group()
In [9]: re.match(r"[Hh]ello","hello").group()
In [10]: re.match(r"[Hh]ello","1ello").group() # 出错

In [11]: re.match(r"[0123456789]ello","1ello").group()
In [12]: re.match(r"[0123456789]ello","9ello").group()
In [13]: re.match(r"[0123456789]ello","Hello").group()  # 出错

In [16]: re.match(r"[0123456789]ello","Hello").group()  # 出错
In [17]: re.match(r"[0123456789]ello","1ello").group()

------------3. [-]匹配范围内部的任意一个字符-----------------
In [18]: re.match(r"[0-9]ello","1ello").group()
In [19]: re.match(r"[0-35-9]ello","1ello").group()
In [20]: re.match(r"[0-35-9]ello","9ello").group()
In [21]: re.match(r"[0-35-9]ello","4ello").group()  #  出错
In [22]: re.match(r"[0-9a-zA-Z]ello","4ello").group()
In [23]: re.match(r"[0-9a-zA-Z]ello","aello").group()
In [24]: re.match(r"[0-9a-zA-Z]ello","Hello").group()
In [25]: re.match(r"[0-35-9]ello","9ello").group()
In [26]: re.match(r"[0-35-9]ello","4ello").group()  # 出错

------------4. [^] 禁止匹配 范围内部的任意一个字符-----------------

In [27]: re.match(r"[^4]ello","4ello").group()
In [28]: re.match(r"[^4]ello","0ello").group()
In [29]: re.match(r"[^4]ello","9ello").group()

------------5. '\d'匹配一个任意数字字符  '\D'匹配一个任意非数字字符 -----------------

In [30]: re.match(r"\dello","9ello").group()
In [31]: re.match(r"\dello","0ello").group()
In [32]: re.match(r"\dello","@ello").group()

In [33]: re.match(r"\Dello","0ello").group()
In [34]: re.match(r"\Dello","@ello").group()
In [35]: re.match(r"\Dello","?ello").group()

------------6. '\s'匹配一个任意空白字符  '\S'匹配一个任意非空白字符-------------------------------
In [36]: re.match(r"\Dello"," ello").group()

In [37]: re.match(r"[\t\r\n\v\f ]ello"," ello").group()
In [38]: re.match(r"\sello"," ello").group()
In [39]: re.match(r"\Sello"," ello").group()
In [40]: re.match(r"\Sello","1ello").group()

-----------7. '\w'匹配一个任意单词字符 '\W'匹配一个任意非单词字符-----------------------------
In [41]: re.match(r"\wello","1ello").group()
In [42]: re.match(r"\wello","hello").group()
In [43]: re.match(r"\wello","Hello").group()
In [44]: re.match(r"\wello","_ello").group()
In [45]: re.match(r"\Wello","_ello").group()
In [46]: re.match(r"\Wello","?ello").group()

------------'\w'语义拓展
In [2]: re.match(r"\wBC","ABC")
In [3]: re.match(r"\wBC","ABC").group()
In [4]: re.match(r"\wBC","呵BC").group()
In [5]: re.match(r"\wBC","呵BC",re.ASCII).group()
In [6]: re.match(r"\wBC","呵BC",re.UNICODE).group()
In [7]: re.match(r"\wBC","呵BC").group()


-----------二 量词  匹配多个字符---------------------------
In [47]: re.match(r"嫦娥号\d升空了","嫦娥1号升空了").group()
In [48]: re.match(r"嫦娥\d号升空了","嫦娥1号升空了").group()
In [49]: re.match(r"嫦娥\d号升空了","嫦娥9号升空了").group()
In [50]: re.match(r"嫦娥\d号升空了","嫦娥10号升空了").group()
In [51]: re.match(r"嫦娥\d\d号升空了","嫦娥10号升空了").group()
In [52]: re.match(r"嫦娥\d\d号升空了","嫦娥99号升空了").group()
In [53]: re.match(r"嫦娥\d\d号升空了","嫦娥100号升空了").group()
In [54]: re.match(r"嫦娥\d\d\d号升空了","嫦娥100号升空了").group()
In [55]: re.match(r"嫦娥\d\d\d\d\d号升空了","嫦娥10000号升空了").group()

In [56]: re.match(r"嫦娥\d{5}号升空了","嫦娥10000号升空了").group()
In [57]: re.match(r"嫦娥\d{3}号升空了","嫦娥10000号升空了").group()
In [58]: re.match(r"嫦娥\d{3}号升空了","嫦娥100号升空了").group()

In [59]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥100号升空了").group()
In [60]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥1号升空了").group()
In [61]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥10号升空了").group()
In [62]: re.match(r"嫦娥\d{1,1}号升空了","嫦娥10号升空了").group()
In [64]: re.match(r"嫦娥\d{0,3}号升空了","嫦娥号升空了").group()

In [65]: re.match(r"嫦娥\d{0,}号升空了","嫦娥号升空了").group()
In [66]: re.match(r"嫦娥\d*号升空了","嫦娥号升空了").group()

In [67]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1号升空了").group()
In [68]: re.match(r"嫦娥\d{1,}号升空了","嫦娥号升空了").group()
In [69]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1000号升空了").group()
In [70]: re.match(r"嫦娥\d+号升空了","嫦娥1000号升空了").group()

In [8]: re.match(r"\w?BC","ABC").group()
In [9]: re.match(r"\w?BC","BC").group()


---------------三 匹配开始^ 和结束位置$--------------------------

In [71]: re.match(r"\w{4,20}@163.com","hello@163.com").group()
In [72]: re.match(r"\w{4,20}@163.com","hello@163Acom").group()
In [73]: re.match(r"\w{4,20}@163\.com","hello@163Acom").group()
In [74]: re.match(r"\w{4,20}@163\.com","hello@163.com").group()
In [75]: re.match(r"\w{4,20}@163\.com","hello@163.com.cn").group()
In [76]: re.match(r"\w{4,20}@163\.com","cc.hello@163.com").group()
In [77]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com").group()
In [78]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com.cn").group()

In [79]: re.search(r"^\w{4,20}@163\.com","cc.hello@163.com.cn").group()
In [80]: re.search(r"^\w{4,20}@163\.com","hello@163.com.cn").group()

In [81]: re.search(r"^\w{4,20}@163\.com$","hello@163.com.cn").group()
In [82]: re.search(r"^\w{4,20}@163\.com$","hello@163.com").group()
In [83]: re.match(r"^\w{4,20}@163\.com","hello@163.com.cn").group()
In [84]: re.match(r"^\w{4,20}@163\.com$","hello@163.com.cn").group()
In [85]: re.match(r"^\w{4,20}@163\.com$","hello@163.com").group()


-------------四 匹配分组 ()将感兴趣的数据进行提取------
In [86]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group()
In [87]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(0)
In [88]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(1)
In [89]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group()
In [90]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(1)
In [91]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(2)

-------(|)匹配其中任何一个表达式并且放入分组中----

In [92]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@263.com").group(2)
In [93]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@qq.com").group(2)
In [94]: re.match(r"^(\w*)hello(\w*)$","hellohello@qq.com").group(1)


---------------------'\分组编号' 使用某个分组的数据在后面某个位置继续匹配 ----

In [11]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@163.com").group()
In [12]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@qq.com").group()
In [13]: re.match(r"^\w{4,20}@(163|qq).com$","hello@qq.com").group()
In [14]: re.match(r"^\w{4,20}@(163|qq).com$","hello@163.com").group()

In [15]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group()
In [16]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(1)
In [17]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(2)

In [18]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group(2)
In [19]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-12345678").group()
In [20]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group()
In [21]: re.match(r"<(\w+)>","<html>aaa</html>").group(1)
In [22]: re.match(r"<(\w+)>(.*)","<html>aaa</html>").group(1)
In [23]: re.match(r"<(\w+)>(.*)","<html>aaa</html>").group(1)

匹配多个标签中的数据 
<html><h1>hello</h1></html>
"""思考问题  如何创建有名分组  如何引用有名分组"""

In [24]: re.match(r"<(\w+)><(\w+)>(.+)","<html><h1>hello</h1></html>").group()
In [25]: re.match(r"<(\w+)><(\w+)>(.+)","<html><h1>hello</h1></html>").group(1)
In [26]: re.match(r"<(\w+)><(\w+)>(.+)","<html><h1>hello</h1></html>").group(2)
In [27]: re.match(r"<(\w+)><(\w+)>(.+)","<html><h1>hello</h1></html>").group(2)
In [28]: re.match(r"<(\w+)><(\w+)>(.+)","<html><h1>hello</h1></html>").group()

In [29]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group()
In [30]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(1)
In [31]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(2)
In [32]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(3)

In [34]: re.match(r"((\d{3,4})-(\d{6,8})) \2-\3","0755-12345678 0755-12345678").group()

In [35]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group()
In [36]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(1)
In [37]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(2)
In [38]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('quhao')
In [39]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('zuoji')
In [40]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8}) (?P=quhao)-(?P=zuoji)","0755-12345678 07
    ...: 55-12345678").group()

In [41]: re.match(r"((?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})) (?P=quhao)-(?P=zuoji)","0755-12345678 
    ...: 0755-12345678").group()


---------------------------re模块高级函数--------------------------
In [42]: ret = re.search(r"\d+", "阅读次数为 9999").group()

In [43]: re.search(r"\d+", "阅读次数为 9999").group()
Out[43]: '9999'

In [46]: re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")
Out[46]: ['9999', '7890', '12345']

In [47]: re.sub(r"\d+","998","python=997")
Out[47]: 'python=998'

In [48]: re.sub(r"\d+","998","python=997 c=988")
Out[48]: 'python=998 c=998'

In [49]: re.sub(r"\d+","998","python=997 c=988",1)
Out[49]: 'python=998 c=988'

In [52]: def func(matchobj):
    ...:     data = matchobj.group()
    ...:     str_data = str( int(data) + 1)
    ...:     return str_data
    ...: 

In [53]: re.sub(r"\d+",func, "age=17")
Out[53]: 'age=18'

In [54]: data = """
    ...: 
...:

岗位职责:

...:

完成推荐算法、数据统计、接口、后台等服务器端相关工作

...:


...:

必备要求:

...:

良好的自我驱动力和职业素养,工作积极主动、结果导向

...:

 

...:

技术要求:

...:

1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式

...:

2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架

...:

3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种

...:

4、掌握NoSQL、MQ,熟练使用对应技术解决方案

...:

5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js

...:

 

...:

加分项:

...:

大数据,数理统计,机器学习,sklearn,高性能,大并发。

...: ...:
"""

In [55]: re.sub(r"<.*>","",data)
Out[55]: '\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n '

In [56]: re.sub(r"<\w+>","",data)
Out[56]: '\n\n 岗位职责:

\n完成推荐算法、数据统计、接口、后台等服务器端相关工作

\n

\n必备要求:

\n良好的自我驱动力和职业素养,工作积极主动、结果导向

\n 

\n技术要求:

\n1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式

\n2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架

\n3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种

\n4、掌握NoSQL、MQ,熟练使用对应技术解决方案

\n5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js

\n 

\n加分项:

\n大数据,数理统计,机器学习,sklearn,高性能,大并发。

\n\n
'

In [57]: re.sub(r"</?\w+>","",data)
Out[57]: '\n\n 岗位职责:\n完成推荐算法、数据统计、接口、后台等服务器端相关工作\n\n必备要求:\n良好的自我驱动力和职业素养,工作积极主动、结果导向\n \n技术要求:\n1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式\n2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架\n3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种\n4、掌握NoSQL、MQ,熟练使用对应技术解决方案\n5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js\n \n加分项:\n大数据,数理统计,机器学习,sklearn,高性能,大并发。\n\n '

In [58]: re.sub(r"</?\w+>|\n","",data)
Out[58]: ' 岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向 技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js 加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。 '

In [59]: re.sub(r"</?\w+>|\n|&nbsp","",data)
Out[59]: ' 岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向;技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js;加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。 '

In [60]: re.split(r" ","age=18 name=tom")
Out[60]: ['age=18', 'name=tom']

In [61]: re.split(r" |=","age=18 name=tom")
Out[61]: ['age', '18', 'name', 'tom']

----------------------?号将正则转化为非贪婪模式(懒惰模式)-------------------------------
In [62]: re.match(r"(\d+)(\d?)","12345678").group()
Out[62]: '12345678'

In [63]: re.match(r"(\d+)(\d?)","12345678").group(1)
Out[63]: '12345678'

In [64]: re.match(r"(\d+)(\d?)","12345678").group(2)
Out[64]: ''

In [65]: re.match(r"(\d+?)(\d?)","12345678").group(2)
Out[65]: '2'

In [66]: re.match(r"(\d+?)(\d+)","12345678").group(2)
Out[66]: '2345678'

In [67]: re.match(r"(\d+?)(\d+)","12345678").group(1)
Out[67]: '1'

In [68]: re.match(r"(\d+)(\d+)","12345678").group(1)
Out[68]: '1234567'

In [69]: re.match(r"(\d+)(\d+)","12345678").group(2)
Out[69]: '8'

In [70]: url = """丁叮c的直播"""

In [72]: re.search(r"http.*jpg",url).group()
Out[72]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'

In [73]: re.search(r"http.*?jpg",url).group()
Out[73]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'

----------------r原生字符串 可以自动将其中的反斜线 进行转义----------------------
In [74]: path = "c:\\a\\b"

In [75]: print(path)
c:\a\b

In [76]: path = "c:\a\b\n"

In [77]: print(path)
c:

In [78]: re.match("c:\\a","c:\\a\\b\\c").group()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
 in ()
----> 1 re.match("c:\\a","c:\\a\\b\\c").group()

AttributeError: 'NoneType' object has no attribute 'group'

In [79]: re.match("c:\\\\a","c:\\a\\b\\c").group()
Out[79]: 'c:\\a'

In [80]: re.match("c:\\\\a\\\\b\\\\c","c:\\a\\b\\c").group()
Out[80]: 'c:\\a\\b\\c'

In [81]: re.match(r"c:\\a\\b\\c","c:\\a\\b\\c").group()
Out[81]: 'c:\\a\\b\\c'

In [82]: r"c:\\a\\b\\c"
Out[82]: 'c:\\\\a\\\\b\\\\c'