Python基础语法速查
Python基础语法速查
Python编码规范
基础语法(上)
导包
import 模块名 [as 别名]
import numpy as np print(np.sin(x))
from 模块名 import 对象名 [as 别名]
from matplotlib import pyplot as plt
plt.plot(x,y)
from 模块名 import *
from math import * print(pi)
__name__属性
模块被直接运行时, __name__ 的值为 '__main__'; 被其他模块导入时, __name__ 的值为该模块自身的名字(不带 .py 后缀)
def main():
if __name__ == '__main__':
print('This program is run directly.')
elif __name__ == 'hello':
print('This program is used as a module.') 其可以让某些代码块中,只运行本模块的代码,导入的代码不运行
if __name__ == '__main__':
pass 变量
name1 = "张三" name2 = name1 print(name1, name2) name2 = "李四" print(name1, name2)
张三 张三 张三 李四
引用机制, 内存中开辟空间并赋值"张三" 用name1指向"张三"的地址
name2指向name1指向的地址
内存中开辟新空间并赋值"李四", 更改name2的指向方向, 使其指向"李四"的地址
查看内存地址和判断数据类型
>>> x=3 >>> id(x) 10968864 >>> type(x) <class 'int'> >>> type(x) == int True >>> isinstance(x,int) True
注释
# 我是注释 """ 我也是注释 """
用户输入输出
name = input("What is your name? ")
age = int(input("How old are you? "))
age = eval(input("How old are you? "))  # eval会执行任意表达式, 只能用于可信输入
print(1,2,3,sep='\t')
print(1,end=' ')
格式化打印
print('a={0}, b={1}, c={2}'.format('a','b','c')) a=a, b=b, c=c
数字
>>> 9999**99 990148353526723487602263124753282625570559528895791057324326529121794837894053513464422176826916433932586924386677766244032001623756821400432975051208820204980098735552703841362304669970510691243800218202840374329378800694920309791954185117798434329591212159106298699938669908067573374724331208942425544893910910073205049031656789220889560732962926226305865706593594917896276756396848514900989999 >>> 0.3+0.2 0.5 >>> 0.4-0.1 0.30000000000000004 >>> 0.4-0.1 == 0.3 False >>> abs(0.4-0.3-0.1) < 1e-6 True
>>> x=3+4j >>> y=5+6j >>> x+y (8+10j) >>> x*y (-9+38j) >>> abs(x) 5.0 >>> x.imag 4.0 >>> x.real 3.0 >>> x.conjugate() (3-4j)
分数
>>> from fractions import Fraction >>> x = Fraction(3,5) >>> y = Fraction(3,7) >>> x Fraction(3, 5) >>> x**2 Fraction(9, 25) >>> x.numerator 3 >>> x.denominator 5 >>> x+y Fraction(36, 35) >>> x-y Fraction(6, 35) >>> x*y Fraction(9, 35) >>> x/y Fraction(7, 5) >>> x*2 Fraction(6, 5) >>> Fraction(3.5) Fraction(7, 2)
>>> from decimal import Decimal
>>> 1/9
0.1111111111111111
>>> Decimal(1/9)
Decimal('0.111111111111111104943205418749130330979824066162109375')
>>> 1/3
0.3333333333333333
>>> Decimal(1/3)
Decimal('0.333333333333333314829616256247390992939472198486328125')
>>> Decimal(1/9) + Decimal(1/3)
Decimal('0.4444444444444444197728216750') 类型转换
>>> bin(555) # 转化为二进制
'0b1000101011'
>>> oct(555) # 转化为八进制
'0o1053'
>>> hex(555) # 转化为十六进制
'0x22b'
>>> int('0x22b', 16) # 十六进制转化为十进制
555
>>> int('0o1053',8) # 八进制转化为十进制
555
>>> int('0b1000101011',2) # 二进制转化为十进制
555 >>> ord('a') # 查看指定字符的 Unicode 编码
97
>>> ord('栋') # 支持中文
26635
>>> chr(26635) # 查看对应字符
'栋' 流程控制语句
age = 48
while True:
guess = int(input("你猜我几岁: "))
if guess > age :
print("猜的太大了,往小里试试...")
elif guess < age :
print("猜的太小了,往大里试试...")
else:
print("恭喜你,猜对了...")
break 你猜我几岁: 89 猜的太大了,往小里试试... 你猜我几岁: 12 猜的太小了,往大里试试... 你猜我几岁: 48 恭喜你,猜对了...
身份运算
# is / is not 判定是否引用同一个对象, 比较内存地址 # 比较某个变量是否是某个类型 age = 99 age2 = 99 print(type(age) is int) print(age2 is age) print(id(age), id(age2))
True True 2012969040 2012969040
三元运算
a = 3
b = 4
# Python没有C风格的 a > b ? a : b 写法, 使用条件表达式代替:
max_value = a if a > b else b
print(max_value)
基础语法(中)
列表
创建
>>> a = [1,2,3,4,'4']
>>> a
[1, 2, 3, 4, '4']
>>> list((1,2,3,4))
[1, 2, 3, 4]
>>> list(range(0,10,2))
[0, 2, 4, 6, 8]
>>> list({a:'2',b:'3'})
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unhashable type: 'list'
>>> list({'a':3,'b':4})
['a', 'b']
>>> list({'a':3,'b':4}.items())
[('a', 3), ('b', 4)] 访问
>>> a = [1,2,3,4,'4'] >>> a[0] 1 >>> a[-1] '4'
追加
names = ["钢铁侠", "蜘蛛侠"]
names.append("蝙蝠侠")
print(names) ['钢铁侠', '蜘蛛侠', '蝙蝠侠']
插入
names = ['钢铁侠', '蜘蛛侠', '蝙蝠侠'] names.insert(1,"路飞") print(names)
['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠']
合并
names = ['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠'] temp = ["a", "b", "c", "d"] names.extend(temp) print(names)
['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
列表嵌套
names = ['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd'] names.insert(2,[1,2,3]) print(names)
['钢铁侠', '路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
del 删除
names = ['钢铁侠', '路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd'] del(names[0]) print(names)
['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
pop 删除
names = ['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd'] print(names.pop()) print(names.pop(1))
d [1, 2, 3]
count计数
>>> x = [1,2,2,2,3,4,5,5,6] >>> x.count(2) 3
index索引
>>> x = [1,2,2,2,3,4,5,5,6] >>> x.index(2) 1
sort就地排序
>>> x=list(range(11)) >>> import random >>> random.shuffle(x) >>> x [10, 3, 9, 4, 7, 6, 2, 5, 1, 0, 8] >>> x.sort(reverse=True) >>> x [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
reverse就地逆转
>>> x.reverse() >>> x [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
copy浅拷贝
>>> x=[1,2,[3,4]] >>> y=x.copy() >>> y [1, 2, [3, 4]] >>> y[2].append(5) >>> y [1, 2, [3, 4, 5]] >>> x [1, 2, [3, 4, 5]]
deepcopy深拷贝
>>> import copy >>> x=[1,2,[3,4]] >>> y=copy.deepcopy(x) >>> x[2].append(5) >>> x [1, 2, [3, 4, 5]] >>> y [1, 2, [3, 4]]
列表推导式
>>> l = [x*x for x in range(10)] >>> l [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] >>> sum([2**i for i in range(64)]) 18446744073709551615
嵌套列表平铺
>>> vec = [[1,2,3],[4,5,6],[7,8,9]]
>>> [num for elem in vec for num in elem]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
过滤不符合条件的元素
>>> l = [-1,-4,6,7,-2] >>> [i for i in l if i>0] [6, 7]
切片
>>> l = list(range(10)) >>> l [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> l[0:3] [0, 1, 2] >>> l[0:3:2] [0, 2] >>> l[::-1] [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
clear 清空
names = ['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c'] names.clear() print(names)
[]
元组
不可变的列表
创建
ages = (18,19,20,33,83) ages = tuple([18,29,23,40])
常用操作
#索引
>>> ages = (11, 22, 33, 44, 55)
>>> ages[0]
11
>>> ages[3]
44
>>> ages[-1]
55
#切片:同list
#循环
>>> for age in ages:
print(age)
11
22
33
44
55
#长度
>>> len(ages)
5
#包含
>>> 11 in ages
True
>>> 66 in ages
False
>>> 11 not in ages
False 注意:元组本身不可变,如果元组中还包含其他可变元素,这些可变元素可以改变
>>> data (99, 88, 77, ['Alex', 'Jack'], 33) >>> data[3][0] = '金角大王' >>> data (99, 88, 77, ['金角大王', 'Jack'], 33)
字典
创建操作
>>>person = {"name": "dylan", 'age': 18}
#或
>>>person = dict(name='seven', age=20)
#或
>>>person = dict({"name": "egon", 'age': 20})
#或
>>> {}.fromkeys([1,2,3,4,5,6,7,8],100)
{1: 100, 2: 100, 3: 100, 4: 100, 5: 100, 6: 100, 7: 100, 8: 100} 增加操作
names = {
"dylan": [18, "CEO", 66000],
"黑姑娘": [24, "行政", 4000],
}
# 新增k
names["佩奇"] = [26, "讲师", 40000] 删除操作
names.pop("dylan") # 删除指定key
names.popitem() # 删除并返回最后插入的键值对(Python 3.7之前为随机删除一项)
del names["abc"] # 删除指定key,同pop方法
names.clear() # 清空dict 修改操作
dic['key'] = 'new_value' # 如果key在字典中存在,'new_value'将会替代原来的value值;dic.update(dic2) # 将字典dic2的键值对添加到字典dic中
查操作
dic['key'] #返回字典中key对应的值,若key不存在字典中,则报错;
'key' in dic #若存在则返回True,没有则返回False
dic.get('key') #key不存在时返回默认值(默认为None),不会报错
>>> dic = dict(name='dylan')
>>> dic
{'name': 'dylan'}
>>> dic.get('a')
>>> dic.get('a','Not exists.')
'Not exists.'
dic.keys() #返回包含字典所有key的视图对象(dict_keys),可用list()转为列表;
dic.values() #返回包含字典所有value的视图对象(dict_values);
dic.items() #返回包含所有(键,值)元组的视图对象(dict_items);
循环
1、for k in dic.keys()
2、for k,v in dic.items()
3、for k in dic # 推荐用这种,效率速度最快
info = {
"name":"dylan",
"age": 18,
}
for k in info:
print(k,info[k])
# 输出
name dylan
age 18 求长度
len(dic)
集合
创建集合
>>> a = {1,2,3,4,2,'alex',3,'rain','abc'}
>>> a
{1, 2, 3, 4, 'alex', 'abc', 'rain'}
由于它是天生去重的,重复的值你根本存不进去
帮列表去重
帮列表去重最快速的办法是什么? 就是把它转成集合,去重完,再转回列表
>>> b
[1, 2, 3, 4, 2, 'abc', 3, 'rain', 'abc']
>>> set(b)
{1, 2, 3, 4, 'abc', 'rain'}
>>> b = list(set(b)) #一句代码搞定
>>> b
[1, 2, 3, 4, 'abc', 'rain']
增删改查
>>> a
{1, 2, 3, 4, 'abc', 'rain'}
#新增
>>> a.add('黑姑娘')
#删除discard
>>> a
{2, 3, '黑姑娘', 'abc', 'rain'}
>>> a.discard('rain') #删除一个存在的值
>>> a.discard('rain2') #如果这个值不存在,do nothing.
>>> a
{2, 3, '黑姑娘', 'abc'}
>>> #随机删除,少用,或特定场景用
>>> a.pop() #删除并返回1
#删除remove
>>> a.remove(4)
#查
>>> a
{2, 3, '黑姑娘', 'abc', 'rain'}
>>> 'abc' in a
True
#改: 集合中的元素不能直接修改,只能先删除再添加
关系运算
s_1024 = {"佩奇","老男孩","海峰","马JJ","老村长","黑姑娘","Alex"}
s_pornhub = {"Alex","Egon","Rain","马JJ","Nick","Jack"}
print(s_1024 & s_pornhub) # 交集, elements in both set
print(s_1024 | s_pornhub) # 并集 or 合集
print(s_1024 - s_pornhub) # 差集 , only in 1024
print(s_pornhub - s_1024) # 差集, only in pornhub
print(s_1024 ^ s_pornhub) # 对称差集, 把脚踩2只船的人T出去 两个集合之间一般有三种关系,相交、包含、不相交。在Python中分别用下面的方法判断:
print(s_1024.isdisjoint(s_pornhub)) # 判断2个集合是不是不相交,返回True or False print(s_1024.issubset(s_pornhub)) # 判断s_1024是不是s_pornhub的子集,返回True or False print(s_1024.issuperset(s_pornhub)) # 判断s_1024是不是s_pornhub的父集,返回True or False
内置函数
enumerate()
>>> list(enumerate('abcd'))
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')]
>>> list(enumerate(['Python','Java']))
[(0, 'Python'), (1, 'Java')]
>>> list(enumerate({'a':97,'b':98}))
[(0, 'a'), (1, 'b')]
>>> list(enumerate({'a':97,'b':98}.items()))
[(0, ('a', 97)), (1, ('b', 98))]
>>> for index,value in enumerate(range(10,15)):
... print((index,value),end=' ')
...
(0, 10) (1, 11) (2, 12) (3, 13) (4, 14)
# 带start参数
>>> for item in enumerate(range(5),6):
... print(item, end=' ')
...
(6, 0) (7, 1) (8, 2) (9, 3) (10, 4) map()
>>> list(map(str,range(5))) ['0', '1', '2', '3', '4'] >>> def add5(v): ... return v+5 ... >>> list(map(add5,range(10))) [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] >>> def add(x,y): ... return x+y ... >>> list(map(add,range(5),range(5,10))) [5, 7, 9, 11, 13] >>> list(map(lambda x,y:x+y,range(5),range(5,10))) [5, 7, 9, 11, 13] # 自定义比较复杂的map映射函数 >>> def myMap(lst,value): ... return map(lambda item:item+value, lst) ... >>> list(myMap(range(5),5)) [5, 6, 7, 8, 9] >>> list(myMap(range(5),8)) [8, 9, 10, 11, 12] # 实现序列与数字的四则运算 >>> def myMap(iterable, op, value): ... if op not in '+-*/': ... return 'Error operator' ... func = lambda i:eval(repr(i)+op+repr(value)) ... return map(func,iterable) ... >>> list(myMap(range(5),'+',5)) [5, 6, 7, 8, 9]
>>> a,b,c=map(int,'123') >>> a 1 >>> b 2 >>> c 3
reduce()
>>> from functools import reduce >>> def add(x,y): ... return x+y ... >>> reduce(add,range(1,10)) 45 >>> reduce(lambda x,y:x+y, range(1,10)) 45
>>> import operator >>> reduce(operator.add, range(1,10)) 45 >>> reduce(operator.add, range(1,10),5) 50 >>> reduce(operator.mul, range(1,6)) 120 >>> reduce(operator.add, map(str, range(1,10))) '123456789' >>> ''.join(map(str,range(1,10))) '123456789'
filter()
>>> seq = ['foo','x41','?!','***'] >>> seq ['foo', 'x41', '?!', '***'] >>> def func(x): ... return x.isalnum() # 判断是否为字符或数字 ... >>> filter(func,seq) <filter object at 0x7f7830f736a0> >>> list(filter(func,seq)) ['foo', 'x41'] >>> seq ['foo', 'x41', '?!', '***'] >>> [x for x in seq if x.isalnum()] ['foo', 'x41'] >>> list(filter(lambda x:x.isalnum(), seq)) ['foo', 'x41'] >>> list(filter(None,[123,0,0,4,0,5])) [123, 4, 5]
range()
>>> list(range(2,10)) [2, 3, 4, 5, 6, 7, 8, 9] >>> list(range(2,10,2)) [2, 4, 6, 8] >>> for i in range(4): ... print(i) ... 0 1 2 3
zip()
>>> list(zip('abcd',[1,2,3]))
[('a', 1), ('b', 2), ('c', 3)]
>>> list(zip('abc'))
[('a',), ('b',), ('c',)]
>>> list(zip('123','abc','!@#'))
[('1', 'a', '!'), ('2', 'b', '@'), ('3', 'c', '#')]
>>> for item in zip('abcd',range(3)):
... print(item)
...
('a', 0)
('b', 1)
('c', 2)
>>> x = zip('abcd','1234')
>>> list(x)
[('a', '1'), ('b', '2'), ('c', '3'), ('d', '4')]
>>> list(x) # 只能遍历一次
[] eval()
>>> eval('3+5')
8
>>> eval('9')
9
>>> eval('4 / 5')
0.8
>>> eval('4' + '/' + '5')
0.8 函数
普通函数
d = {"name":"Dylan","age":26,"hobbie":"游泳"}
l = ["Rebeeca","Katrina","Rachel"]
def change_data(info, girls):
    """Mutate both arguments in place.

    Demonstrates that mutable arguments are shared with the caller: the
    changes made here are visible on the objects that were passed in.

    Args:
        info: Dict whose ``"hobbie"`` key is overwritten.
        girls: List that gets ``"XiaoYun"`` appended.

    Returns:
        None. The caller observes the effect through the arguments.
    """
    info["hobbie"] = "学习"
    girls.append("XiaoYun")
change_data(d,l)
print(d,l)
{'name': 'Dylan', 'age': 26, 'hobbie': '学习'} ['Rebeeca', 'Katrina', 'Rachel', 'XiaoYun']
参数问题
def func(a,b,c=3):
print(a,b,c)
func(1,2)
# 封装成元祖和字典
def func(*args,**kwargs):
print(args)
print(kwargs)
func('a','b',name='dylan',age=18) 匿名函数
#这段代码
def calc(x,y):
return x**y
print(calc(2,5))
#换成匿名函数
calc = lambda x,y:x**y
print(calc(2,5)) 32 32
res = map(lambda x:x**2,[1,5,7,4,8])
for i in res:
print(i) 1 25 49 16 64
高阶函数
def get_abs(n):
    """Return the absolute value of ``n``.

    Non-negative inputs are returned unchanged; negative inputs go through
    the built-in ``abs``.
    """
    return abs(n) if n < 0 else n
def add(x, y, f):
    """Apply ``f`` to both operands and return the sum of the results.

    ``f`` is any one-argument callable, which is what makes ``add`` a
    higher-order function.
    """
    return f(x) + f(y)
res = add(3,-6,get_abs)
print(res) 闭包
def outer():
    """Return a closure that still sees ``name`` after ``outer`` has returned."""
    name = 'alex'

    def inner():
        # `name` is looked up in the enclosing scope when inner() runs.
        print("在inner里打印外层函数的变量", name)

    # Return the function object itself; it is not called here.
    return inner
f = outer() # <function outer.<locals>.inner at 0x1027621e0>
f() # 相当于执行的是inner() 装饰器
def use_logging(func):
    """Decorator that prints ``"<name> is running"`` before each call.

    The wrapper forwards every positional and keyword argument and returns
    whatever ``func`` returns, so it can wrap any callable, not only
    zero-argument functions.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapping function, carrying ``func``'s name and docstring.
    """
    # Local import keeps this snippet self-contained.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        print("%s is running" % func.__name__)
        return func(*args, **kwargs)

    return wrapper
def foo():
print("i am foo")
foo = use_logging(foo)
foo() def use_logging(func):
def wrapper():
print("%s is running" % func.__name__)
func()
return wrapper
@use_logging
def foo():
print("i am foo")
foo() 带参数的装饰器
def use_loggin(level):
    """Build a decorator that prints a level tag before the wrapped call.

    Args:
        level: ``'warn'`` or ``'info'`` prints that word before the call;
            any other value prints nothing.

    Returns:
        A decorator. The decorated function receives all positional and
        keyword arguments unchanged and its return value is passed through.
    """
    # Local import keeps this snippet self-contained.
    from functools import wraps

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if level == 'warn':
                print('warn')
            elif level == 'info':
                print('info')
            # The original called func(*args), silently dropping kwargs
            # and discarding the result.
            return func(*args, **kwargs)

        return wrapper

    return decorator
@use_loggin(level='warn')
def foo(name='foo'):
print('I am foo')
foo() 类装饰器
class Foo(object):
    """Class-based decorator that prints a line before and after the call."""

    def __init__(self, func):
        # Keep the wrapped callable so __call__ can delegate to it.
        self._func = func

    def __call__(self, *args, **kwargs):
        """Call the wrapped function, forwarding arguments and its result."""
        print('class decorator runing')
        result = self._func(*args, **kwargs)
        print('class decorator ending')
        return result
@Foo
def bar():
print ('bar')
bar() 生成器
>>> g = (x for x in range(5)) >>> next(g) 0 >>> next(g) 1 >>> next(g) 2 >>> next(g) 3 >>> next(g) 4 >>> next(g) Traceback (most recent call last): File "<stdin>", line 1, in <module> StopIteration
>>> g = (x for x in range(5)) >>> for i in g: ... print(i) ... 0 1 2 3 4
def func():
    """Generator yielding 0..9, pausing after every value.

    Execution resumes from the ``yield`` each time the caller asks for the
    next item, which lets the caller do other work in between.
    """
    for i in range(10):
        yield i
f = func()
print(f)
print(f.__next__())
print(f.__next__())
print('干点其它事')
print(f.__next__())
print(f.__next__()) <generator object func at 0x7f239cc55308> 0 1 干点其它事 2 3
面向对象
基础语法
class Dog:
    """Minimal class example: one class attribute and three instance attributes."""

    # Class attribute, shared by every instance.
    d_type = "京巴"

    def __init__(self, name, age, master):
        """Store the instance attributes; prints a line to show when it runs."""
        print('初始化这个实例....', name)
        self.name = name
        self.age = age
        self.master = master

    def say_hi(self):
        """Print a greeting that includes the shared ``d_type``."""
        print("hello , I am a dog,my type is ", self.d_type)
d = Dog("毛毛",2,"Alex")
d2 = Dog("二蛋",3,"Jack")
d.say_hi()
d2.say_hi()
print(d2.name, d2.age, d2.master) 初始化这个实例.... 毛毛 初始化这个实例.... 二蛋 hello , I am a dog,my type is 京巴 hello , I am a dog,my type is 京巴 二蛋 3 Jack
依赖关系
class Dog:
    """Dog that depends on a master object (dependency relationship)."""

    def __init__(self, name, age, breed, master):
        """Store the attributes and greet immediately.

        ``master`` must be an object with a ``name`` attribute, because
        ``sayhi`` reads ``self.master.name``.
        """
        self.name = name
        self.age = age
        self.breed = breed
        self.master = master
        # Call an instance method during construction.
        self.sayhi()

    def sayhi(self):
        """Print the dog's name, breed and its master's name."""
        print("Hi, I'm %s, a %s dog, my master is %s" % (self.name, self.breed, self.master.name))
class Person:
    """Person who can walk a dog passed in as an argument."""

    def __init__(self, name, age, sex):
        self.name = name
        self.age = age
        self.sex = sex

    def walk_dog(self, dog_obj):
        """Print that this person walks ``dog_obj`` (reads ``dog_obj.name``)."""
        print("主人[%s]带狗[%s]去溜溜." % (self.name, dog_obj.name))
p = Person("Dylan",18,"Male")
d = Dog("Jack",5,"二哈",p)
p.walk_dog(d) Hi, I'm Jack, a 二哈 dog, my master is Dylan 主人[Dylan]带狗[Jack]去溜溜
组合关系
class Dog: # 定义一个狗类
role = 'dog' # 狗的角色属性都是狗
......
class Weapon:
    """Set of weapons that can be used on a target object.

    Every weapon method records the weapon's name and attack value on the
    instance, subtracts that value from the target's ``life_val`` and prints
    a log line. The target must expose ``name`` and ``life_val``.
    """

    def _strike(self, obj, name, attack_val):
        """Apply one hit of the named weapon to ``obj`` and log the result."""
        self.name = name
        self.attack_val = attack_val
        obj.life_val -= self.attack_val
        self.print_log(obj)

    def stick(self, obj):
        """Hit ``obj`` with the dog-beating stick (40 damage)."""
        self._strike(obj, "打狗棒", 40)

    def knife(self, obj):
        """Hit ``obj`` with the dragon-slaying knife (80 damage)."""
        self._strike(obj, "屠龙刀", 80)

    def gun(self, obj):
        """Hit ``obj`` with the AK47 (100 damage)."""
        self._strike(obj, "AK47", 100)

    def print_log(self, obj):
        """Print who was hit, by which weapon, the damage and remaining life."""
        print("[%s]被[%s]攻击了,掉血[%s],还剩血量[%s]..." % (obj.name, self.name, self.attack_val, obj.life_val))
class Person:
    """Person who owns a ``Weapon`` instance (composition relationship)."""

    # Class attribute: every person has the role 'person'.
    role = 'person'

    def __init__(self, name, sex, attack_val):
        self.name = name
        self.attack_val = attack_val
        self.life_val = 100
        self.sex = sex
        # Composition: each person holds its own Weapon object.
        self.weapon = Weapon()

    def attack(self, dog):
        """Attack ``dog`` bare-handed.

        ``dog`` is an object with ``name`` and ``life_val``; its ``life_val``
        is reduced by this person's ``attack_val`` and a log line is printed.
        """
        dog.life_val -= self.attack_val
        print("人[%s]打了狗[%s],狗掉血[%s],还剩血量[%s]..." % (self.name, dog.name, self.attack_val, dog.life_val))
d = Dog("mjj","二哈",20)
p = Person("Alex","Male",60)
d.bite(p) # 对象交互,把p实例传递给d的方法
p.attack(d)
p.weapon.knife(d) # 通过组合的方式调用weapon实例下的具体武器
p.weapon.stick(d) class BirthDate:
def __init__(self, year, month, day):
self.year = year
self.month = month
self.day = day
class Course:
def __init__(self, name, price, period):
self.name = name
self.price = price
self.period = period
class Teacher:
def __init__(self, name, gender, birth, course):
self.name = name
self.gender = gender
self.birth = birth
self.course = course
def teaching(self):
print('teaching.....',self.course.name)
p1 = Teacher('Alex', 'Male',
BirthDate('1995', '1', '27'),
Course('Python', '28000', '5 months')
)
print(p1.birth.year, p1.birth.month, p1.birth.day)
print(p1.course.name, p1.course.price, p1.course.period) 属性的增删改查
class Person:
    """Class used to demonstrate adding, changing and deleting attributes."""

    # Class attributes, shared by all instances until shadowed or deleted.
    nationality = "Chinese"
    addr = "北京"

    def __init__(self, name, age, sex):
        # Instance attributes, stored per object.
        self.name = name
        self.age = age
        self.sex = sex
# 实例属性操作
p = Person("Dylan",18,"Male")
p.name = "Jack" # 修改属性
p.job = "CEO" # 添加实例属性
del p.sex # 删除实例属性
print(p.job) #打印添加的实例属性
# 类属性操作
Person.nationality = "US"
Person.race = "Yellow" # 添加类属性
del Person.addr
print(p.addr) # 再调用已删除的类属性就会报错了 继承
class Animal:
    """Base class for the inheritance example."""

    def __init__(self, name, age, sex):
        self.name = name
        self.age = age
        self.sex = sex

    def eat(self):
        """Print that this animal is eating."""
        print("[%s] is eating..." % self.name)


class People(Animal):
    """Subclass that adds ``walk`` and inherits ``eat`` unchanged."""

    def walk(self):
        """Print that this person is walking."""
        print("People [%s] is walking..." % self.name)


class Pig(Animal):
    """Subclass that overrides the inherited ``eat``."""

    def eat(self):
        """Print the pig-specific eating message instead of the base one."""
        print("Pig [%s] is eating..." % self.name)
person = People("Alex",25,"Male")
pig = Pig("Mjj",4,"公")
person.walk()
person.eat() # 继承自父类的eat方法
pig.eat() class People(Animal):
def __init__(self,name,age,sex,race):
#Animal.__init__(self,name,age,sex) # 先执行父类方法
super(People,self).__init__(name,age,sex)
#super().__init__(name,age,sex) # 跟上面这行super语法的效果一样,一般用这种写法的多
self.race = race # 再加上子类的属性
print("初始化了一个人....") C3算法
# Class hierarchy used to show the method resolution order (C3 linearization).
# The commented-out methods are kept on purpose: uncomment them to see how
# the lookup result changes.
class A:
    def test(self):
        print('from A')


class B(A):
    # def test(self):
    #     print('from B')
    pass


class B2:
    def test(self):
        print('from B2')


class C(A):
    def test(self):
        print('from C')


class C2:
    def test(self):
        print('from C2')


class D(B, B2):
    # def test(self):
    #     print('from D')
    pass


class E(C, C2):
    def test(self):
        print('from E')


class F(D, E):
    # def test(self):
    #     print('from F')
    pass
f1=F()
f1.test()
print(F.__mro__) # 打印类的继承顺序 from E (<class '__main__.F'>, <class '__main__.D'>, <class '__main__.B'>, <class '__main__.E'>, <class '__main__.C'>, <class '__main__.A'>, <class '__main__.B2'>, <class '__main__.C2'>, <class 'object'>)
封装
class Test(object):
def __init__(self, value):
self.__value = value
def __get(self):
return self.__value
def __set(self, v):
self.__value = v 多态
统一函数接口实现多态
class Dog(object):
    """Animal whose ``sound`` prints a bark."""

    def sound(self):
        print("汪汪汪.....")


class Cat(object):
    """Animal whose ``sound`` prints a meow."""

    def sound(self):
        print("喵喵喵.....")


def make_sound(animal_type):
    """Single entry point: call ``sound()`` on whatever object is passed in.

    Any object that provides a ``sound`` method works; no common base class
    is needed.
    """
    animal_type.sound()
dogObj = Dog()
catObj = Cat()
make_sound(dogObj)
make_sound(catObj) 通过抽象类实现多态 (最常用)
from abc import ABCMeta, abstractmethod
class Animal(metaclass=ABCMeta):
    """Abstract base: subclasses must implement ``show`` to be instantiable."""

    @abstractmethod
    def show(self):
        print('I am an animal.')


class Cat(Animal):
    def show(self):
        print('I am a cat.')


class Dog(Animal):
    def show(self):
        print('I am a dog.')


class Tiger(Animal):
    def show(self):
        print('I am a tiger.')
x = [item() for item in (Cat, Dog, Tiger)]
for item in x:
item.show() 类方法
class Student(object):
    """Counts how many students were created, using a class method."""

    # The counter lives on the class, not on each instance.
    __stu_num = 0

    def __init__(self, name):
        self.name = name
        # Calling through the instance still passes the class as `cls`.
        self.add_stu_num()

    @classmethod
    def add_stu_num(cls):
        """Increment the class-level counter and print the new total."""
        cls.__stu_num += 1
        print("total student num:", cls.__stu_num)
s1 = Student("张1")
s2 = Student("张2")
s3 = Student("张3")
s4 = Student("张4")
Student.add_stu_num() # 也可以这样调 静态方法
class Student(object):
    """Shows a static method: it receives neither the instance nor the class."""

    stu_num = 0

    def __init__(self, name):
        self.name = name

    @staticmethod
    def fly():
        """Print a fixed marker string."""
        print('static')
Student.fly() Property
class Flight(object):
    """Shows ``@property`` with a getter, a setter and a deleter."""

    def __init__(self, name):
        self.flight_name = name

    def checking_status(self):
        """Pretend to query the airline and return a status code (always 1)."""
        print("connecting airline company api...... ")
        print("checking flight %s status " % self.flight_name)
        return 1

    @property
    def flight_status(self):
        """Check the status, print a description and return the status code.

        Returning the code (the original returned nothing) lets callers use
        the property's value as well as read the printed message.
        """
        status = self.checking_status()
        if status == 0:
            print("flight got canceled...")
        elif status == 1:
            print("flight is arrived...")
        elif status == 2:
            print("flight has departured already...")
        else:
            print("cannot confirm the flight status...,please check later")
        return status

    @flight_status.setter
    def flight_status(self, status):
        """Runs on ``obj.flight_status = value``; only prints, stores nothing."""
        print('change.')

    @flight_status.deleter
    def flight_status(self):
        """Runs on ``del obj.flight_status``; only prints."""
        print('delete.')
f = Flight("CA980")
f.flight_status
f.flight_status = 1
del f.flight_status
f.flight_status 反射
基础语法(下)
异常处理
try-except
a = input('Please input: ')
try:
x = int(a);
except Exception as e:
print('Error!') try-except-else
a = input('Please input: ')
try:
x = int(a);
except Exception as e:
print('Error!')
else:
print('else') # try未发生异常就执行 try-except-finally
a = input('Please input: ')
try:
x = int(a);
except Exception as e:
print('Error!')
finally:
print('fianlly') # 无论try发不发生异常,都会执行 捕获多种异常
try:
x=float(input('请输入被除数: '))
y=float(input('请输入除数: '))
z=x/y
except ZeroDivisionError:
print('除数不能为零')
except ValueError:
print('被除数和除数应该为数值类型')
except NameError:
print('变量不存在')
else:
print(x,'/',y,'=',z) try:
x=float(input('请输入被除数: '))
y=float(input('请输入除数: '))
z=x/y
except (ZeroDivisionError, ValueError, TypeError, NameError):
print('Error')
else:
print(x,'/',y,'=',z) 复合异常语句
def div(x,y):
try:
z=x/y
except ZeroDivisionError:
print('除数不能为零')
except TypeError:
print('被除数和除数应该为数值类型')
else:
print(x,'/',y,'=',z)
finally:
print('executing finally clause') 操作文件
- 将字符串写入文本文件,然后再读取并输出
s='Hello world\n文本文件的读取方法\n文本文件的写入方法\n'
with open('sample.txt','w') as fp:
fp.write(s)
with open('sample.txt','r') as fp:
conten = fp.read()
print(conten) - 拷贝文本文件
def fileCopy(src, dst, srcEncoding, dstEncoding):
    """Copy a text file, re-encoding it on the way.

    Args:
        src: Path of the file to read.
        dst: Path of the file to write (overwritten if it exists).
        srcEncoding: Encoding used to decode ``src``.
        dstEncoding: Encoding used to encode ``dst``.

    The whole file is read into memory at once.
    """
    with open(src, 'r', encoding=srcEncoding) as srcfp, \
            open(dst, 'w', encoding=dstEncoding) as dstfp:
        # The original wrote to the undefined name `dstf` (NameError).
        dstfp.write(srcfp.read())
fileCopy('sample.txt','sample_new.txt','gbk','utf-8') - readline()
with open('temp.txt','r') as fp:
data = fp.readline()
print(data.strip()) - readlines()
with open('temp.txt','r') as fp:
data = fp.readlines()
for i in data:
print(i.strip()) - json()
dumps把数据类型转换成字符串
dump把数据类型转换成字符串并存储在文件中
loads把字符串转换成数据类型
load把文件打开从字符串转换成数据类型
import json
test_dict = {'bigberg': [7600, {1: [['iPhone', 6300], ['Bike', 800], ['shirt', 300]]}]}
print(test_dict)
print(type(test_dict))
#dumps 将字典转换成json字符串
json_str = json.dumps(test_dict)
# loads 将json字符串转化为字典
new_dict = json.loads(json_str)
print(new_dict)
print(type(new_dict))
# 写入json文件
with open('temp.json', 'w') as fp:
json.dump(new_dict, fp)
# 读取json文件
with open('temp.json', 'r') as fp:
load_dic = json.load(fp)
print(load_dic) 字符串
>>> x = 'abc' >>> x 'abc' >>> y = "xyz" >>> y 'xyz' >>> x + y 'abcxyz'
>>> type('Hello world')
<class 'str'>
>>> type(b'Hello world')
<class 'bytes'>
>>> 'Hello world'.encode('utf-8')
b'Hello world'
>>> 'Hello world'.encode('gbk')
b'Hello world'
>>> a = '洪世贤'.encode('utf-8')
>>> a
b'\xe6\xb4\xaa\xe4\xb8\x96\xe8\xb4\xa4'
>>> a.decode('utf-8')
'洪世贤'
>>> b = '洪世贤'.encode('gbk')
>>> b
b'\xba\xe9\xca\xc0\xcf\xcd'
>>> b.decode('gbk')
'洪世贤' - GB2312 :2个字节表示中文
- GBK:2个字节表示中文
- UTF-8: 3个字节表示中文
find()、 rfind()、 index()、 rindex()、 count()
>>> s = "apple, peach, banana, peach, pear"
>>> s.find("peach")
7
>>> s.find("peach",9)
22
>>> s.find("peach",9, 20) # 指定位置查找, 不存在返回-1
-1
>>> s.rfind('p') # 从右往前找
29
>>> s.index('p')
1
>>> s.rindex('pe')
29
>>> s.index('pear')
29
>>> s.index('ppp') # 不存在报错
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: substring not found
>>> s.count('p') # 计数
5
>>> s.count('ppp') # 不存在返回0
0 split()、 rsplit()、 partition()、 rpartition()
>>> s
'apple, peach, banana, peach'
>>> s.split(",")
['apple', ' peach', ' banana', ' peach']
>>> s="2019-7-10"
>>> t = s.split("-")
>>> t
['2019', '7', '10']
>>> list(map(int,t)) # 将字符串列表转化为int类型列表
[2019, 7, 10] split()、 rsplit() 默认分割符是任意空白符
>>> s.split('-',maxsplit=1) # 指定最大分割次数
['2019', '7-10']
>>> s.rsplit('-',maxsplit=1)
['2019-7', '10'] >>> s = 'apple, peach, banana, peach'
>>> s.partition(',')
('apple', ',', ' peach, banana, peach')
>>> s.rpartition(',')
('apple, peach, banana', ',', ' peach')
>>> s.rpartition('banana')
('apple, peach, ', 'banana', ', peach')
>>> s.rpartition('xxx')
('', '', 'apple, peach, banana, peach')
>>> 'abababab'.partition('a')
('', 'a', 'bababab')
>>> 'abababab'.rpartition('a')
('ababab', 'a', 'b') join()
>>> li
['apple', 'banana', 'peach', 'pear']
>>> ','.join(li)
'apple,banana,peach,pear'
>>> ':'.join(li)
'apple:banana:peach:pear'
>>> ''.join(li)
'applebananapeachpear'
# 判断两个字符串除空白符外,内容是否一致
>>> x = 'aaa bb cde ffff '
>>> ' '.join(x.split())
'aaa bb cde ffff'
>>> def equavilent(s1,s2):
... if s1 == s2:
... return True
... elif ''.join(s1.split()) == ''.join(s2.split()):
... return True
... else:
... return False
...
>>> equavilent('pip list', 'pip list')
True
>>> equavilent('pip list', 'pip li st')
True
>>> equavilent(' pip list', 'pip li st')
True lower()、 upper()、 capitalize()、 title()、 swapcase()
>>> s = 'What is Your Name?' >>> s.lower() 'what is your name?' >>> s.upper() 'WHAT IS YOUR NAME?' >>> s.capitalize() 'What is your name?' >>> s.title() 'What Is Your Name?' >>> s.swapcase() 'wHAT IS yOUR nAME?'
replace()
>>> s = '中国 中国'
>>> print(s.replace('中国', '中华人民共和国'))
中华人民共和国 中华人民共和国
>>> print('abceeeeabcggg'.replace('abc' ,'ABC'))
ABCeeeeABCggg
strip()、 lstrip()、 rstrip()
>>> s = ' abc '
>>> s.lstrip()
'abc '
>>> s.rstrip()
' abc'
>>> s.strip()
'abc'
>>>
>>> 'aaabbdasa'.strip('a')
'bbdas'
>>> 'aaabbdasa'.rstrip('a')
'aaabbdas'
>>> 'aaabbdasa'.strip('ab')
'das'
>>> 'aaabbdasa'.rstrip('ab')
'aaabbdas' startswith()、 endswith()
>>> s = 'Beautiful is better than ugly'
>>> s.startswith('Be')
True
>>> s.startswith('Be',5)
False
>>> s.startswith('Be',0,5)
True
>>> s.endswith('Be',0,5)
False
>>> s.endswith('ugly')
True isalnum()、 isalpha()、 isdigit()、 isdecimal()、 isnumeric()、 isspace()、 isupper()、 islower()
>>> '1234abcd'.isalnum() True >>> '1234abcd'.isalpha() False >>> '1234abcd'.isdigit() False >>> '1234.5'.isdigit() False >>> '1234'.isdigit() True >>> '九'.isnumeric() True >>> '1234.5'.isnumeric() False >>> '1234.5'.isdecimal() False >>> '1.5'.isdecimal() False >>> 'ABc'.isupper() False >>> 'ABC'.isupper() True >>> 'abc'.islower() True
center()、 ljust()、 rjust()、 zfill()
>>> 'Hello world!'.center(20) ' Hello world! ' >>> 'Hello world!'.center(20, '=') '====Hello world!====' >>> 'Hello world!'.ljust(20, '=') 'Hello world!========' >>> 'Hello world!'.rjust(20, '=') '========Hello world!' >>> 'abc'.zfill(5) '00abc' >>> 'abc'.zfill(2) 'abc' >>> 'abc'.zfill(20) '00000000000000000abc'
中英文分词
def _collect_runs(text, belongs):
    """Return the maximal runs of consecutive characters accepted by ``belongs``."""
    runs = []
    current = ''
    for ch in text:
        if belongs(ch):
            current += ch
        elif current:
            runs.append(current)
            current = ''
    # Flush the last run. The original loops only stored a token when the
    # NEXT character ended it, so a token at the very end of the string was
    # lost (and an unfinished English token leaked into the Chinese pass).
    if current:
        runs.append(current)
    return runs


x = '狗 dog 猫 cat 杯子 cup 桌子 table 你好'
# English words: runs of ASCII letters.
e = _collect_runs(x, lambda ch: 'a' <= ch <= 'z' or 'A' <= ch <= 'Z')
# Chinese words: runs of characters in the range U+4E00..U+9FA5.
c = _collect_runs(x, lambda ch: 0x4e00 <= ord(ch) <= 0x9fa5)
# NOTE: any sample output recorded from the original code omits the trailing
# '你好'; with the flush above it is now included in `c`.
print(e)
print(c)
// ['dog', 'cat', 'cup', 'table']
// ['狗', '猫', '杯子', '桌子'] 正则表达式
常用模块
os模块
- 常用方法
得到当前工作目录,即当前Python脚本工作的目录路径: os.getcwd()
返回指定目录下的所有文件和目录名:os.listdir()
函数用来删除一个文件:os.remove()
删除多个目录:os.removedirs(r"c:\python")
检验给出的路径是否是一个文件:os.path.isfile()
检验给出的路径是否是一个目录:os.path.isdir()
判断是否是绝对路径:os.path.isabs()
检验给出的路径是否真实存在:os.path.exists()
返回一个路径的目录名和文件名:os.path.split() e.g os.path.split('/home/swaroop/byte/code/poem.txt') 结果:('/home/swaroop/byte/code', 'poem.txt')
分离扩展名:os.path.splitext() e.g os.path.splitext('/usr/local/test.py') 结果:('/usr/local/test', '.py')
获取路径名:os.path.dirname()
获得绝对路径: os.path.abspath()
获取文件名:os.path.basename()
运行shell命令: os.system()
读取操作系统环境变量HOME的值:os.getenv("HOME")
返回操作系统所有的环境变量: os.environ
设置系统环境变量,仅程序运行时有效:os.environ.setdefault('HOME','/home/alex')
给出当前平台使用的行终止符:os.linesep Windows使用'\r\n',Linux and MAC使用'\n'
指示你正在使用的平台:os.name 对于Windows,它是'nt',而对于Linux/Unix用户,它是'posix'
重命名:os.rename(old, new)
创建多级目录:os.makedirs(r"c:\python\test")
创建单个目录:os.mkdir("test")
获取文件属性:os.stat(file)
修改文件权限:os.chmod(file, mode);修改文件的访问/修改时间戳:os.utime(file, times)
获取文件大小:os.path.getsize(filename)
结合目录名与文件名:os.path.join(dir,filename)
改变工作目录到dirname: os.chdir(dirname)
获取当前终端的大小: os.get_terminal_size()
杀死进程: os.kill(10884,signal.SIGKILL) - 部分方法演示
>>> import os
>>>
>>> os.getcwd() # 获取当前路径
'C:\\Users\\Dylan\\AppData\\Local\\Programs\\Python\\Python36'
>>> os.mkdir(os.getcwd()+'\\temp') # 建立文件夹
>>> os.chdir(os.getcwd()+'\\temp') # 改变路径
>>> os.getcwd()
'C:\\Users\\Dylan\\AppData\\Local\\Programs\\Python\\Python36\\temp'
>>> os.listdir('.') # 查看当前路径下的文件和目录
[]
>>> os.mkdir(os.getcwd()+'\\test')
>>> os.listdir('.')
['test']
>>> os.listdir('..') # 查看上一层路径下的文件和目录
['DLLs', 'Doc', 'etc', 'favicon.ico', 'github.ico', 'include', 'Lib', 'libs', 'LICENSE.txt', 'NEWS.txt', 'python.exe', 'python3.dll', 'python36.dll', 'pythonw.exe', 'Scripts', 'share', 'tcl', 'temp', 'Tools', 'vcruntime140.dll']
>>> os.startfile('notepad.exe') # 打开应用程序
>>> os.startfile('chrome.exe')
>>> os.path.splitext('usr/local/test.py') # 分离文件名和后缀
('usr/local/test', '.py') random模块
>>> random.randrange(1,10) #返回1-10之间的一个随机数,不包括10
>>> random.randint(1,10) #返回1-10之间的一个随机数,包括10
>>> random.randrange(0, 100, 2) #随机选取0到100间的偶数
>>> random.random() #返回一个随机浮点数
>>> random.choice('abce3#$@1') #返回一个给定数据集合中的随机字符
'#'
>>> random.sample('abcdefghij',3) #从多个字符中选取特定数量的字符
['a', 'd', 'b']
#生成随机字符串
>>> import string
>>> ''.join(random.sample(string.ascii_lowercase + string.digits, 6)) # string.ascii_letters
'4fvda1'
#洗牌
>>> a
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> random.shuffle(a)
>>> a
[3, 0, 7, 2, 1, 6, 5, 8, 9, 4] hash加密
数据库
爬虫
Request
- get请求
import requests
# 1.指定url
url = 'https://www.sogou.com/'
# 2.发起请求, 返回对象
response = requests.get(url=url)
# 3.获取响应数据, 获取字符串数据
data = response.text
# 4.持久化存储
with open('sogou.html','w',encoding='utf-8') as fp:
fp.write(data)
print('搜狗爬取完毕') - get请求
import requests

# Ask for the search keyword; it is also used as the output file name.
word = input('Input what you want to search: ')

# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}
url = 'https://www.sogou.com/web'
# requests appends these as the URL query string (?query=<word>).
param = {
    'query': word
}
response = requests.get(url=url, params=param, headers=headers)
# The original first assigned `resonse.content.decode('utf-8')`, a misspelt
# name that raises NameError, and then overwrote it with response.text
# anyway. Only the effective assignment is kept.
page_text = response.text

filename = word + '.html'
with open(filename, 'w', encoding='utf-8') as fp:
    fp.write(page_text)
print('在搜狗中的关键词{0}的内容爬取完毕!'.format(word))
- post请求
```python
import requests
import json
word = input('input a English word: ')
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
}
url = 'https://fanyi.baidu.com/v2transapi'
data = {
'from': 'en',
'to': 'zh',
'query': word,
'simple_means_flag': '3',
'sign': '704513.926512', # js算法加密
'token': '052a6ea860aa8b4c05039ac7c185b386'
}
response = requests.post(url=url,data=data,headers=headers)
if response.status_code == 200:
page_json = response.json()
print(page_json)
# filename = word + '.json'
# with open(filename, 'w', encoding='utf-8') as fp:
# json.dump(page_json, fp, ensure_ascii=False)
#
# print('在百度翻译中的关键词{0}的内容爬取完毕!'.format(word))
else:
print('error')- post请求
爬取结果: {'error': 997, 'from': 'en', 'to': 'zh', 'query': 'apple'}
百度翻译实现了加密参数, 无法直接爬取
可参考破解博文: http://www.sohu.com/a/285486121_99987664
- 获取ajax的Json数据
```python
import requests
import json
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
}
url = 'https://movie.douban.com/j/chart/top_list'
param = {
'type': '24',
'interval_id': '100:90',
'start': '0',
'limit': '20'
}
response = requests.get(url=url,params=param,headers=headers)
if response.status_code == 200:
page_json = response.json()
print(page_json)
# filename = word + '.json'
# with open(filename, 'w', encoding='utf-8') as fp:
# json.dump(page_json, fp, ensure_ascii=False)
#
# print('在豆瓣电影中喜剧排行榜内容爬取完毕!')
else:
print('error')- 模拟登陆
- cookie操作
- 代理IP操作
正则解析
BS4
- 常用方法
使用流程:
- 导包:from bs4 import BeautifulSoup
- 使用方式:可以将一个html文档,转化为BeautifulSoup对象,然后通过对象的方法或者属性去查找指定的节点内容
(1)转化本地文件:
- soup = BeautifulSoup(open('本地文件'), 'lxml')
(2)转化网络文件:
- soup = BeautifulSoup('字符串类型或者字节类型', 'lxml')
(3)打印soup对象显示内容为html文件中的内容
基础巩固:
(1)根据标签名查找
- soup.a 只能找到第一个符合要求的标签
(2)获取属性
- soup.a.attrs 获取a所有的属性和属性值,返回一个字典
- soup.a.attrs['href'] 获取href属性
- soup.a['href'] 也可简写为这种形式
(3)获取内容
- soup.a.string # 找直接的, 且不能嵌套
- soup.a.text # 会找下一级, 递归
- soup.a.get_text() # 会找下一级, 递归
【注意】如果标签还有标签,那么string获取到的结果为None,而其它两个,可以获取文本内容
(4)find:找到第一个符合要求的标签
- soup.find('a') 找到第一个符合要求的
- soup.find('a', title="xxx")
- soup.find('a', alt="xxx")
- soup.find('a', class_="xxx")
- soup.find('a', id="xxx")
(5)find_all:找到所有符合要求的标签
- soup.find_all('a')
- soup.find_all(['a','b']) 找到所有的a和b标签
- soup.find_all('a', limit=2) 限制前两个
(6)根据选择器选择指定的内容
select:soup.select('#feng')
- 常见的选择器:标签选择器(a)、类选择器(.)、id选择器(#)、层级选择器
- 层级选择器:
div .dudu #lala .meme .xixi 下面好多级
div > p > a > .lala 只能是下面一级
【注意】select选择器返回永远是列表,需要通过下标提取指定的对象 - 爬取古诗网三国演义小说
from bs4 import BeautifulSoup
import requests
import os

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
}


def parse_content(url):
    """Download one chapter page and return its text.

    :param url: link of the chapter page
    :return: text of the ``div`` whose class is ``chapter_content``
    """
    page_text = requests.get(url=url, headers=headers).text
    soup = BeautifulSoup(page_text, 'lxml')
    elem = soup.find('div', class_="chapter_content")
    content = elem.text
    return content


def write_to_file(title, content):
    """Write one chapter to ``<title>.txt`` (UTF-8), title line first.

    :param title: chapter title, also used as the file name
    :param content: chapter text
    :return: None
    """
    filename = title + '.txt'
    title = title.center(100, ' ')  # centre the title within 100 columns
    content = title + '\n' + content
    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(content)


# The guard had lost its double underscores (`if name == 'main':`).
if __name__ == '__main__':
    url = 'http://www.shicimingju.com/book/sanguoyanyi.html'
    page_text = requests.get(url=url, headers=headers).text
    soup = BeautifulSoup(page_text, 'lxml')
    # One <a> per chapter in the table of contents.
    a_list = soup.select('.book-mulu>ul>li>a')
    for index, a_tag in enumerate(a_list, start=1):
        print('开始下载第{0}章节'.format(index))
        title = a_tag.string
        content_url = 'http://www.shicimingju.com' + a_tag["href"]
        content = parse_content(content_url)
        write_to_file(title, content)
        print('第{0}章下载完成'.format(index))
        # Stops after the first chapter, as in the original demo.
        break
### Xpath
- 常用方法
```python
属性定位:
#找到class属性值为song的div标签
//div[@class="song"]
层级&索引定位:
#找到class属性值为tang的div的直系子标签ul下的第二个子标签li下的直系子标签a
//div[@class="tang"]/ul/li[2]/a
逻辑运算:
#找到href属性值为空且class属性值为du的a标签
//a[@href="" and @class="du"]
模糊匹配:
//div[contains(@class, "ng")]
//div[starts-with(@class, "ta")]
取文本:
# /表示获取某个标签下的文本内容
# //表示获取某个标签下的文本内容和所有子标签下的文本内容
//div[@class="song"]/p[1]/text()
//div[@class="tang"]//text()
取属性:
//div[@class="tang"]//li[2]/a/@href- 本地文件:tree = etree.parse(文件名)
tree.xpath("xpath表达式")
- 网络数据:tree = etree.HTML(网页内容字符串)
tree.xpath("xpath表达式") - 解析58二手房的相关数据
https://sz.58.com/ershoufang/pn
- 爬取网站图片
import requests
from lxml import etree
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
}
url = 'http://jandan.net/ooxx/page-16'
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
imgCode_list = tree.xpath('//div[@class="text"]//img/@src')
for img_url in imgCode_list:
img_url = 'http:' + img_url
img_data = requests.get(url=img_url, headers=headers).content
with open('img.jpg','wb') as fp:
fp.write(img_data)
break - 爬取各城市空气质量数据
import requests
from lxml import etree
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
}
url = 'https://www.aqistudy.cn/historydata/'
response = requests.get(url=url,headers=headers)
#获取页面原始编码格式
print(response.encoding)
page_text = response.text
tree = etree.HTML(page_text)
li_list = tree.xpath('//div[@class="bottom"]//li')
for li in li_list:
city_name = li.xpath('./a/text()')[0]
city_url = 'https://www.aqistudy.cn/historydata/' + li.xpath('./a/@href')[0]
print(city_name, city_url)