Python基础语法速查

Python基础语法速查

Python编码规范

基础语法(上)

导包

import 模块名 [as 别名]

import numpy as np

print(np.sin(x))

from 模块名 import 对象名 [as 别名]

# "from <package> import <object> as <alias>": bind the pyplot submodule to the name plt
from matplotlib import pyplot as plt

plt.plot(x,y)

from 模块名 import *

from math import *

print(pi)

__name__属性

模块被直接运行时,其 __name__ 的值为 '__main__';被其他模块导入时,__name__ 的值为模块自身的名字(不含 .py 后缀)

def main():
    """Print whether this file was run directly or imported as a module.

    The check relies on the module-level ``__name__``: it is ``'__main__'``
    when the file is executed as a script, and the module name (file name
    without the ``.py`` suffix) when the file is imported.
    """
    if __name__ == '__main__':
        print('This program is run directly.')
    elif __name__ == 'hello':
        # Imported as "import hello": __name__ never carries the ".py" suffix
        print('This program is used as a module.')

利用这一点,可以让某些代码只在本模块被直接运行时执行,而在被其他模块导入时不执行

if __name__ == '__main__':
    pass

变量

name1 = "张三"
name2 = name1
print(name1, name2)
name2 = "李四"
print(name1, name2)
张三 张三
张三 李四
  • 引用机制, 内存中开辟空间并赋值"张三" 用name1指向"张三"的地址

  • name2指向name1指向的地址

  • 内存中开辟新空间并赋值"李四", 更改name2的指向方向, 使其指向"李四"的地址

查看内存地址和判断数据类型

>>> x=3
>>> id(x)
10968864
>>> type(x)
<class 'int'>
>>> type(x) == int
True
>>> isinstance(x,int)
True

注释

# 我是注释

"""
我也是注释
"""

用户输入输出

name = input("What is your name? ")
# input() always returns a str, so the conversion wraps the input() call
age = int(input("How old are you? "))
# eval() evaluates the typed text as a Python expression; it runs arbitrary
# code, so never use it on untrusted input
age = eval(input("How old are you? "))
# sep replaces the default single space between the printed values
print(1,2,3,sep='\t')
# end replaces the default trailing newline
print(1,end=' ')

格式化打印

print('a={0}, b={1}, c={2}'.format('a','b','c'))
a=a, b=b, c=c

数字

>>> 9999**99
990148353526723487602263124753282625570559528895791057324326529121794837894053513464422176826916433932586924386677766244032001623756821400432975051208820204980098735552703841362304669970510691243800218202840374329378800694920309791954185117798434329591212159106298699938669908067573374724331208942425544893910910073205049031656789220889560732962926226305865706593594917896276756396848514900989999
>>> 0.3+0.2
0.5
>>> 0.4-0.1
0.30000000000000004
>>> 0.4-0.1 == 0.3
False
>>> abs(0.4-0.3-0.1) < 1e-6
True
>>> x=3+4j
>>> y=5+6j
>>> x+y
(8+10j)
>>> x*y
(-9+38j)
>>> abs(x)
5.0
>>> x.imag
4.0
>>> x.real
3.0
>>> x.conjugate()
(3-4j)

分数

>>> from fractions import Fraction
>>> x = Fraction(3,5)
>>> y = Fraction(3,7)
>>> x
Fraction(3, 5)
>>> x**2
Fraction(9, 25)
>>> x.numerator
3
>>> x.denominator
5
>>> x+y
Fraction(36, 35)
>>> x-y
Fraction(6, 35)
>>> x*y
Fraction(9, 35)
>>> x/y
Fraction(7, 5)
>>> x*2
Fraction(6, 5)
>>> Fraction(3.5)
Fraction(7, 2)
>>> from decimal import Decimal
>>> 1/9
0.1111111111111111
>>> Decimal(1/9)
Decimal('0.111111111111111104943205418749130330979824066162109375')
>>> 1/3
0.3333333333333333
>>> Decimal(1/3)
Decimal('0.333333333333333314829616256247390992939472198486328125')
>>> Decimal(1/9) + Decimal(1/3)
Decimal('0.4444444444444444197728216750')

类型转换

>>> bin(555)    # 转化为二进制
'0b1000101011'
>>> oct(555)    # 转化为八进制
'0o1053'
>>> hex(555)    # 转化为十六进制
'0x22b'
>>> int('0x22b', 16)    # 十六进制转化为十进制
555
>>> int('0o1053',8)        # 八进制转化为十进制
555
>>> int('0b1000101011',2)    # 二进制转化为十进制
555
>>> ord('a')    # 查看指定字符的 Unicode 编码
97
>>> ord('栋')    # 支持中文
26635
>>> chr(26635)    # 查看对应字符
'栋'

流程控制语句

age = 48
while True:
    guess = int(input("你猜我几岁: "))
    if guess > age :
        print("猜的太大了,往小里试试...")
    elif guess < age :
        print("猜的太小了,往大里试试...")
    else:
        print("恭喜你,猜对了...")
        break
你猜我几岁: 89
猜的太大了,往小里试试...
你猜我几岁: 12
猜的太小了,往大里试试...
你猜我几岁: 48
恭喜你,猜对了...

身份运算

# is / is not 判定是否引用同一个对象, 比较内存地址
# 比较某个变量是否是某个类型

age = 99
age2 = 99
print(type(age) is int)
print(age2 is age)
print(id(age), id(age2))
True
True
2012969040 2012969040

三元运算

a = 3
b = 4
# Python has no C-style "cond ? x : y" operator; the conditional expression
# "x if cond else y" is the equivalent
print(a if a > b else b)
# NOTE: the name "max" shadows the builtin max(); it is kept only to match the original snippet
max = a if a > b else b

基础语法(中)

列表

创建

>>> a = [1,2,3,4,'4']
>>> a
[1, 2, 3, 4, '4']
>>> list((1,2,3,4))
[1, 2, 3, 4]
>>> list(range(0,10,2))
[0, 2, 4, 6, 8]
>>> list({a:'2',b:'3'})
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: unhashable type: 'list'
>>> list({'a':3,'b':4})
['a', 'b']
>>> list({'a':3,'b':4}.items())
[('a', 3), ('b', 4)]

访问

>>> a = [1,2,3,4,'4']
>>> a[0]
1
>>> a[-1]
'4'

追加

names = ["钢铁侠", "蜘蛛侠"]
names.append("蝙蝠侠")
print(names)
['钢铁侠', '蜘蛛侠', '蝙蝠侠']

插入

names = ['钢铁侠', '蜘蛛侠', '蝙蝠侠']
names.insert(1,"路飞")
print(names)
['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠']

合并

names = ['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠']
temp = ["a", "b", "c", "d"]
names.extend(temp)
print(names)
['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']

列表嵌套

names = ['钢铁侠', '路飞', '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
names.insert(2,[1,2,3])
print(names)
['钢铁侠', '路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']

del 删除

names = ['钢铁侠', '路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
del(names[0])
print(names)
['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']

pop 删除

names = ['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c', 'd']
print(names.pop())
print(names.pop(1))
d
[1, 2, 3]

count计数

>>> x = [1,2,2,2,3,4,5,5,6]
>>> x.count(2)
3

index索引

>>> x = [1,2,2,2,3,4,5,5,6]
>>> x.index(2)
1

sort就地排序

>>> x=list(range(11))
>>> import random
>>> random.shuffle(x)
>>> x
[10, 3, 9, 4, 7, 6, 2, 5, 1, 0, 8]
>>> x.sort(reverse=True)
>>> x
[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

reverse就地逆转

>>> x.reverse()
>>> x
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

copy浅拷贝

>>> x=[1,2,[3,4]]
>>> y=x.copy()
>>> y
[1, 2, [3, 4]]
>>> y[2].append(5)
>>> y
[1, 2, [3, 4, 5]]
>>> x
[1, 2, [3, 4, 5]]

deepcopy深拷贝

>>> import copy
>>> x=[1,2,[3,4]]
>>> y=copy.deepcopy(x)
>>> x[2].append(5)
>>> x
[1, 2, [3, 4, 5]]
>>> y
[1, 2, [3, 4]]

列表推导式

>>> l = [x*x for x in range(10)]
>>> l
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

>>> sum([2**i for i in range(64)])
18446744073709551615

嵌套列表平铺

>>> vec = [[1,2,3],[4,5,6],[7,8,9]]
>>> [num for elem in vec for num in elem]
[1, 2, 3, 4, 5, 6, 7, 8, 9]

过滤不符合条件的元素

>>> l = [-1,-4,6,7,-2]
>>> [i for i in l if i>0]
[6, 7]

切片

>>> l = list(range(10))
>>> l
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> l[0:3]
[0, 1, 2]
>>> l[0:3:2]
[0, 2]
>>> l[::-1]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

clear 清空

names = ['路飞', [1, 2, 3], '蜘蛛侠', '蝙蝠侠', 'a', 'b', 'c']
names.clear()
print(names)
[]

元组

不可变的列表

创建

ages = (18,19,20,33,83)
ages = tuple([18,29,23,40])

常用操作

#索引
>>> ages = (11, 22, 33, 44, 55)
>>> ages[0]
11
>>> ages[3]
44
>>> ages[-1]
55
#切片:同list  
#循环
>>> for age in ages:
    print(age)
11
22
33
44
55
#长度
>>> len(ages)
5
#包含
>>> 11 in ages
True
>>> 66 in ages
False
>>> 11 not in ages
False

注意:元组本身不可变,如果元组中还包含其他可变元素,这些可变元素可以改变

>>> data 
(99, 88, 77, ['Alex', 'Jack'], 33)
>>> data[3][0] = '金角大王'
>>> data
(99, 88, 77, ['金角大王', 'Jack'], 33)

字典

创建操作

>>>person = {"name": "dylan", 'age': 18} 
#或
>>>person = dict(name='seven', age=20)
#或
>>>person = dict({"name": "egon", 'age': 20})
#或
>>> {}.fromkeys([1,2,3,4,5,6,7,8],100)
{1: 100, 2: 100, 3: 100, 4: 100, 5: 100, 6: 100, 7: 100, 8: 100} 

增加操作

names = {    
    "dylan": [18, "CEO", 66000],    
    "黑姑娘": [24, "行政", 4000],
}
# 新增k
names["佩奇"] = [26, "讲师", 40000]

删除操作

names.pop("dylan") # 删除指定key
names.popitem()   # 删除并返回最后插入的一个键值对(Python 3.7 之前为任意一项)
del names["abc"] # 删除指定key,同pop方法
names.clear()     # 清空dict

修改操作

dic['key'] = 'new_value' # 如果key在字典中存在,'new_value'将会替代原来的value值;不存在则新增该键值对
dic.update(dic2)         # 将字典dic2的键值对添加到字典dic中

查操作

dic['key'] #返回字典中key对应的值,若key不存在字典中,则报错;

'key' in dic #若存在则返回True,没有则返回False

dic.get('key') #key不存在时返回None,也可以用第二个参数指定默认值
>>> dic = dict(name='dylan')
>>> dic
{'name': 'dylan'}
>>> dic.get('a')
>>> dic.get('a','Not exists.')
'Not exists.'


dic.keys() #返回包含字典所有key的视图对象(dict_keys),需要列表时用list()转换;
dic.values() #返回包含字典所有value的视图对象(dict_values);
dic.items() #返回包含所有(键,值)元组的视图对象(dict_items);

循环

1、for k in dic.keys()
2、for k,v in dic.items() 
3、for k in dic   # 推荐用这种,效率速度最快

info = {    
    "name":"dylan",    
    "age": 18,    
    }

for k in info:    
    print(k,info[k])

# 输出
name dylan
age 18

求长度

len(dic)

集合

创建集合

>>> a = {1,2,3,4,2,'alex',3,'rain','abc'}
>>> a
{1, 2, 3, 4, 'alex', 'abc', 'rain'}

由于它是天生去重的,重复的值你根本存不进去

帮列表去重

帮列表去重最快速的办法是什么? 就是把它转成集合,去重完,再转回列表

>>> b
[1, 2, 3, 4, 2, 'abc', 3, 'rain', 'abc']
>>> set(b)
{1, 2, 3, 4, 'abc', 'rain'}
>>> b = list(set(b)) #一句代码搞定
>>> b
[1, 2, 3, 4, 'abc', 'rain']

增删改查

>>> a
{1, 2, 3, 4, 'abc', 'rain'}
#新增
>>> a.add('黑姑娘')  
#删除discard
>>> a
{2, 3, '黑姑娘', 'abc', 'rain'}
>>> a.discard('rain')   #删除一个存在的值
>>> a.discard('rain2')   #如果这个值不存在,do nothing.
>>> a
{2, 3, '黑姑娘', 'abc'}
>>> #随机删除,少用,或特定场景用
>>> a.pop() #删除并返回1
#删除remove
>>> a.remove(4)
#查
>>> a
{2, 3, '黑姑娘', 'abc', 'rain'}
>>> 'abc' in a
True
#改:集合中的元素不能修改,只能先删除再添加

关系运算

s_1024 = {"佩奇","老男孩","海峰","马JJ","老村长","黑姑娘","Alex"}
s_pornhub = {"Alex","Egon","Rain","马JJ","Nick","Jack"}
print(s_1024 & s_pornhub)  # 交集, elements in both set
print(s_1024 | s_pornhub)  # 并集 or 合集
print(s_1024 - s_pornhub)  # 差集 , only in 1024
print(s_pornhub - s_1024)  # 差集,  only in pornhub
print(s_1024 ^ s_pornhub)  # 对称差集, 把脚踩2只船的人T出去

两个集合之间一般有三种关系,相交、包含、不相交。在Python中分别用下面的方法判断:

print(s_1024.isdisjoint(s_pornhub))     # 判断2个集合是不是不相交,返回True or False
print(s_1024.issubset(s_pornhub))       # 判断s_1024是不是s_pornhub的子集,返回True or False
print(s_1024.issuperset(s_pornhub))     # 判断s_1024是不是s_pornhub的父集,返回True or False

内置函数

enumerate()

>>> list(enumerate('abcd'))
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')]
>>> list(enumerate(['Python','Java']))
[(0, 'Python'), (1, 'Java')]
>>> list(enumerate({'a':97,'b':98}))
[(0, 'a'), (1, 'b')]
>>> list(enumerate({'a':97,'b':98}.items()))
[(0, ('a', 97)), (1, ('b', 98))]
>>> for index,value in enumerate(range(10,15)):
...     print((index,value),end=' ')
... 
(0, 10) (1, 11) (2, 12) (3, 13) (4, 14) 

# 带start参数
>>> for item in enumerate(range(5),6):
...     print(item, end=' ')
... 
(6, 0) (7, 1) (8, 2) (9, 3) (10, 4) 

map()

>>> list(map(str,range(5)))
['0', '1', '2', '3', '4']
>>> def add5(v):
...     return v+5
... 
>>> list(map(add5,range(10)))
[5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
>>> def add(x,y):
...     return x+y
... 
>>> list(map(add,range(5),range(5,10)))
[5, 7, 9, 11, 13]
>>> list(map(lambda x,y:x+y,range(5),range(5,10)))
[5, 7, 9, 11, 13]


# 自定义比较复杂的map映射函数
>>> def myMap(lst,value):
...     return map(lambda item:item+value, lst)
... 
>>> list(myMap(range(5),5))
[5, 6, 7, 8, 9]
>>> list(myMap(range(5),8))
[8, 9, 10, 11, 12]

# 实现序列与数字的四则运算
>>> def myMap(iterable, op, value):
...     if op not in '+-*/':
...             return 'Error operator'
...     func = lambda i:eval(repr(i)+op+repr(value))
...     return map(func,iterable)
... 
>>> list(myMap(range(5),'+',5))
[5, 6, 7, 8, 9]
>>> a,b,c=map(int,'123')
>>> a
1
>>> b
2
>>> c
3

reduce()

>>> from functools import reduce
>>> def add(x,y):
...     return x+y
... 
>>> reduce(add,range(1,10))
45
>>> reduce(lambda x,y:x+y, range(1,10))
45
>>> import operator
>>> reduce(operator.add, range(1,10))
45
>>> reduce(operator.add, range(1,10),5)
50
>>> reduce(operator.mul, range(1,6))
120
>>> reduce(operator.add, map(str, range(1,10)))
'123456789'
>>> ''.join(map(str,range(1,10)))
'123456789'

filter()

>>> seq = ['foo','x41','?!','***']
>>> seq
['foo', 'x41', '?!', '***']
>>> def func(x):
...     return x.isalnum()    # 判断是否为字符或数字
... 
>>> filter(func,seq)
<filter object at 0x7f7830f736a0>
>>> list(filter(func,seq))
['foo', 'x41']
>>> seq
['foo', 'x41', '?!', '***']
>>> [x for x in seq if x.isalnum()]
['foo', 'x41']
>>> list(filter(lambda x:x.isalnum(), seq))
['foo', 'x41']
>>> list(filter(None,[123,0,0,4,0,5]))
[123, 4, 5]

range()

>>> list(range(2,10))
[2, 3, 4, 5, 6, 7, 8, 9]
>>> list(range(2,10,2))
[2, 4, 6, 8]
>>> for i in range(4):
...     print(i)
... 
0
1
2
3

zip()

>>> list(zip('abcd',[1,2,3]))
[('a', 1), ('b', 2), ('c', 3)]
>>> list(zip('abc'))
[('a',), ('b',), ('c',)]
>>> list(zip('123','abc','!@#'))
[('1', 'a', '!'), ('2', 'b', '@'), ('3', 'c', '#')]
>>> for item in zip('abcd',range(3)):
...     print(item)
... 
('a', 0)
('b', 1)
('c', 2)
>>> x = zip('abcd','1234')
>>> list(x)
[('a', '1'), ('b', '2'), ('c', '3'), ('d', '4')]
>>> list(x) # 只能遍历一次
[]

eval()

>>> eval('3+5')
8
>>> eval('9')
9
>>> eval('4 / 5')
0.8
>>> eval('4' + '/' + '5')
0.8

函数

普通函数

d = {"name":"Dylan","age":26,"hobbie":"游泳"}
l = ["Rebeeca","Katrina","Rachel"]
def change_data(info,girls):
    info["hobbie"] = "学习"
    girls.append("XiaoYun")
change_data(d,l)
print(d,l)
{'name': 'Dylan', 'age': 26, 'hobbie': '学习'} ['Rebeeca', 'Katrina', 'Rachel', 'XiaoYun']

参数问题

def func(a,b,c=3):
    """Print a, b and c on one line; c falls back to 3 when it is omitted."""
    values = (a, b, c)
    print(*values)


func(1,2)


# Extra positional arguments are packed into a tuple, extra keyword arguments into a dict
def func(*args,**kwargs):
    """Print the packed positional tuple, then the packed keyword dict."""
    for packed in (args, kwargs):
        print(packed)


func('a','b',name='dylan',age=18)

匿名函数

#这段代码
def calc(x,y):
    return x**y
print(calc(2,5))

#换成匿名函数
calc = lambda x,y:x**y
print(calc(2,5))
32
32
res = map(lambda x:x**2,[1,5,7,4,8])
for i in res:
    print(i)
1
25
49
16
64

高阶函数

def get_abs(n):
    """Return n unchanged when it is not negative, otherwise its absolute value."""
    return abs(n) if n < 0 else n


def add(x,y,f):
    """Apply f to x and to y, then return the sum of the two results."""
    first = f(x)
    second = f(y)
    return first + second


# A function is passed as an argument: add() is a higher-order function
res = add(3,-6,get_abs)
print(res)

闭包

def outer():
    """Return the nested function inner, which keeps access to outer's local name."""
    name = 'alex'

    def inner():
        message = "在inner里打印外层函数的变量"
        print(message, name)

    # Hand back the function object itself; inner has not been called yet
    return inner


f = outer()  # f is now <function outer.<locals>.inner at 0x...>
f()  # same as calling inner()

装饰器

from functools import wraps


def use_logging(func):
    """Wrap func so that every call first prints "<name> is running".

    Positional and keyword arguments are forwarded to func unchanged and
    func's return value is handed back to the caller.
    """
    @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("%s is running" % func.__name__)
        return func(*args, **kwargs)
    return wrapper


def foo():
    print("i am foo")


# Manual decoration: rebind the name foo to the wrapped function
foo = use_logging(foo)

foo()
from functools import wraps


def use_logging(func):
    """Wrap func so that every call first prints "<name> is running".

    Positional and keyword arguments are forwarded to func unchanged and
    func's return value is handed back to the caller.
    """
    @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("%s is running" % func.__name__)
        return func(*args, **kwargs)
    return wrapper


# "@use_logging" is shorthand for "foo = use_logging(foo)" after the def
@use_logging
def foo():
    print("i am foo")


foo()

带参数的装饰器

from functools import wraps


def use_loggin(level):
    """Build a decorator that prints the level name before each call.

    Only the levels 'warn' and 'info' print anything; any other level makes
    the decorator a plain pass-through.  The wrapped function receives all
    positional and keyword arguments and its return value is handed back.
    """
    def decorator(func):
        @wraps(func)  # keep func's __name__ / __doc__ on the wrapper
        def wrapper(*args, **kwargs):
            if level == 'warn':
                print('warn')
            elif level == 'info':
                print('info')
            # Forward keyword arguments as well, and return func's result
            return func(*args, **kwargs)
        return wrapper
    return decorator


@use_loggin(level='warn')
def foo(name='foo'):
    print('I am foo')


foo()

类装饰器

class Foo(object):
    """Class-based decorator: prints one line before and one after each call.

    The instance stores the decorated function and becomes callable through
    __call__, so "@Foo" replaces the function with a Foo instance.
    """

    def __init__(self, func):
        self._func = func

    def __call__(self, *args, **kwargs):
        """Call the wrapped function with the given arguments and return its result."""
        print ('class decorator runing')
        result = self._func(*args, **kwargs)
        print ('class decorator ending')
        return result


@Foo
def bar():
    print ('bar')


bar()

生成器

>>> g = (x for x in range(5))
>>> next(g)
0
>>> next(g)
1
>>> next(g)
2
>>> next(g)
3
>>> next(g)
4
>>> next(g)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
StopIteration
>>> g = (x for x in range(5))
>>> for i in g:
...     print(i)
... 
0
1
2
3
4
def func():
    """Generator that yields the integers 0 through 9, one per request."""
    i = 0
    while i < 10:
        yield i
        i += 1


f = func()

print(f)
# Each next() call resumes the generator until its next yield
print(next(f))
print(next(f))
print('干点其它事')
print(next(f))
print(next(f))
<generator object func at 0x7f239cc55308>
0
1
干点其它事
2
3

面向对象

基础语法

class Dog:  
    d_type = "京巴"  
    def __init__(self,name,age,master): 
        print('初始化这个实例....',name)
        self.name = name  
        self.age = age
        self.master = master

    def say_hi(self):  
        print("hello , I am a dog,my type is ",self.d_type) 

d = Dog("毛毛",2,"Alex")  
d2 = Dog("二蛋",3,"Jack")  

d.say_hi()  
d2.say_hi()
print(d2.name, d2.age, d2.master) 
初始化这个实例.... 毛毛
初始化这个实例.... 二蛋
hello , I am a dog,my type is  京巴
hello , I am a dog,my type is  京巴
二蛋 3 Jack

依赖关系

class Dog:
    def __init__(self,name,age,breed,master):
        self.name = name
        self.age = age
        self.breed = breed
        self.master = master # master传进来的应该是个对象
        self.sayhi()  # 调用自己的方法在实例化的时候
    def sayhi(self):
        print("Hi, I'm %s, a %s dog, my master is %s" %(self.name,self.breed,self.master.name))

class Person:
    def __init__(self,name,age,sex):
        self.name = name
        self.age = age
        self.sex = sex
    def walk_dog(self,dog_obj):
        """遛狗"""
        print("主人[%s]带狗[%s]去溜溜." % (self.name,dog_obj.name ))

p = Person("Dylan",18,"Male")
d = Dog("Jack",5,"二哈",p)

p.walk_dog(d)
Hi, I'm Jack, a 二哈 dog, my master is Dylan
主人[Dylan]带狗[Jack]去溜溜.

组合关系

class Dog:  # define a dog class
    """Dog that can bite a person.

    NOTE(review): the original snippet elided this class body; the
    constructor order (name, breed, attack_val) and the bite() method are
    reconstructed from the calls Dog("mjj","二哈",20) and d.bite(p) that
    follow in this example.  The starting life value of 100 mirrors the
    Person class of the same example.
    """
    role = 'dog'  # every dog shares the role 'dog'

    def __init__(self, name, breed, attack_val):
        self.name = name
        self.breed = breed
        self.attack_val = attack_val
        self.life_val = 100

    def bite(self, person):
        """Reduce person.life_val by this dog's attack value and print the result."""
        person.life_val -= self.attack_val
        print("狗[%s]咬了人[%s],人掉血[%s],还剩血量[%s]..." % (self.name,person.name,self.attack_val,person.life_val))

class Weapon:
    """Set of weapons; each weapon method damages obj and prints a log line."""

    def _strike(self, obj, weapon_name, damage):
        # Shared steps of every weapon: record what was used, hurt the target, log it
        self.name = weapon_name
        self.attack_val = damage
        obj.life_val -= self.attack_val
        self.print_log(obj)

    def stick(self,obj):
        """Dog-beating stick: 40 damage."""
        self._strike(obj, "打狗棒", 40)

    def knife(self,obj):
        """Dragon-slaying knife: 80 damage."""
        self._strike(obj, "屠龙刀", 80)

    def gun(self,obj):
        """AK47: 100 damage."""
        self._strike(obj, "AK47", 100)

    def print_log(self,obj):
        """Print who was hit, by which weapon, the damage and the remaining life."""
        details = (obj.name,self.name,self.attack_val,obj.life_val)
        print("[%s]被[%s]攻击了,掉血[%s],还剩血量[%s]..." % details)
class Person:  # 定义一个人类
    role = 'person'  # 人的角色属性都是人

    def __init__(self, name, sex, attack_val):
        self.name = name
        self.attack_val = attack_val
        self.life_val = 100
        self.sex = sex
        self.weapon = Weapon() # 在此处实例化一个Weapon对象

    def attack(self,dog):
        # 人可以攻击狗,这里传递进来的dog也是一个对象。
        # 人攻击狗,那么狗的生命值就会根据人的攻击力而下降
        dog.life_val -= self.attack_val
        print("人[%s]打了狗[%s],狗掉血[%s],还剩血量[%s]..." % (self.name,dog.name,self.attack_val,dog.life_val))

d = Dog("mjj","二哈",20)
p = Person("Alex","Male",60)

d.bite(p) # 对象交互,把p实例传递给d的方法
p.attack(d)

p.weapon.knife(d)  # 通过组合的方式调用weapon实例下的具体武器
p.weapon.stick(d) 
class BirthDate:
    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day
class Course:
    def __init__(self, name, price, period):
        self.name = name
        self.price = price
        self.period = period
class Teacher:
    def __init__(self, name, gender, birth, course):
        self.name = name
        self.gender = gender
        self.birth = birth
        self.course = course
    def teaching(self):
        print('teaching.....',self.course.name)
p1 = Teacher('Alex', 'Male',
                BirthDate('1995', '1', '27'),
                Course('Python', '28000', '5 months')
                )
print(p1.birth.year, p1.birth.month, p1.birth.day)
print(p1.course.name, p1.course.price, p1.course.period)

属性的增删改查

class Person:
    nationality = "Chinese"
    addr = "北京"
    def __init__(self,name,age,sex):
        self.name = name
        self.age = age
        self.sex = sex

        # 实例属性操作
p = Person("Dylan",18,"Male")
p.name = "Jack" # 修改属性
p.job = "CEO" # 添加实例属性
del p.sex # 删除实例属性
print(p.job)  #打印添加的实例属性

# 类属性操作
Person.nationality = "US"
Person.race = "Yellow" # 添加类属性
del Person.addr

print(p.addr) # 再调用已删除的类属性就会报错了

继承

class Animal:
    def __init__(self,name,age,sex):
        self.name = name
        self.age = age
        self.sex = sex
    def eat(self): 
        print("[%s] is eating..."%self.name)

class People(Animal):
    def walk(self):
        print("People [%s] is walking..." % self.name)

class Pig(Animal):
    def eat(self): # 对父类的方法进行了重构
        print("Pig [%s] is eating..." % self.name)

person = People("Alex",25,"Male")
pig = Pig("Mjj",4,"公")

person.walk()
person.eat() # 继承自父类的eat方法
pig.eat()
class People(Animal):
    def __init__(self,name,age,sex,race):
        #Animal.__init__(self,name,age,sex) # 先执行父类方法
        super(People,self).__init__(name,age,sex)  
        #super().__init__(name,age,sex)  # 跟上面这行super语法的效果一样,一般用这种写法的多
        self.race = race  # 再加上子类的属性
        print("初始化了一个人....")

C3算法

class A:
    def test(self):
        print('from A')

class B(A):
    # def test(self):
    #     print('from B')
    pass

class B2:
    def test(self):
        print('from B2')

class C(A):
    def test(self):
        print('from C')

class C2:
    def test(self):
        print('from C2')

class D(B,B2):
    # def test(self):
    #     print('from D')
    pass

class E(C,C2):
    def test(self):
        print('from E')

class F(D,E):
    # def test(self):
    #     print('from F')
    pass

f1=F()
f1.test()

print(F.__mro__) # 打印类的继承顺序
from E
(<class '__main__.F'>, <class '__main__.D'>, <class '__main__.B'>, <class '__main__.E'>, <class '__main__.C'>, <class '__main__.A'>, <class '__main__.B2'>, <class '__main__.C2'>, <class 'object'>)

封装

class Test(object):
    def __init__(self, value):
        self.__value = value

    def __get(self):
        return self.__value

    def __set(self, v):
        self.__value = v

多态

统一函数接口实现多态

class Dog(object):
    def sound(self):
        print("汪汪汪.....")

class Cat(object):
    def sound(self):
        print("喵喵喵.....")

def make_sound(animal_type):
    """统一调用接口"""
    animal_type.sound() # 不管你传进来是什么动物,我都调用sound()方法

dogObj = Dog()
catObj = Cat()

make_sound(dogObj)  
make_sound(catObj)

通过抽象类实现多态 (最常用)

from abc import ABCMeta, abstractmethod

class Animal(metaclass=ABCMeta):

    @abstractmethod
    def show(self):
        print('I am an animal.')

class Cat(Animal):
    def show(self):
        print('I am a cat.')

class Dog(Animal):
    def show(self):
        print('I am a dog.')

class Tiger(Animal):
    def show(self):
        print('I am a tiger.')

x = [item() for item in (Cat, Dog, Tiger)]

for item in x:
    item.show()

类方法

class Student(object):
    """Student that keeps a class-wide count of how many have been created."""

    # Shared counter: stored on the class, not on each instance
    __stu_num = 0

    def __init__(self,name):
        self.name = name
        # Called through the instance, yet the classmethod still receives the class
        self.add_stu_num()

    @classmethod
    def add_stu_num(cls):
        """Increase the class-level counter by one and print the new total."""
        cls.__stu_num = cls.__stu_num + 1
        print("total student num:",cls.__stu_num)


s1, s2, s3, s4 = (Student(label) for label in ("张1", "张2", "张3", "张4"))

Student.add_stu_num()  # a classmethod can also be called on the class itself

静态方法

class Student(object):
    stu_num = 0
    def __init__(self,name):
        self.name = name

    @staticmethod
    def fly():
        print('static')

Student.fly()

Property

class Flight(object):

    def __init__(self,name):
        self.flight_name = name

    def checking_status(self):
        print("connecting airline company api...... " )
        print("checking flight %s status " % self.flight_name)
        return  1

    @property
    def flight_status(self):
        status = self.checking_status()
        if status == 0 :
            print("flight got canceled...")
        elif status == 1 :
            print("flight is arrived...")
        elif status == 2:
            print("flight has departured already...")
        else:
            print("cannot confirm the flight status...,please check later")

    @flight_status.setter # 修改
    def flight_status(self,status):
        print('change.') 

    @flight_status.deleter
    def flight_status(self):
        print('delete.')

f = Flight("CA980")
f.flight_status
f.flight_status = 1
del f.flight_status

f.flight_status

反射

基础语法(下)

异常处理

try-except

a = input('Please input: ')
try:
    x = int(a)
except ValueError:
    # int() raises ValueError when the text is not a valid integer literal
    print('Error!')

try-except-else

a = input('Please input: ')
try:
    x = int(a)
except ValueError:
    # int() raises ValueError when the text is not a valid integer literal
    print('Error!')
else:
    print('else')  # runs only when the try body raised no exception

try-except-finally

a = input('Please input: ')
try:
    x = int(a)
except ValueError:
    # int() raises ValueError when the text is not a valid integer literal
    print('Error!')
finally:
    print('fianlly')  # runs whether or not the try body raised an exception

捕获多种异常

try:
    # z = x / y, so x is the dividend and y is the divisor
    x=float(input('请输入被除数: '))
    y=float(input('请输入除数: '))
    z=x/y
except ZeroDivisionError:
    print('除数不能为零')
except (TypeError, ValueError):
    # float() raises ValueError when the typed text is not a number
    print('被除数和除数应该为数值类型')
except NameError:
    print('变量不存在')
else:
    print(x,'/',y,'=',z)
try:
    # z = x / y, so x is the dividend and y is the divisor
    x=float(input('请输入被除数: '))
    y=float(input('请输入除数: '))
    z=x/y
# One handler for several exception types: list them in a tuple.
# ValueError is what float() raises when the typed text is not a number.
except (ZeroDivisionError, TypeError, ValueError, NameError):
    print('Error')
else:
    print(x,'/',y,'=',z)

复合异常语句

def div(x,y):
    """Divide x by y and print the result or a message describing the failure.

    Prints "x / y = z" on success, a dedicated message for division by zero
    or for non-numeric operands, and always prints the "finally" line last.
    Returns None in every case.
    """
    try:
        z=x/y
    except ZeroDivisionError:
        print('除数不能为零')
    except TypeError:
        print('被除数和除数应该为数值类型')
    else:
        # Reached only when the division succeeded
        print(x,'/',y,'=',z)
    finally:
        # Runs last on every path, whether or not an exception was raised
        print('executing finally clause')

操作文件

  • 将字符串写入文本文件,然后再读取并输出
s='Hello world\n文本文件的读取方法\n文本文件的写入方法\n'

with open('sample.txt','w') as fp:
    fp.write(s)

with open('sample.txt','r') as fp:
    conten = fp.read()
    print(conten)
  • 拷贝文本文件
def fileCopy(src,dst,srcEncoding,dstEncoding):
    """Copy the text file src to dst, converting its encoding on the way.

    src is read as text decoded with srcEncoding; dst is created (or
    truncated) and the same text is written encoded with dstEncoding.
    The whole file is read into memory at once.
    """
    with open(src,'r',encoding=srcEncoding) as srcfp, \
            open(dst,'w',encoding=dstEncoding) as dstfp:
        dstfp.write(srcfp.read())

fileCopy('sample.txt','sample_new.txt','gbk','utf-8')
  • readline()
with open('temp.txt','r') as fp:
    data = fp.readline()
    print(data.strip())
  • readlines()
with open('temp.txt','r') as fp:
    data = fp.readlines()
    for i in data:
        print(i.strip())
  • json()

dumps把数据类型转换成字符串

dump把数据类型转换成字符串并存储在文件中

loads把字符串转换成数据类型

load把文件打开从字符串转换成数据类型

import json

test_dict = {'bigberg': [7600, {1: [['iPhone', 6300], ['Bike', 800], ['shirt', 300]]}]}
print(test_dict)
print(type(test_dict))

#dumps 将字典转换成json字符串
json_str = json.dumps(test_dict)

# loads 将json字符串转化为字典
new_dict = json.loads(json_str)
print(new_dict)
print(type(new_dict))

# 写入json文件
with open('temp.json', 'w') as fp:
    json.dump(new_dict, fp)

# 读取json文件
with open('temp.json', 'r') as fp:
    load_dic = json.load(fp)
    print(load_dic)

字符串

>>> x = 'abc'
>>> x
'abc'
>>> y = "xyz"
>>> y
'xyz'
>>> x + y
'abcxyz'
>>> type('Hello world')
<class 'str'>
>>> type(b'Hello world')
<class 'bytes'>
>>> 'Hello world'.encode('utf-8')
b'Hello world'
>>> 'Hello world'.encode('gbk')
b'Hello world'

>>> a = '洪世贤'.encode('utf-8')
>>> a
b'\xe6\xb4\xaa\xe4\xb8\x96\xe8\xb4\xa4'
>>> a.decode('utf-8')
'洪世贤'
>>> b = '洪世贤'.encode('gbk')
>>> b
b'\xba\xe9\xca\xc0\xcf\xcd'
>>> b.decode('gbk')
'洪世贤'
  • GB2312 :2个字节表示中文
  • GBK:2个字节表示中文
  • UTF-8: 3个字节表示中文

find()、 rfind()、 index()、 rindex()、 count()

>>> s = "apple, peach, banana, peach, pear"
>>> s.find("peach") 
7
>>> s.find("peach",9) 
22
>>> s.find("peach",9, 20) # 指定位置查找, 不存在返回-1
-1
>>> s.rfind('p') # 从右往前找
29
>>> s.index('p')
1
>>> s.rindex('pe')
29
>>> s.index('pear')
29
>>> s.index('ppp') # 不存在报错
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: substring not found
>>> s.count('p') # 计数
5
>>> s.count('ppp') # 不存在返回0
0

split()、 rsplit()、 partition()、 rpartition()

>>> s
'apple, peach, banana, peach'
>>> s.split(",")
['apple', ' peach', ' banana', ' peach']
>>> s="2019-7-10"
>>> t = s.split("-")
>>> t
['2019', '7', '10']
>>> list(map(int,t)) # 将字符串列表转化为int类型列表
[2019, 7, 10]

split()、 rsplit() 默认分割符是任意空白符

>>> s.split('-',maxsplit=1) # 指定最大分割次数
['2019', '7-10']
>>> s.rsplit('-',maxsplit=1)
['2019-7', '10']
>>> s = 'apple, peach, banana, peach'
>>> s.partition(',')
('apple', ',', ' peach, banana, peach')
>>> s.rpartition(',')
('apple, peach, banana', ',', ' peach')
>>> s.rpartition('banana')
('apple, peach, ', 'banana', ', peach')
>>> s.rpartition('xxx')
('', '', 'apple, peach, banana, peach')
>>> 'abababab'.partition('a')
('', 'a', 'bababab')
>>> 'abababab'.rpartition('a')
('ababab', 'a', 'b')

join()

>>> li
['apple', 'banana', 'peach', 'pear']
>>> ','.join(li)
'apple,banana,peach,pear'
>>> ':'.join(li)
'apple:banana:peach:pear'
>>> ''.join(li)
'applebananapeachpear'


# 判断两个字符串除空白符外,内容是否一致
>>> x = 'aaa     bb    cde   ffff   '
>>> ' '.join(x.split())
'aaa bb cde ffff'
>>> def equavilent(s1,s2):
...     if s1 == s2:
...             return True
...     elif ''.join(s1.split()) == ''.join(s2.split()):
...             return True
...     else:
...             return False
... 
>>> equavilent('pip list', 'pip    list')
True
>>> equavilent('pip list', 'pip    li     st')
True
>>> equavilent('   pip list', 'pip    li     st')
True

lower()、 upper()、 capitalize()、 title()、 swapcase()

>>> s = 'What is Your Name?'
>>> s.lower()
'what is your name?'
>>> s.upper()
'WHAT IS YOUR NAME?'
>>> s.capitalize()
'What is your name?'
>>> s.title()
'What Is Your Name?'
>>> s.swapcase()
'wHAT IS yOUR nAME?'

replace()

>>> s = '中国 中国'
>>> print(s.replace('中国', '中华人民共和国'))
中华人民共和国 中华人民共和国
>>> print('abceeeeabcggg'.replace('abc' ,'ABC'))
ABCeeeeABCggg
>>> 

strip()、 lstrip()、 rstrip()

>>> s = '   abc   '
>>> s.lstrip()
'abc   '
>>> s.rstrip()
'   abc'
>>> s.strip()
'abc'
>>> 
>>> 'aaabbdasa'.strip('a')
'bbdas'
>>> 'aaabbdasa'.rstrip('a')
'aaabbdas'
>>> 'aaabbdasa'.strip('ab')
'das'
>>> 'aaabbdasa'.rstrip('ab')
'aaabbdas'

startswith()、 endswith()

>>> s = 'Beautiful is better than ugly'
>>> s.startswith('Be')
True
>>> s.startswith('Be',5)
False
>>> s.startswith('Be',0,5)
True
>>> s.endswith('Be',0,5)
False
>>> s.endswith('ugly')
True

isalnum()、 isalpha()、 isdigit()、 isdecimal()、 isnumeric()、 isspace()、 isupper()、 islower()

>>> '1234abcd'.isalnum()
True
>>> '1234abcd'.isalpha()
False
>>> '1234abcd'.isdigit()
False
>>> '1234.5'.isdigit()
False
>>> '1234'.isdigit()
True
>>> '九'.isnumeric()
True
>>> '1234.5'.isnumeric()
False
>>> '1234.5'.isdecimal()
False
>>> '1.5'.isdecimal()
False
>>> 'ABc'.isupper()
False
>>> 'ABC'.isupper()
True
>>> 'abc'.islower()
True

center()、 ljust()、 rjust()、 zfill()

>>> 'Hello world!'.center(20)
'    Hello world!    '
>>> 'Hello world!'.center(20, '=')
'====Hello world!===='
>>> 'Hello world!'.ljust(20, '=')
'Hello world!========'
>>> 'Hello world!'.rjust(20, '=')
'========Hello world!'
>>> 'abc'.zfill(5)
'00abc'
>>> 'abc'.zfill(2)
'abc'
>>> 'abc'.zfill(20)
'00000000000000000abc'

中英文分词

x = '狗 dog 猫 cat 杯子 cup 桌子 table 你好'


def _collect_runs(text, belongs):
    """Return the maximal runs of consecutive characters for which belongs(ch) is true."""
    runs = []
    current = ''
    for ch in text:
        if belongs(ch):
            current += ch
        elif current:
            runs.append(current)
            current = ''
    # Flush the last run: the text may end in the middle of a word
    if current:
        runs.append(current)
    return runs


# English words: runs of ASCII letters
e = _collect_runs(x, lambda ch: 'a'<=ch<='z' or 'A'<=ch<='Z')
# Chinese words: runs of characters in the range U+4E00..U+9FA5
c = _collect_runs(x, lambda ch: 0x4e00<=ord(ch)<=0x9fa5)

print(e)
print(c)

# ['dog', 'cat', 'cup', 'table']
# ['狗', '猫', '杯子', '桌子', '你好']

正则表达式

常用模块

os模块

  • 常用方法
得到当前工作目录,即当前Python脚本工作的目录路径: os.getcwd()
返回指定目录下的所有文件和目录名:os.listdir()
函数用来删除一个文件:os.remove()
递归删除多级空目录:os.removedirs(r"c:\python")
检验给出的路径是否是一个文件:os.path.isfile()
检验给出的路径是否是一个目录:os.path.isdir()
判断是否是绝对路径:os.path.isabs()
检验给出的路径是否真实存在:os.path.exists()
返回一个路径的目录名和文件名:os.path.split()     e.g os.path.split('/home/swaroop/byte/code/poem.txt') 结果:('/home/swaroop/byte/code', 'poem.txt') 
分离扩展名:os.path.splitext()       e.g  os.path.splitext('/usr/local/test.py')    结果:('/usr/local/test', '.py')
获取路径名:os.path.dirname()
获得绝对路径: os.path.abspath()  
获取文件名:os.path.basename()
运行shell命令: os.system()
读取操作系统环境变量HOME的值:os.getenv("HOME") 
返回操作系统所有的环境变量: os.environ 
设置系统环境变量,仅程序运行时有效:os.environ.setdefault('HOME','/home/alex')(仅当该变量尚未设置时才会写入;要覆盖已有值请用 os.environ['HOME'] = '/home/alex')
给出当前平台使用的行终止符:os.linesep    Windows使用'\r\n',Linux and MAC使用'\n'
指示你正在使用的平台:os.name       对于Windows,它是'nt',而对于Linux/Unix用户,它是'posix'
重命名:os.rename(old, new)
创建多级目录:os.makedirs(r"c:\python\test")
创建单个目录:os.mkdir("test")
获取文件属性:os.stat(file)
修改文件权限:os.chmod(file, mode)(修改时间戳请用 os.utime(file))
获取文件大小:os.path.getsize(filename)
结合目录名与文件名:os.path.join(dir,filename)
改变工作目录到dirname: os.chdir(dirname)
获取当前终端的大小: os.get_terminal_size()
杀死进程: os.kill(10884,signal.SIGKILL)
  • 部分方法演示
>>> import os
>>>
>>> os.getcwd() # 获取当前路径
'C:\\Users\\Dylan\\AppData\\Local\\Programs\\Python\\Python36'
>>> os.mkdir(os.getcwd()+'\\temp') # 建立文件夹
>>> os.chdir(os.getcwd()+'\\temp') # 改变路径
>>> os.getcwd() 
'C:\\Users\\Dylan\\AppData\\Local\\Programs\\Python\\Python36\\temp'
>>> os.listdir('.') # 查看当前路径下的文件和目录
[]
>>> os.mkdir(os.getcwd()+'\\test') 
>>> os.listdir('.')
['test']
>>> os.listdir('..') # 查看上一层路径下的文件和目录
['DLLs', 'Doc', 'etc', 'favicon.ico', 'github.ico', 'include', 'Lib', 'libs', 'LICENSE.txt', 'NEWS.txt', 'python.exe', 'python3.dll', 'python36.dll', 'pythonw.exe', 'Scripts', 'share', 'tcl', 'temp', 'Tools', 'vcruntime140.dll']
>>> os.startfile('notepad.exe') # 打开应用程序
>>> os.startfile('chrome.exe')

>>> os.path.splitext('usr/local/test.py') # 分离文件名和后缀
('usr/local/test', '.py')

random模块

>>> random.randrange(1,10) #返回1-10之间的一个随机数,不包括10
>>> random.randint(1,10) #返回1-10之间的一个随机数,包括10
>>> random.randrange(0, 100, 2) #随机选取0到100间的偶数
>>> random.random()  #返回一个随机浮点数
>>> random.choice('abce3#$@1') #返回一个给定数据集合中的随机字符
'#'
>>> random.sample('abcdefghij',3)  #从多个字符中选取特定数量的字符
['a', 'd', 'b']
#生成随机字符串
>>> import string 
>>> ''.join(random.sample(string.ascii_lowercase + string.digits, 6))  # string.ascii_letters
'4fvda1'
#洗牌
>>> a
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> random.shuffle(a)
>>> a
[3, 0, 7, 2, 1, 6, 5, 8, 9, 4]

hash加密

数据库

爬虫

Request

  • get请求
import requests

# 1.指定url
url = 'https://www.sogou.com/'
# 2.发起请求, 返回对象
response = requests.get(url=url)
# 3.获取响应数据, 获取字符串数据
data = response.text
# 4.持久化存储
with open('sogou.html','w',encoding='utf-8') as fp:
    fp.write(data)

print('搜狗爬取完毕')
  • get请求
    import requests
    

# Search Sogou for a keyword typed by the user and save the result page as "<keyword>.html"
word = input('Input what you want to search: ')
# Send a browser User-Agent with the request
headers = {
    'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}
url = 'https://www.sogou.com/web'
# Query-string parameters; requests appends them to the URL
param = {
    'query':word
}
response = requests.get(url=url,params=param,headers=headers)

# response.text is the body decoded by requests; decoding the raw bytes
# explicitly with response.content.decode('utf-8') is the alternative
page_text = response.text

filename = word + '.html'
with open(filename,'w',encoding='utf-8') as fp:
    fp.write(page_text)

print('在搜狗中的关键词{0}的内容爬取完毕!'.format(word))

- post请求
```python
import requests
import json

# Ask the user for the English word to translate.
word = input('input a English word: ')

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}
url = 'https://fanyi.baidu.com/v2transapi'

# Form fields posted to the translation endpoint.
data = {
    'from': 'en',
    'to': 'zh',
    'query': word,
    'simple_means_flag': '3',
    'sign': '704513.926512',  # value produced by a JS encryption algorithm
    'token': '052a6ea860aa8b4c05039ac7c185b386',
}
response = requests.post(url, data=data, headers=headers)

if response.status_code != 200:
    print('error')
else:
    # Parse the JSON body and show it.
    page_json = response.json()
    print(page_json)
    # The parsed result could additionally be saved to "<word>.json" with
    # json.dump(page_json, fp, ensure_ascii=False); that step is left out here.
  • post请求
    爬取结果: 
    {'error': 997, 'from': 'en', 'to': 'zh', 'query': 'apple'}
    

百度翻译实现了加密参数, 无法直接爬取
可参考破解博文: http://www.sohu.com/a/285486121_99987664

- 获取ajax的Json数据
```python
import requests
import json

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}
url = 'https://movie.douban.com/j/chart/top_list'

# Query-string parameters for the chart endpoint.
param = {
    'type': '24',
    'interval_id': '100:90',
    'start': '0',
    'limit': '20',
}
response = requests.get(url, params=param, headers=headers)

if response.status_code != 200:
    print('error')
else:
    # The endpoint answers with JSON; parse it and show it.
    page_json = response.json()
    print(page_json)
    # The parsed result could additionally be written to a .json file with
    # json.dump(page_json, fp, ensure_ascii=False); that step is left out here.
  • 模拟登陆
  • cookie操作
  • 代理IP操作

正则解析

BS4

  • 常用方法
使用流程:       
    - 导包:from bs4 import BeautifulSoup
    - 使用方式:可以将一个html文档,转化为BeautifulSoup对象,然后通过对象的方法或者属性去查找指定的节点内容
        (1)转化本地文件:
             - soup = BeautifulSoup(open('本地文件'), 'lxml')
        (2)转化网络文件:
             - soup = BeautifulSoup('字符串类型或者字节类型', 'lxml')
        (3)打印soup对象显示内容为html文件中的内容
基础巩固:
    (1)根据标签名查找
        - soup.a   只能找到第一个符合要求的标签
    (2)获取属性
        - soup.a.attrs  获取a所有的属性和属性值,返回一个字典
        - soup.a.attrs['href']   获取href属性
        - soup.a['href']   也可简写为这种形式
    (3)获取内容
        - soup.a.string # 找直接的, 且不能嵌套
        - soup.a.text # 会找下一级, 递归
        - soup.a.get_text() # 会找下一级, 递归
       【注意】如果标签内含有多个子节点(例如文本之外还嵌套了其他标签),那么string获取到的结果为None,而其它两个(text、get_text())仍可以获取文本内容
    (4)find:找到第一个符合要求的标签
        - soup.find('a')  找到第一个符合要求的
        - soup.find('a', title="xxx")
        - soup.find('a', alt="xxx")
        - soup.find('a', class_="xxx")
        - soup.find('a', id="xxx")
    (5)find_all:找到所有符合要求的标签
        - soup.find_all('a')
        - soup.find_all(['a','b']) 找到所有的a和b标签
        - soup.find_all('a', limit=2)  限制前两个
    (6)根据选择器选择指定的内容
               select:soup.select('#feng')
        - 常见的选择器:标签选择器(a)、类选择器(.)、id选择器(#)、层级选择器
            - 层级选择器:
                div .dudu #lala .meme .xixi  下面好多级
                div > p > a > .lala          只能是下面一级
        【注意】select选择器返回永远是列表,需要通过下标提取指定的对象
  • 爬取古诗网三国演义小说
    from bs4 import BeautifulSoup
    import requests
    import os
    

# Request headers shared by every HTTP call in this script: a browser-like
# User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.142 Safari/537.36'
    ),
}

def parse_content(url):
    """
    Download one chapter page and extract its text.

    :param url: link of the chapter page
    :return: text of the element <div class="chapter_content">
    """
    page_text = requests.get(url=url, headers=headers).text
    soup = BeautifulSoup(page_text, 'lxml')
    # find() returns None when the page has no such div; the attribute access
    # below then raises AttributeError.
    elem = soup.find('div', class_="chapter_content")
    content = elem.text
    return content

def write_to_file(title, content):
    """
    Write one chapter to "<title>.txt" in the current working directory.

    The file starts with the title centred in a 100-character line, followed
    by a newline and the chapter text. An existing file is overwritten.

    :param title: chapter title; also used as the file name (without ".txt")
    :param content: chapter text
    :return: None
    """
    # Build the filename before the title is padded for display.
    filename = title + '.txt'
    title = title.center(100, ' ')  # centre the heading line
    content = title + '\n' + content

    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(content)

# Entry point: fetch the table of contents of the novel, then download
# chapters one by one and save each to its own text file.
if __name__ == '__main__':
    url = 'http://www.shicimingju.com/book/sanguoyanyi.html'
    page_text = requests.get(url=url, headers=headers).text

    soup = BeautifulSoup(page_text, 'lxml')
    # Every chapter link in the table of contents.
    a_list = soup.select('.book-mulu>ul>li>a')

    for number, a_tag in enumerate(a_list, start=1):
        print('开始下载第{0}章节'.format(number))
        title = a_tag.string
        # The href values are appended to the site root to form the page URL.
        content_url = 'http://www.shicimingju.com' + a_tag["href"]
        content = parse_content(content_url)

        write_to_file(title, content)
        print('第{0}章下载完成'.format(number))
        # NOTE(review): the loop stops after the first chapter, presumably
        # left in for a quick demo; remove the break to download them all.
        break
### Xpath

- 常用方法

```python
属性定位:
    #找到class属性值为song的div标签
    //div[@class="song"] 
层级&索引定位:
    #找到class属性值为tang的div的直系子标签ul下的第二个子标签li下的直系子标签a
    //div[@class="tang"]/ul/li[2]/a
逻辑运算:
    #找到href属性值为空且class属性值为du的a标签
    //a[@href="" and @class="du"]
模糊匹配:
    //div[contains(@class, "ng")]
    //div[starts-with(@class, "ta")]
取文本:
    # /表示获取某个标签下的文本内容
    # //表示获取某个标签下的文本内容和所有子标签下的文本内容
    //div[@class="song"]/p[1]/text()
    //div[@class="tang"]//text()
取属性:
    //div[@class="tang"]//li[2]/a/@href
- 本地文件:tree = etree.parse(文件名)
                tree.xpath("xpath表达式")
- 网络数据:tree = etree.HTML(网页内容字符串)
                tree.xpath("xpath表达式")
  • 解析58二手房的相关数据
https://sz.58.com/ershoufang/pn
  • 爬取网站图片
import requests
from lxml import etree

# Browser-like User-Agent header sent with every request below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}

url = 'http://jandan.net/ooxx/page-16'
page_text = requests.get(url, headers=headers).text

# Parse the HTML and collect the src attribute of every <img> found under a
# <div class="text">.
tree = etree.HTML(page_text)
imgCode_list = tree.xpath('//div[@class="text"]//img/@src')

for img_url in imgCode_list:
    # Presumably the src values come without a scheme, hence the prefix.
    img_url = 'http:' + img_url
    img_response = requests.get(img_url, headers=headers)
    img_data = img_response.content
    # Image bytes are written in binary mode to a fixed file name.
    with open('img.jpg', mode='wb') as fp:
        fp.write(img_data)
    # Only the first image is downloaded; the loop exits after one pass.
    break
  • 爬取各城市空气质量数据
import requests
from lxml import etree

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}

url = 'https://www.aqistudy.cn/historydata/'
response = requests.get(url, headers=headers)
# Show the encoding reported for the response.
print(response.encoding)
page_text = response.text
tree = etree.HTML(page_text)

# Each <li> under <div class="bottom"> holds one city link.
li_list = tree.xpath('//div[@class="bottom"]//li')
for li in li_list:
    city_name = li.xpath('./a/text()')[0]
    # The href is appended to the listing page URL to build the city link.
    city_url = url + li.xpath('./a/@href')[0]
    print(city_name, city_url)

验证码识别

Selenium

高性能异步爬虫

Scrapy

数据科学

Numpy

Matplotlib

Pandas

Scipy

Seaborn

全部评论

相关推荐

1 3 评论
分享
牛客网
牛客企业服务