ch14迭代器和解析,第一部分(python)

>>> for x in [1, 2, 3, 4]: print(x ** 2, end=' ')
...
1 4 9 16

>>> for x in (1, 2, 3, 4): print(x ** 3, end=' ')
...
1 8 27 64

>>> for x in 'spam': print(x * 2, end=' ')
...
ss pp aa mm


#文件的迭代器
>>> f = open('script1.py')     # Read a 4-line script file in this directory
>>> f.readline()               # readline loads one line on each call
'import sys\n'
>>> f.readline()
'print(sys.path)\n'
>>> f.readline()
'x = 2\n'
>>> f.readline()
'print(2 ** 33)\n'
>>> f.readline()               # Returns empty string at end-of-file
''


#文件对象有一个__next__方法,到达文件末尾发生StopIteration异常
>>> f = open('script1.py')     # __next__ loads one line on each call too
>>> f.__next__()               # But raises an exception at end-of-file
'import sys\n'
>>> f.__next__()             
'print(sys.path)\n'
>>> f.__next__()
'x = 2\n'
>>> f.__next__()
'print(2 ** 33)\n'
>>> f.__next__()
Traceback (most recent call last):
...more exception text omitted...
StopIteration

#所有的迭代工具,比如for循环等的内部工作起来都是在每次迭代中调用__next__方法,
#并且捕捉StopIteration异常来确定何时离开



>>> for line in open('script1.py'):       # Use file iterators to read by lines
...     print(line.upper(), end='')       # Calls __next__, catches StopIteration
...
IMPORT SYS
PRINT(SYS.PATH)
X = 2
PRINT(2 ** 33)


#读取文件全部内容,如果文件太大,会造成内存不够
>>> for line in open('script1.py').readlines():
...     print(line.upper(), end='')
...
IMPORT SYS
PRINT(SYS.PATH)
X = 2
PRINT(2 ** 33)


#while版本
>>> f = open('script1.py')
>>> while True:
...     line = f.readline()
...     if not line: break
...     print(line.upper(), end='')
...
...same output...


#2个内置函数 iter和next,iter内置函数用来返回对象的迭代器
#next内置函数会自动调用对象的__next__方法,next(X)等同于X.__next__()

>>> f = open('script1.py')
>>> f.__next__()                   # Call iteration method directly
'import sys\n'
>>> f.__next__()
'print(sys.path)\n'

>>> f = open('script1.py')
>>> next(f)                        # next() built-in calls __next__
'import sys\n'
>>> next(f)
'print(sys.path)\n'


#iter内置函数返回对象的迭代器
>>> L = [1, 2, 3]
>>> I = iter(L)                    # Obtain an iterator object
>>> I.__next__()                       # Call next to advance to next item
1
>>> I.__next__()
2
>>> I.__next__()
3
>>> I.__next__()
Traceback (most recent call last):
...more omitted...
StopIteration


#文件对象是自己的迭代器,所以不需要先调用iter内置函数获得迭代器
>>> f = open('script1.py')
>>> iter(f) is f
True
>>> f.__next__()
'import sys\n'


#列表以及很多其他的内置对象,不是自身的迭代器,因为他们支持多次打开迭代器,
#对这样的对象,我们必须调用iter内置函数来启动迭代

>>> L = [1, 2, 3]
>>> iter(L) is L
False
>>> L.__next__() #错误
AttributeError: 'list' object has no attribute '__next__'

>>> I = iter(L)
>>> I.__next__()
1
>>> next(I)                # Same as I.__next__()
2

################################################################################################
#自动迭代和手动迭代的对等性
>>> L = [1, 2, 3]
>>>
>>> for X in L:                 # Automatic iteration
...     print(X ** 2, end=' ')  # Obtains iter, calls __next__, catches exceptions
...
1 4 9

>>> I = iter(L)                 # Manual iteration: what for loops usually do
>>> while True:
...     try:                    # try statement catches exceptions
...         X = next(I)         # Or call I.__next__
...     except StopIteration:
...         break
...     print(X ** 2, end=' ')
...
1 4 9

#for循环调用内部等价的I.__next__()方法,而不是next(I)
################################################################################################


#字典的迭代器
>>> D = {'a':1, 'b':2, 'c':3}
>>> for key in D.keys():
...     print(key, D[key])
...
a 1
c 3
b 2


#字典的迭代器,返回返回其键
>>> I = iter(D)
>>> next(I)
'a'
>>> next(I)
'c'
>>> next(I)
'b'
>>> next(I)
Traceback (most recent call last):
...more omitted...
StopIteration


#直接使用字典迭代,不需使用keys方法
>>> for key in D:
...     print(key, D[key])
...
a 1
c 3
b 2

#shelves和os.popen 结果也都是可以迭代的

#os.popen的使用
>>> import os
>>> P = os.popen('dir')
>>> P.__next__()
' Volume in drive C is SQ004828V03\n'
>>> P.__next__()
' Volume Serial Number is 08BE-3CD4\n'
>>> next(P)
TypeError: _wrap_close object is not an iterator
#在python2.6中,支持P.next()方法,在python3中P.__next__()方法,不支持next(P)


# 可迭代对象一次返回一个结果,而不是一个实际的列表
>>> R = range(5)
>>> R                            # Ranges are iterables in 3.0
range(0, 5)
>>> I = iter(R)                  # Use iteration protocol to produce results
>>> next(I)
0
>>> next(I)
1
>>> list(range(5))               # Or list() to collect all results at once
[0, 1, 2, 3, 4]


#内置enumerate函数的迭代
>>> E = enumerate('spam')        # enumerate is an iterable too
>>> E
<enumerate object at 0x0253F508>
>>> I = iter(E)
>>> next(I)                      # Generate results with iteration protocol
(0, 's')
>>> next(I)                      # Or use list() to force generation to run
(1, 'p')
>>> list(enumerate('spam'))
[(0, 's'), (1, 'p'), (2, 'a'), (3, 'm')]


#列表解析
>>> L = [1, 2, 3, 4, 5]

>>> for i in range(len(L)):
...     L[i] += 10
...
>>> L
[11, 12, 13, 14, 15]



>>> L = [x + 10 for x in L]
>>> L
[21, 22, 23, 24, 25]


#列表解析的基础知识
>>> L = [x + 10 for x in L]


>>> res = []
>>> for x in L:
...     res.append(x + 10)
...
>>> res
[21, 22, 23, 24, 25]



#在文件上使用列表解析
>>> f = open('script1.py')
>>> lines = f.readlines()
>>> lines
['import sys\n', 'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n']


#把那个换行符去掉
>>> lines = [line.rstrip() for line in lines]
>>> lines
['import sys', 'print(sys.path)', 'x = 2', 'print(2 ** 33)']


#最终形式
>>> lines = [line.rstrip() for line in open('script1.py')]
>>> lines
['import sys', 'print(sys.path)', 'x = 2', 'print(2 ** 33)']


#列表解析的几个例子
>>> [line.upper() for line in open('script1.py')]
['IMPORT SYS\n', 'PRINT(SYS.PATH)\n', 'X = 2\n', 'PRINT(2 ** 33)\n']

>>> [line.rstrip().upper() for line in open('script1.py')]
['IMPORT SYS', 'PRINT(SYS.PATH)', 'X = 2', 'PRINT(2 ** 33)']

>>> [line.split() for line in open('script1.py')]
[['import', 'sys'], ['print(sys.path)'], ['x', '=', '2'], ['print(2', '**','33)']]

>>> [line.replace(' ', '!') for line in open('script1.py')]
['import!sys\n', 'print(sys.path)\n', 'x!=!2\n', 'print(2!**!33)\n']

>>> [('sys' in line, line[0]) for line in open('script1.py')]
[(True, 'i'), (True, 'p'), (False, 'x'), (False, 'p')]


#列表解析支持if过滤条件
>>> lines = [line.rstrip() for line in open('script1.py') if line[0] == 'p']
>>> lines
['print(sys.path)', 'print(2 ** 33)']


#上面例子的等效形式
>>> res = []
>>> for line in open('script1.py'):
...     if line[0] == 'p':
...         res.append(line.rstrip())
...
>>> res
['print(sys.path)', 'print(2 ** 33)']


#列表解析支持双重循环
>>> [x + y for x in 'abc' for y in 'lmn']
['al', 'am', 'an', 'bl', 'bm', 'bn', 'cl', 'cm', 'cn']


#上面例子的等效形式
>>> res = []
>>> for x in 'abc':
...     for y in 'lmn':
...         res.append(x + y)
...
>>> res
['al', 'am', 'an', 'bl', 'bm', 'bn', 'cl', 'cm', 'cn']


#用户定义的类也可以实现迭代协议,就是实现__iter__方法

#for,列表解析,in成员关系测试,map内置函数以及sorted和zip内置函数等其他迭代环境也都使用了迭代协议,
#因为这些工具在对象中从左到右扫描

>>> for line in open('script1.py'):         # Use file iterators
...     print(line.upper(), end='')
...
IMPORT SYS
PRINT(SYS.PATH)
X = 2
PRINT(2 ** 33)


#列表解析使用了迭代协议
>>> uppers = [line.upper() for line in open('script1.py')]
>>> uppers
['IMPORT SYS\n', 'PRINT(SYS.PATH)\n', 'X = 2\n', 'PRINT(2 ** 33)\n']

#map函数使用了迭代协议
>>> map(str.upper, open('script1.py'))                 # map is an iterable in 3.0
<map object at 0x02660710>

>>> list( map(str.upper, open('script1.py')) )
['IMPORT SYS\n', 'PRINT(SYS.PATH)\n', 'X = 2\n', 'PRINT(2 ** 33)\n']

#in成员关系测试使用了迭代协议
>>> 'y = 2\n' in open('script1.py')
False
>>> 'x = 2\n' in open('script1.py')
True


#sorted,zip,enumerate,filter,reduce等接受可迭代对象作为参数,所以这些也都属于迭代工具
>>> sorted(open('script1.py'))
['import sys\n', 'print(2 ** 33)\n', 'print(sys.path)\n', 'x = 2\n']

>>> list(zip(open('script1.py'), open('script1.py')))
[('import sys\n', 'import sys\n'), ('print(sys.path)\n', 'print(sys.path)\n'),
('x = 2\n', 'x = 2\n'), ('print(2 ** 33)\n', 'print(2 ** 33)\n')]

>>> list(enumerate(open('script1.py')))
[(0, 'import sys\n'), (1, 'print(sys.path)\n'), (2, 'x = 2\n'),
(3, 'print(2 ** 33)\n')]

>>> list(filter(bool, open('script1.py')))
['import sys\n', 'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n']

>>> import functools, operator
>>> functools.reduce(operator.add, open('script1.py'))
'import sys\nprint(sys.path)\nx = 2\nprint(2 ** 33)\n'


#sum,any,all,max,min内置函数也都是迭代工具  sum(range(101))
>>> sum([3, 2, 4, 1, 5, 0])                     # sum expects numbers only
15
>>> any(['spam', '', 'ni'])
True
>>> all(['spam', '', 'ni'])
False
>>> max([3, 2, 5, 1, 4])
5
>>> min([3, 2, 5, 1, 4])
1


#max和min用于文件迭代
>>> max(open('script1.py'))         # Line with max/min string value
'x = 2\n'
>>> min(open('script1.py'))
'import sys\n'

#Python的内置工具集中从左到右的扫描一个对象的每项的工具,都定义为在主体对象上使用了迭代协议,
#调用了主体对象的__next__方法,list和tuple内置函数,字符串join方法都可以传入可迭代对象
>>> list(open('script1.py'))
['import sys\n', 'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n']

>>> tuple(open('script1.py'))
('import sys\n', 'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n')

>>> '&&'.join(open('script1.py'))
'import sys\n&&print(sys.path)\n&&x = 2\n&&print(2 ** 33)\n'

>>> a, b, c, d = open('script1.py')
>>> a, d
('import sys\n', 'print(2 ** 33)\n')

>>> a, *b = open('script1.py')                # 3.0 extended form
>>> a, b
('import sys\n', ['print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n'])


#集合解析和字典解析
>>> set(open('script1.py'))
{'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n', 'import sys\n'}

>>> {line for line in open('script1.py')}
{'print(sys.path)\n', 'x = 2\n', 'print(2 ** 33)\n', 'import sys\n'}

>>> {ix: line for ix, line in enumerate(open('script1.py'))}
{0: 'import sys\n', 1: 'print(sys.path)\n', 2: 'x = 2\n', 3: 'print(2 ** 33)\n'}


#集合解析和字典解析支持if过滤条件
>>> {line for line in open('script1.py') if line[0] == 'p'}
{'print(sys.path)\n', 'print(2 ** 33)\n'}

>>> {ix: line for (ix, line) in enumerate(open('script1.py')) if line[0] == 'p'}
{1: 'print(sys.path)\n', 3: 'print(2 ** 33)\n'}


#函数调用时,使用*args形式:这里的args就是可迭代对象,
>>> def f(a, b, c, d): print(a, b, c, d, sep='&')
...
>>> f(1, 2, 3, 4)
1&2&3&4
>>> f(*[1, 2, 3, 4])                  # Unpacks into arguments
1&2&3&4

>>> f(*open('script1.py'))            # Iterates by lines too!
import sys
&print(sys.path)
&x = 2
&print(2 ** 33)


#
>>> X = (1, 2)
>>> Y = (3, 4)
>>>
>>> list(zip(X, Y))                # Zip tuples: returns an iterable
[(1, 3), (2, 4)]
>>>
>>> A, B = zip(*zip(X, Y))          # Unzip a zip!
>>> A
(1, 2)
>>> B
(3, 4)

#python3.0中新的可迭代对象,比python2.X更强调迭代
#字典keys,values,items都返回可迭代对象,range,map,zip,filter也一样

>>> zip('abc', 'xyz')                  # An iterable in Python 3.0 (a list in 2.6)
<zip object at 0x02E66710>

>>> list(zip('abc', 'xyz'))            # Force list of results in 3.0 to display
[('a', 'x'), ('b', 'y'), ('c', 'z')]


#range迭代器,range函数不会在内存中构建一个结果列表
C:\\misc> c:\python30\python
>>> R = range(10)                # range returns an iterator, not a list
>>> R
range(0, 10)

>>> I = iter(R)                  # Make an iterator from the range
>>> next(I)                      # Advance to next result
0                                # What happens in for loops, comprehensions, etc.
>>> next(I)
1
>>> next(I)
2

>>> list(range(10))              # To force a list if required
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


#
>>> len(R)                       # range also does len and indexing, but no others
10
>>> R[0]
0
>>> R[-1]
9

>>> next(I)                      # Continue taking from iterator, where left off
3
>>> I.__next__()                 # .next() becomes .__next__(), but use new next()
4


#map,zip,filter都是自己的迭代器,所以可以直接调用next内置函数
>>> M = map(abs, (-1, 0, 1))            # map returns an iterator, not a list
>>> M
<map object at 0x0276B890>
>>> next(M)                             # Use iterator manually: exhausts results
1                                       # These do not support len() or indexing
>>> next(M)
0
>>> next(M)
1
>>> next(M)
StopIteration

>>> for x in M: print(x)                # map iterator is now empty: one pass only
...

>>> M = map(abs, (-1, 0, 1))            # Make a new iterator to scan again
>>> for x in M: print(x)                # Iteration contexts auto call next()
...
1
0
1
>>> list(map(abs, (-1, 0, 1)))          # Can force a real list if needed
[1, 0, 1]


#zip的迭代器是自身
>>> Z = zip((1, 2, 3), (10, 20, 30))    # zip is the same: a one-pass iterator
>>> Z
<zip object at 0x02770EE0>

>>> list(Z)
[(1, 10), (2, 20), (3, 30)]

>>> for pair in Z: print(pair)          # Exhausted after one pass
...

>>> Z = zip((1, 2, 3), (10, 20, 30))
>>> for pair in Z: print(pair)          # Iterator used automatically or manually
...
(1, 10)
(2, 20)
(3, 30)

>>> Z = zip((1, 2, 3), (10, 20, 30))
>>> next(Z)
(1, 10)
>>> next(Z)
(2, 20)


#filter迭代器也是自身
>>> filter(bool, ['spam', '', 'ni'])
<filter object at 0x0269C6D0>
>>> list(filter(bool, ['spam', '', 'ni']))
['spam', 'ni']


#单个迭代器vs多个迭代器
#range支持多个迭代器
>>> R = range(3)                           # range allows multiple iterators
>>> next(R)
TypeError: range object is not an iterator

>>> I1 = iter(R)
>>> next(I1)
0
>>> next(I1)
1
>>> I2 = iter(R)                           # Two iterators on one range
>>> next(I2)
0
>>> next(I1)                               # I1 is at a different spot than I2
2


#zip迭代器,map迭代器,filter迭代器是单个迭代器
>>> Z = zip((1, 2, 3), (10, 11, 12))
>>> I1 = iter(Z)
>>> I2 = iter(Z)                           # Two iterators on one zip
>>> next(I1)
(1, 10)
>>> next(I1)
(2, 11)
>>> next(I2)                               # I2 is at same spot as I1!
(3, 12)

>>> M = map(abs, (-1, 0, 1))               # Ditto for map (and filter)
>>> I1 = iter(M); I2 = iter(M)
>>> print(next(I1), next(I1), next(I1))
1 0 1
>>> next(I2)
StopIteration

>>> R = range(3)                            # But range allows many iterators
>>> I1, I2 = iter(R), iter(R)
>>> [next(I1), next(I1), next(I1)]
[0 1 2]
>>> next(I2)
0

#使用类来编写自己的可迭代对象的时候,将会看到通常通过针对iter调用返回一个新的对象,
#来支持多个迭代器,单个迭代器一般意味着一个对象返回其自身

#生成器函数和表达式的行为就像map和zip一样,支持单个的活跃迭代器,而不是像range一样


#字典视图迭代器
>>> D = dict(a=1, b=2, c=3)
>>> D
{'a': 1, 'c': 3, 'b': 2}

>>> K = D.keys()                              # A view object in 3.0, not a list
>>> K
<dict_keys object at 0x026D83C0>

>>> next(K)                                   # Views are not iterators themselves
TypeError: dict_keys object is not an iterator

>>> I = iter(K)                               # Views have an iterator,
>>> next(I)                                   # which can be used manually
'a'                                           # but does not support len(), index
>>> next(I)
'c'

>>> for k in D.keys(): print(k, end=' ')      # All iteration contexts use auto
...
a c b



>>> K = D.keys()
>>> list(K)                              # Can still force a real list if needed
['a', 'c', 'b']

>>> V = D.values()                       # Ditto for values() and items() views
>>> V
<dict_values object at 0x026D8260>
>>> list(V)
[1, 3, 2]

>>> list(D.items())
[('a', 1), ('c', 3), ('b', 2)]

>>> for (k, v) in D.items(): print(k, v, end=' ')
...
a 1 c 3 b 2


#字典有自己的迭代器,返回连续的键
>>> D                                    # Dictionaries still have own iterator
{'a': 1, 'c': 3, 'b': 2}                 # Returns next key on each iteration
>>> I = iter(D)
>>> next(I)
'a'
>>> next(I)
'c'

>>> for key in D: print(key, end=' ')    # Still no need to call keys() to iterate
...                                      # But keys is an iterator in 3.0 too!
a c b


#字典排序
>>> D
{'a': 1, 'c': 3, 'b': 2}
>>> for k in sorted(D.keys())): print(k, D[k], end=' ') 
...
a 1 b 2 c 3

>>> D
{'a': 1, 'c': 3, 'b': 2}                   
>>> for k in sorted(D): print(k, D[k], end=' ')       # Best practice key sorting
...
a 1 b 2 c 3


#使用yield语句,用户定义的函数可以转换为可迭代的生成器函数
#当编写在圆括号中的时候,,列表解析转变为可迭代的生成器表达式  (item for item in range(1,11))
#当用户定义的类通过__iter__或__getitem__运算符重载,则该类变得可迭代

评论

此博客中的热门博文

OAuth 2教程

rootfs,linux目录结构,(第3天)