ch07字符串(python)

python3有3种字符串:
str用于Unicode文本
bytes用于二进制数据
bytearray是bytes的一种可变变体

python2.6的字符串:
Unicode字符串表示宽Unicode文本
str字符串处理8位文本和二进制数据
bytearray




>>> 'shrubbery', "shrubbery"
('shrubbery', 'shrubbery')

>>> 'knight"s', "knight's"
('knight"s', "knight's")


#即使不用+号连接,也可以自动合并
>>> title = "Meaning " 'of' " Life"     # Implicit concatenation
>>> title
'Meaning of Life'

>>> 'knight\'s', "knight\"s"
("knight's", 'knight"s')



>>> s = 'a\nb\tc'

>>> s
'a\nb\tc'
>>> print(s)
a
b c

>>> len(s)
5

转义字符:
\\
\'
\"
\a  响铃
\b  退格
\f  换页
\n  换行
\r  回车
\t  水平制表符
\v  垂直制表符
\N{id}  Unicode数据库ID
\uhhhh  Unicode 16位的十六进制值
\Uhhhhhhhh Unicode 32位的十六进制值
\xhh 十六进制值
\ooo 八进制值,\后面是3个0-7的数字
\0 Null,不是字符串结尾
\other 不转义(保留)

#字符串中嵌入二进制值
>>> s = 'a\0b\0c'
>>> s
'a\x00b\x00c'
>>> len(s)
5

#\001 \002表示八进制,\x03表示十六进制03
>>> s = '\001\002\x03'
>>> s
'\x01\x02\x03'
>>> len(s)
3

>>> S = "s\tp\na\x00m"
>>> S
's\tp\na\x00m'
>>> len(S)
7
>>> print(S)
s p
a m

# \ 后面的字符如果不能构成有效的转义序列,反斜杠会原样保留在字符串中
>>> x = "C:\py\code"          # Keeps \ literally
>>> x
'C:\\py\\code'
>>> len(x)
10


#raw字符串抑制转义
myfile = open('C:\new\text.dat', 'w')     # 错误示例:\n 和 \t 会被当作换行符和制表符

myfile = open(r'C:\new\text.dat', 'w')

>>> path = r'C:\new\text.dat'
>>> path                        # Show as Python code,python代码形式
'C:\\new\\text.dat'
>>> print(path)                 # User-friendly format,用户友好的格式
C:\new\text.dat
>>> len(path)                   # String length
15

#unix风格的路径在win下也有效
myfile = open('C:/new/text.dat', 'w')


# r'....\'不是有效的字符串常量,raw字符串不可以使用 \结尾
#解决方法:r'1\nb\tc\\'[:-1]或者r'1\nb\tc'+'\\'或者'1\\nb\\tc\\'



#三重引号字符串
>>> mantra = """Always look
...  on the bright
... side of life."""
>>>
>>> mantra
'Always look\n on the bright\nside of life.'

>>> print(mantra)
Always look
 on the bright
side of life.

#三重引号字符串常用作文档字符串,


#也可以用来临时禁用(注释掉)一段代码
X = 1
"""
import os                   # Disable this code temporarily
print(os.getcwd())
"""
Y = 2



>>> len('abc')           # Length: number of items
3
>>> 'abc' + 'def'        # Concatenation: a new string
'abcdef'
>>> 'Ni!' * 4            # Repetition: like "Ni!" + "Ni!" + ...
'Ni!Ni!Ni!Ni!'

>>> print('------- ...more... ---')       # 80 dashes, the hard way
>>> print('-' * 80)

#操作符重载,数字和字符串都可以使用+和*,但是效果却不同,多态的表现

#字符串支持for循环,支持迭代
>>> myjob = "hacker"
>>> for c in myjob: print(c, end=' ')     # Step through items
#print的调用形式有关键字参数
...
h a c k e r


#成员关系测试in
>>> "k" in myjob              # Found
True
>>> "z" in myjob              # Not found
False
>>> 'spam' in 'abcspamdef'    # Substring search, no position returned
True

#索引和分片 S[i:j] j不包括在内
>>> S = 'spam'
>>> S[0], S[-2]               # Indexing from front or end
('s', 'a')
>>> S[1:3], S[1:], S[:-1]     # Slicing: extract a section
('pa', 'pam', 'spa')

#分片的第三个参数
>>> S = 'abcdefghijklmnop'
>>> S[1:10:2]
'bdfhj'
>>> S[::2]
'acegikmo'

#字符串反转
>>> S = 'hello'
>>> S[::-1]
'olleh'

>>> S = 'abcedfg'
>>> S[5:1:-1]
'fdec'


#slice
>>> 'spam'[1:3]              # Slicing syntax
'pa'
>>> 'spam'[slice(1, 3)]      # Slice objects
'pa'
>>> 'spam'[::-1]
'maps'
>>> 'spam'[slice(None, None, -1)]
'maps'


#分片的典型应用
# File echo.py
import sys
print(sys.argv)

% python echo.py -a -b -c
['echo.py', '-a', '-b', '-c']

sys.argv[1:] 就可以提取参数


#字符串转换工具
>>> "42" + 1
TypeError: cannot concatenate 'str' and 'int' objects

>>> int("42"), str(42)      # Convert from/to string
(42, '42')
>>> repr(42)                # Convert to as-code string
'42'

>>> print(str('spam'), repr('spam'))
('spam', "'spam'")



>>> S = "42"
>>> I = 1
>>> S + I
TypeError: cannot concatenate 'str' and 'int' objects

>>> int(S) + I         # Force addition
43

>>> S + str(I)         # Force concatenation
'421'

>>> str(3.1415), float("1.5")
('3.1415', 1.5)
>>> text = "1.234E-10"
>>> float(text)
1.2340000000000001e-010

eval函数


#字符串代码转换,2个内置函数
>>> ord('s')
115
>>> chr(115)
's'

>>> S = '5'
>>> S = chr(ord(S) + 1)
>>> S
'6'
>>> S = chr(ord(S) + 1)
>>> S
'7'

>>> int('5')
5
>>> ord('5') - ord('0')
5

#将二进制数字字符串转换为十进制整数
>>> B = '1101'              # Convert binary digits to integer with ord
>>> I = 0
>>> while B != '':
...     I = I * 2 + (ord(B[0]) - ord('0'))
...     B = B[1:]
...
>>> I
13

>>> int('1101', 2)      # Convert binary to integer: built-in
13
>>> bin(13)             # Convert integer to binary
'0b1101'


#修改字符串
>>> S = 'spam'
>>> S[0] = "x"
Raises an error!


>>> S = S + 'SPAM!'    # To change a string, make a new one
>>> S
'spamSPAM!'
>>> S = S[:4] + 'Burger' + S[-1]
>>> S
'spamBurger!'


>>> S = 'splot'
>>> S = S.replace('pl', 'pamal')
>>> S
'spamalot'


#字符串格式化
>>> 'That is %d %s bird!' % (1, 'dead')           # Format expression
'That is 1 dead bird!'
>>> 'That is {0} {1} bird!'.format(1, 'dead')     # Format method in 2.6 and 3.0
'That is 1 dead bird!'


#字符串方法
>>> S = 'spammy'
>>> S = S[:3] + 'xx' + S[5:]
>>> S
'spaxxy'


>>> S = 'spammy'
>>> S = S.replace('mm', 'xx')
>>> S
'spaxxy'


>>> 'aa$bb$cc$dd'.replace('$', 'SPAM')
'aaSPAMbbSPAMccSPAMdd'

#find方法
>>> S = 'xxxxSPAMxxxxSPAMxxxx'
>>> where = S.find('SPAM')                # Search for position
>>> where                                 # Occurs at offset 4
4
>>> S = S[:where] + 'EGGS' + S[(where+4):]
>>> S
'xxxxEGGSxxxxSPAMxxxx'


>>> S = 'xxxxSPAMxxxxSPAMxxxx'
>>> S.replace('SPAM', 'EGGS')         # Replace all
'xxxxEGGSxxxxEGGSxxxx'
>>> S.replace('SPAM', 'EGGS', 1)      # Replace one
'xxxxEGGSxxxxSPAMxxxx'


#将字符串转化为list,然后修改完之后,用join方法连接起来
>>> S = 'spammy'
>>> L = list(S)
>>> L
['s', 'p', 'a', 'm', 'm', 'y']

>>> L[3] = 'x'                         # Works for lists, not strings
>>> L[4] = 'x'
>>> L
['s', 'p', 'a', 'x', 'x', 'y']

>>> S = ''.join(L)
>>> S
'spaxxy'


>>> 'SPAM'.join(['eggs', 'sausage', 'ham', 'toast'])
'eggsSPAMsausageSPAMhamSPAMtoast'



#字符串分割
>>> line = 'aaa bbb ccc'
>>> col1 = line[0:3]
>>> col3 = line[8:]
>>> col1
'aaa'
>>> col3
'ccc'

>>> line = 'aaa bbb ccc'
>>> cols = line.split()
>>> cols
['aaa', 'bbb', 'ccc']

>>> line = 'bob,hacker,40'
>>> line.split(',')
['bob', 'hacker', '40']

>>> line = "i'mSPAMaSPAMlumberjack"
>>> line.split("SPAM")
["i'm", 'a', 'lumberjack']


#字符串的其他方法
>>> line = "The knights who say Ni!\n"
>>> line.rstrip()
'The knights who say Ni!'
>>> line.upper()
'THE KNIGHTS WHO SAY NI!\n'
>>> line.isalpha()
False
>>> line.endswith('Ni!\n')
True
>>> line.startswith('The')
True


#in可以用于字符串
>>> line
'The knights who say Ni!\n'
>>> line.find('Ni') != -1            # Search via method call or expression
True
>>> 'Ni' in line
True

>>> sub = 'Ni!\n'
>>> line.endswith(sub)               # End test via method call or slice
True
>>> line[-len(sub):] == sub
True

#寻求帮助  help(str.method)


# 正则表达式模块,import re


#最初的字符串模块string,调用形式  string.method(X, arguments)


#对比
>>> S = 'a+b+c+'

>>> x = S.replace('+', 'spam')
>>> x
'aspambspamcspam'

#string模块(string.replace这类函数只在Python 2.X中可用,3.0中已移除,应改用字符串方法)
>>> import string
>>> y = string.replace(S, '+', 'spam')
>>> y
'aspambspamcspam'



# NOTE: the spacing in the output of some of the following
# tests may differ from what is shown in the book, and what
# appears when run; cut-and-paste lost some whitespace here...


#字符串格式化表达式
>>> 'That is %d %s bird!' % (1, 'dead')      # Format expression
'That is 1 dead bird!'

# ADDED: the new method equivalent
>>> 'That is {0:d} {1:s} bird!'.format(1, 'dead')
'That is 1 dead bird!'

>>> exclamation = "Ni"
>>> "The knights who say %s!" % exclamation
'The knights who say Ni!'

>>> "%d %s %d you" % (1, 'spam', 4)
'1 spam 4 you'

>>> "%s -- %s -- %s" % (42, 3.14159, [1, 2, 3])
'42 -- 3.14159 -- [1, 2, 3]'


%s 字符串,使用str
%r 使用repr,而不是str
%c 字符
%d 十进制
%i 整数
%u 无符号整数
%o 八进制
%x 十六进制
%X 十六进制,X大写
%e 科学记数法
%E 科学记数法,E大写
%f 浮点数
%F 浮点数
%g 浮点e或f
%G 浮点E或F
%% 常量%


完整格式:%[(name)][flags][width][.precision]typecode

name可以放置字典的键
flags可以是:+(显示正负号)、-(左对齐)、0(用0补齐)
width表示整个字段的最小宽度
precision表示小数点后的位数,width和precision可以使用*,从输入值的下一项中获取


>>> x = 1234
>>> res = "integers: ...%d...%-6d...%06d" % (x, x, x)
>>> res
'integers: ...1234...1234  ...001234'

>>> x = 1.23456789
>>> x
1.2345678899999999

>>> '%e | %f | %g' % (x, x, x)
'1.234568e+00 | 1.234568 | 1.23457'

>>> '%E' % x
'1.234568E+00'

>>> '%-6.2f | %05.2f | %+06.1f' % (x, x, x)
'1.23   | 01.23 | +001.2'

>>> "%s" % x, str(x)
('1.23456789', '1.23456789')

>>> '%f, %.2f, %.*f' % (1/3.0, 1/3.0, 4, 1/3.0)
'0.333333, 0.33, 0.3333'


#基于字典的字符串格式化
>>> "%(n)d %(x)s" % {"n":1, "x":"spam"}
'1 spam'

# Template with substitution targets
>>> reply = """
Greetings...
Hello %(name)s!
Your age squared is %(age)s
"""
>>> values = {'name': 'Bob', 'age': 40}       # Build up values to substitute
>>> print(reply % values)                     # Perform substitutions

Greetings...
Hello Bob!
Your age squared is 40

#vars函数返回一个字典
>>> food = 'spam'
>>> age = 40
>>> vars()
{'food': 'spam', 'age': 40, ...many more... }

>>> "%(age)d %(food)s" % vars()
'40 spam'

#字符串的格式方法format,位置
>>> template = '{0}, {1} and {2}'                    # By position
>>> template.format('spam', 'ham', 'eggs')
'spam, ham and eggs'


#字符串的格式方法format,关键字
>>> template = '{motto}, {pork} and {food}'          # By keyword
>>> template.format(motto='spam', pork='ham', food='eggs')
'spam, ham and eggs'

#字符串的格式方法format,关键字和位置混合
>>> template = '{motto}, {0} and {food}'             # By both
>>> template.format('ham', motto='spam', food='eggs')
'spam, ham and eggs'



>>> '{motto}, {0} and {food}'.format(42, motto=3.14, food=[1, 2])
'3.14, 42 and [1, 2]'

>>> X = '{motto}, {0} and {food}'.format(42, motto=3.14, food=[1, 2])
>>> X
'3.14, 42 and [1, 2]'

>>> X.split(' and ')
['3.14, 42', '[1, 2]']

>>> Y = X.replace('and', 'but under no circumstances')
>>> Y
'3.14, 42 but under no circumstances [1, 2]'


#格式化字符串中的方括号可以使用字典,属性,索引
>>> import sys
>>> 'My {1[spam]} runs {0.platform}'.format(sys, {'spam': 'laptop'})
'My laptop runs win32'

>>> 'My {config[spam]} runs {sys.platform}'.format(sys=sys,
                                                   config={'spam': 'laptop'})
'My laptop runs win32'



>>> somelist = list('SPAM')
>>> somelist
['S', 'P', 'A', 'M']

>>> 'first={0[0]}, third={0[2]}'.format(somelist)  #支持索引
'first=S, third=A'

>>> 'first={0}, last={1}'.format(somelist[0], somelist[-1])    # [-1] fails in fmt
'first=S, last=M'

>>> parts = somelist[0], somelist[-1], somelist[1:3]           # [1:3] fails in fmt
>>> 'first={0}, last={1}, middle={2}'.format(*parts)
"first=S, last=M, middle=['P', 'A']"


#添加具体的格式化:
{fieldname!conversionflag:formatspec}.format(...)
fieldname表示指定参数的一个数字或关键字,后面跟着可选的.name或者跟着[index]
conversionflag表示r,s,a对应于调用repr,str,ascii内置函数一次

formatspec形式上如同:[[fill]align][sign][#][0][width][.precision][typecode]
align表示对齐方式:<(左对齐)、>(右对齐)、=(在符号之后填充)、^(居中对齐)




>>> '{0:10} = {1:10}'.format('spam', 123.4567)
'spam       =    123.457'

>>> '{0:>10} = {1:<10}'.format('spam', 123.4567)
'      spam = 123.457   '

>>> '{0.platform:>10} = {1[item]:<10}'.format(sys, dict(item='laptop'))
'     win32 = laptop    '


#浮点数
>>> '{0:e}, {1:.3e}, {2:g}'.format(3.14159, 3.14159, 3.14159)
'3.141590e+00, 3.142e+00, 3.14159'

>>> '{0:f}, {1:.2f}, {2:06.2f}'.format(3.14159, 3.14159, 3.14159)
'3.141590, 3.14, 003.14'


#格式化方法也支持十六进制,八进制和二进制
>>> '{0:X}, {1:o}, {2:b}'.format(255, 255, 255)    # Hex, octal, binary
'FF, 377, 11111111'

>>> bin(255), int('11111111', 2), 0b11111111       # Other to/from binary
('0b11111111', 255, 255)

>>> hex(255), int('FF', 16), 0xFF                  # Other to/from hex
('0xff', 255, 255)

>>> oct(255), int('377', 8), 0o377, 0377           # Other to/from octal
('0377', 255, 255, 255)                            # 0377 works in 2.6, not 3.0!



>>> '{0:.2f}'.format(1 / 3.0)                 # Parameters hardcoded
'0.33'
>>> '%.2f' % (1 / 3.0)
'0.33'

>>> '{0:.{1}f}'.format(1 / 3.0, 4)            # Take value from arguments
'0.3333'
>>> '%.*f' % (4, 1 / 3.0)                     # Ditto for expression
'0.3333'


#内置的format函数,运行主体对象的__format__方法
>>> '{0:.2f}'.format(1.2345)     # String method
'1.23'
>>> format(1.2345, '.2f')        # Built-in function
'1.23'
>>> '%.2f' % 1.2345              # Expression
'1.23'


#%表达式和格式化方法的对比,通常%表达式更容易编写
print('%s=%s' % ('spam', 42))                # 2.X+ format expression

print('{0}={1}'.format('spam', 42))          # 3.0 (and 2.6) format method



# The basics: with % instead of format()

#使用%表达式替换格式化方法
>>> template = '%s, %s, %s'
>>> template % ('spam', 'ham', 'eggs')                        # By position
'spam, ham, eggs'

>>> template = '%(motto)s, %(pork)s and %(food)s'
>>> template % dict(motto='spam', pork='ham', food='eggs')    # By key
'spam, ham and eggs'

>>> '%s, %s and %s' % (3.14, 42, [1, 2])                      # Arbitrary types
'3.14, 42 and [1, 2]'


# Adding keys, attributes, and offsets

>>> 'My %(spam)s runs %(platform)s' % {'spam': 'laptop', 'platform': sys.platform}
'My laptop runs win32'

>>> 'My %(spam)s runs %(platform)s' % dict(spam='laptop', platform=sys.platform)
'My laptop runs win32'

>>> somelist = list('SPAM')
>>> parts = somelist[0], somelist[-1], somelist[1:3]
>>> 'first=%s, last=%s, middle=%s' % parts
"first=S, last=M, middle=['P', 'A']"


# Adding specific formatting

>>> '%-10s = %10s' % ('spam', 123.4567)
'spam       =   123.4567'

>>> '%10s = %-10s' % ('spam', 123.4567)
'      spam = 123.4567  '

>>> '%(plat)10s = %(item)-10s' % dict(plat=sys.platform, item='laptop')
'     win32 = laptop    '

# Floating-point numbers

>>> '%e, %.3e, %g' % (3.14159, 3.14159, 3.14159)
'3.141590e+00, 3.142e+00, 3.14159'

>>> '%f, %.2f, %06.2f' % (3.14159, 3.14159, 3.14159)
'3.141590, 3.14, 003.14'

# Hex and octal, but not binary
>>> '%x, %o' % (255, 255)
'ff, 377'



# Hardcoded references in both
#2种方式的对比
>>> import sys
>>> 'My {1[spam]:<8} runs {0.platform:>8}'.format(sys, {'spam': 'laptop'})
'My laptop   runs    win32'

>>> 'My %(spam)-8s runs %(plat)8s' % dict(spam='laptop', plat=sys.platform)
'My laptop   runs    win32'



# Building data ahead of time in both

>>> data = dict(platform=sys.platform, spam='laptop')

>>> 'My {spam:<8} runs {platform:>8}'.format(**data)   #把data字典打散,当作关键词参数传递
'My laptop   runs    win32'

>>> 'My %(spam)-8s runs %(platform)8s' % data
'My laptop   runs    win32'


#python 3.1的扩展
# python 3.1 and later

>>> '{0:d}'.format(999999999999)
'999999999999'
>>> '{0:,d}'.format(999999999999)
'999,999,999,999'

>>> '{:,d}'.format(999999999999)
'999,999,999,999'
>>> '{:,d} {:,d}'.format(9999999, 8888888)
'9,999,999 8,888,888'
>>> '{:,.2f}'.format(296999.2567)
'296,999.26'


#为什么推荐使用格式化方法
>>> '{0:b}'.format((2 ** 16) -1)
'1111111111111111'

>>> '%b' % ((2 ** 16) -1)
ValueError: unsupported format character 'b' (0x62) at index 1

>>> bin((2 ** 16) -1)
'0b1111111111111111'

>>> '%s' % bin((2 ** 16) -1)[2:]
'1111111111111111'


#{i}的形式比较直观
'\n%s<Class %s, address %s:\n%s%s%s>\n' % (...)              # Expression

'\n{0}<Class {1}, address {2}:\n{3}{4}{5}>\n'.format(...)    # Method


#
C:\misc> C:\Python31\python
>>> 'The {0} side {1} {2}'.format('bright', 'of', 'life')
'The bright side of life'
>>>
>>> 'The {} side {} {}'.format('bright', 'of', 'life') # Python 3.1+
'The bright side of life'
>>>
>>> 'The %s side %s %s' % ('bright', 'of', 'life')
'The bright side of life'


C:\misc> C:\Python31\python
>>> '{0:f}, {1:.2f}, {2:05.2f}'.format(3.14159, 3.14159, 3.14159)
'3.141590, 3.14, 03.14'
>>>
>>> '{:f}, {:.2f}, {:06.2f}'.format(3.14159, 3.14159, 3.14159)
'3.141590, 3.14, 003.14'
>>>
>>> '%f, %.2f, %06.2f' % (3.14159, 3.14159, 3.14159)
'3.141590, 3.14, 003.14'



>>> '%.2f' % 1.2345
'1.23'
>>> '%.2f %s' % (1.2345, 99)
'1.23 99'

>>> '%s' % 1.23
'1.23'
>>> '%s' % (1.23,)
'1.23'
>>> '%s' % ((1.23,),)
'(1.23,)'

>>> '{0:.2f}'.format(1.2345)
'1.23'
>>> '{0:.2f} {1}'.format(1.2345, 99)
'1.23 99'
>>> '{0}'.format(1.23)
'1.23'
>>> '{0}'.format((1.23,))
'(1.23,)'



属于同一分类的类型共享同一组操作



python不可变类型:数字 int,字符串 str,元组 tuple,不可变集合 frozenset
可变类型:列表 list,字典 dict,可变集合 set,字节数组 bytearray

评论

此博客中的热门博文

OAuth 2教程

网格策略

apt-get详细使用