File-I/O

ist doch ganz einfach, oder?

Christian Kauhaus · kc@gocept.com

FLYING CIRCUS
let your web app fly
flyingcircus.io
with open('users.json', 'w') as f:
    json.dump(userdata, f)
Abstraktion
gut so
(meistens)
Kernelspace

Userspace

open(filename, mode,

Codec

encoding=...,

Universal newline

newline=...,

Buffer

Cache(s)
Filesystem

Disk

buffering=...)

os.open(filename, flags,
mode)
Atomarität
Illusion:
I/O geschieht
in einem Stück
with open(filename, 'wb') as f:
    f.write(data)

open("out", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 6
write(6, "1628 0 0 1"..., 1572864) = 1572864
write(6, "232210H242276vn"..., 159416) = 159416
close(6) = 0
os.listdir('/usr/lib')

openat(AT_FDCWD, "/usr/lib", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 813 entries */, 32768) = 32728
getdents(3, /* 809 entries */, 32768) = 32744
getdents(3, /* 811 entries */, 32768) = 32720
getdents(3, /* 657 entries */, 32768) = 26424
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
Problem?
with open(filename, 'rb') as f:
    data = f.read()
with open(filename, 'wb') as f:
    f.write(process(data))
with open(filename, 'rb') as f:
    data = f.read()
with tempfile.NamedTemporaryFile(
        'wb', dir=os.path.dirname(filename)) as t:
    t.write(process(data))
    tempname = t.name
os.rename(tempname, filename)
Persistenz
Illusion:
I/O greift direkt
auf die Disk zu
>>> f = open('/tmp/out', 'w')
>>> print('hello world', file=f)
>>> os.system('cat /tmp/out')
0
>>> f.close()
>>> os.system('cat /tmp/out')
hello world
0
$ python write.py
$ ls -l out
-rw-r--r-- 1 ck users 5851 Okt 12 11:49 out
# system crash, reboot
$ ls -l out
-rw-r--r-- 1 ck users 0 Okt 12 11:49 out
Buffer leeren
Cache leeren
with open(filename, 'wb') as f:
    f.write(data)
    f.flush()
    os.fsync(f)
Text-Dateien
Illusion:
Dateien
enthalten
Unicode-Codepoints
>>> with open(filename) as f:
...     f.read()
UnicodeDecodeError: 'ascii' codec can't decode byte
0xc3 in position 1: ordinal not in range(128)
Implizite Codierung
# enctest.py
print('preferred encoding:',
      locale.getpreferredencoding())
with open('preferred', 'w') as f:
    f.write('hëllo\n')
print('default encoding',
      sys.getdefaultencoding())
with open('default', 'wb') as f:
    f.write('hëllo\n'.encode())
$ export LC_ALL=de_DE # latin1 encoding
$ python3.2 enctest.py
preferred encoding: ISO-8859-1
default encoding utf-8
$ ls -l preferred default
-rw-r--r-- 1 ck users 6 Okt 12 12:52 preferred
-rw-r--r-- 1 ck users 7 Okt 12 12:52 default
explizit
with open(filename, 'w', encoding='utf-8') as f:
    f.write(data)
Fazit
File-I/O ist einfach
Grenzen der
Abstraktion kennen
Fragen?
Bildnachweis

© zweiwest.ch

flickr.com/teachernz
CC BY-NC-SA

flickr.com/iaea_imagebank
CC BY-NC-ND

flickr.com/atlantica
CC BY

flickr.com/jasoneppink
CC BY-NC-SA

flickr.com/seychelles88
CC BY-NC-SA

File-I/O -- ist doch ganz einfach, oder?