source: trunk/packages/xen-common/xen-common/tools/python/xen/xend/XendCheckpoint.py @ 34

Last change on this file since 34 was 34, checked in by hartmans, 17 years ago

Add xen and xen-common

File size: 11.3 KB
Line 
1# Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
2# Copyright (C) 2005 XenSource Ltd
3
4# This file is subject to the terms and conditions of the GNU General
5# Public License.  See the file "COPYING" in the main directory of
6# this archive for more details.
7
8import os
9import re
10import string
11import threading
12import fcntl
13from struct import pack, unpack, calcsize
14
15from xen.util.xpopen import xPopen3
16import xen.util.auxbin
17import xen.lowlevel.xc
18
19from xen.xend import balloon, sxp
20from xen.xend.XendError import XendError, VmError
21from xen.xend.XendLogging import log
22from xen.xend.XendConfig import XendConfig
23from xen.xend.XendConstants import *
24
# Magic string written at the very start of a guest state file by save();
# restore() refuses any stream that does not begin with it.
SIGNATURE = "LinuxGuestRecord"
# Magic string that precedes the qemu device model state of an HVM guest.
QEMU_SIGNATURE = "QemuDeviceModelRecord"
# Number of bytes requested per os.read() while copying device model state.
dm_batch = 512
# Names of the helper binaries; they are resolved to full paths with
# xen.util.auxbin.pathTo() before being executed.
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"


# Native sizes, in bytes, of the corresponding C types as reported by
# struct.calcsize().
# NOTE(review): restore() uses sizeof_int to read a length packed as "!i"
# (standard size); this presumably relies on native int being 4 bytes --
# confirm for the supported platforms.
sizeof_int = calcsize("i")
sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")


# Single xen.lowlevel.xc.xc instance shared by the functions in this module.
xc = xen.lowlevel.xc.xc()
38
39
def write_exact(fd, buf, errmsg):
    """Write the whole of buf to the file descriptor fd.

    os.write() is allowed to write fewer bytes than it was given (for
    instance on a pipe or socket), so the call is repeated on the unwritten
    tail until everything has been sent.

    fd     -- open, writable file descriptor.
    buf    -- string to write.
    errmsg -- message of the XendError raised if no progress can be made.

    Raises XendError(errmsg) if os.write() reports that nothing was
    written; errors raised by os.write() itself propagate unchanged.
    """
    offset = 0
    total = len(buf)
    while offset < total:
        written = os.write(fd, buf[offset:])
        if written <= 0:
            # No forward progress: give up rather than spin forever.
            raise XendError(errmsg)
        offset += written
43
44
def read_exact(fd, size, errmsg):
    """Read exactly size bytes from the file descriptor fd.

    os.read() may return fewer bytes than requested, so it is called
    repeatedly until size bytes have been collected.

    fd     -- open, readable file descriptor.
    size   -- number of bytes to read; 0 yields the empty string.
    errmsg -- message of the XendError raised on failure.

    Returns the bytes read as a single string.

    Raises XendError(errmsg) if size is negative or if end-of-file is
    reached before size bytes were read.
    """
    if size < 0:
        # A negative length can only come from a corrupt length field;
        # report it like any other malformed input instead of letting
        # os.read() fail with an unrelated error.
        raise XendError(errmsg)

    # Collect the pieces and join once at the end rather than growing a
    # string with repeated concatenation.
    chunks = []
    remaining = size
    while remaining != 0:
        chunk = os.read(fd, remaining)
        if not chunk:
            log.error("read_exact: EOF trying to read %d (buf='%s')",
                      remaining, "".join(chunks))
            raise XendError(errmsg)
        remaining -= len(chunk)
        chunks.append(chunk)
    return "".join(chunks)
56
57
def save(fd, dominfo, network, live, dst, checkpoint=False):
    """Write the state of a domain to fd as a guest state file.

    The stream starts with SIGNATURE, followed by a network-order int
    holding the length of the domain's SXP configuration and the
    configuration itself.  The xc_save helper is then run with fd so that
    it can append its own output and, for HVM guests, QEMU_SIGNATURE and
    the contents of the device model state file are appended last.

    fd         -- open file descriptor the state is written to; it is also
                  handed to the xc_save child process.
    dominfo    -- the domain being saved.
    network    -- passed through to dominfo.migrateDevices().
    live       -- true to set the 'live' bit in the flags given to xc_save.
    dst        -- passed through to dominfo.migrateDevices().
    checkpoint -- if true the domain is resumed once its state has been
                  written; otherwise it is destroyed.

    A failure to write the leading signature raises XendError.  Any
    exception raised after the domain has been renamed is logged, the
    domain is resumed and its original name restored, and the exception is
    not propagated.
    """
    write_exact(fd, SIGNATURE, "could not write guest state file: signature")

    config = sxp.to_string(dominfo.sxpr())

    domain_name = dominfo.getName()
    # Rename the domain temporarily, so that we don't get a name clash if this
    # domain is migrating (live or non-live) to the local host.  Doing such a
    # thing is useful for debugging.
    dominfo.setName('migrating-' + domain_name)

    try:
        dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)

        write_exact(fd, pack("!i", len(config)),
                    "could not write guest state file: config len")
        write_exact(fd, config, "could not write guest state file: config")

        hvm = dominfo.info.is_hvm()

        # xc_save takes three customization parameters: maxit, max_f, and
        # flags.  The last controls whether or not the save is 'live', while
        # the first two further customize behaviour when 'live' save is
        # enabled.  Passing "0" simply uses the defaults compiled into
        # libxenguest; see the comments and/or code in xc_linux_save() for
        # more information.
        cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
               str(dominfo.getDomid()), "0", "0",
               str(int(live) | (int(hvm) << 2))]
        log.debug("[xc_save]: %s", " ".join(cmd))

        def saveInputHandler(line, tochild):
            # Called for every line xc_save prints on its stdout.  When the
            # helper asks for the domain to be suspended, do so, run the
            # remaining device migration steps and acknowledge with "done".
            log.debug("In saveInputHandler %s", line)
            if line == "suspend":
                log.debug("Suspending %d ...", dominfo.getDomid())
                dominfo.shutdown('suspend')
                dominfo.waitForShutdown()
                dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
                                       domain_name)
                log.info("Domain %d suspended.", dominfo.getDomid())
                dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
                                       domain_name)
                # send signal to device model for save
                if hvm:
                    log.info("release_devices for hvm domain")
                    dominfo._releaseDevices(True)
                tochild.write("done\n")
                tochild.flush()
                log.debug('Written done')

        forkHelper(cmd, fd, saveInputHandler, False)

        # put qemu device model state
        if hvm:
            write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
            qemu_path = "/tmp/xen.qemu-dm.%d" % dominfo.getDomid()
            qemu_fd = os.open(qemu_path, os.O_RDONLY)
            try:
                while True:
                    buf = os.read(qemu_fd, dm_batch)
                    if not buf:
                        break
                    write_exact(fd, buf, "could not write device model state")
            finally:
                # Always release the descriptor, even if the copy failed.
                os.close(qemu_fd)
            os.remove(qemu_path)

        if checkpoint:
            dominfo.resumeDomain()
        else:
            dominfo.destroyDomain()
            dominfo.testDeviceComplete()
        try:
            dominfo.setName(domain_name)
        except VmError:
            # Ignore this.  The name conflict (hopefully) arises because we
            # are doing localhost migration; if we are doing a suspend of a
            # persistent VM, we need the rename, and don't expect the
            # conflict.  This needs more thought.
            pass

    except Exception:
        # NOTE(review): the failure is only logged, so callers cannot tell
        # a failed save from a successful one.  Re-raising here would
        # change what callers observe -- confirm before doing so.
        log.exception("Save failed on domain %s (%s).", domain_name,
                      dominfo.getDomid())

        dominfo.resumeDomain()
        log.debug("XendCheckpoint.save: resumeDomain")

        try:
            dominfo.setName(domain_name)
        except Exception:
            log.exception("Failed to reset the migrating domain's name")
149
150
151def restore(xd, fd, dominfo = None, paused = False):
152    signature = read_exact(fd, len(SIGNATURE),
153        "not a valid guest state file: signature read")
154    if signature != SIGNATURE:
155        raise XendError("not a valid guest state file: found '%s'" %
156                        signature)
157
158    l = read_exact(fd, sizeof_int,
159                   "not a valid guest state file: config size read")
160    vmconfig_size = unpack("!i", l)[0]
161    vmconfig_buf = read_exact(fd, vmconfig_size,
162        "not a valid guest state file: config read")
163
164    p = sxp.Parser()
165    p.input(vmconfig_buf)
166    if not p.ready:
167        raise XendError("not a valid guest state file: config parse")
168
169    vmconfig = p.get_val()
170
171    if dominfo:
172        dominfo.resume()
173    else:
174        dominfo = xd.restore_(vmconfig)
175
176    store_port   = dominfo.getStorePort()
177    console_port = dominfo.getConsolePort()
178
179    assert store_port
180    assert console_port
181
182    nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
183
184    # if hvm, pass mem size to calculate the store_mfn
185    image_cfg = dominfo.info.get('image', {})
186    is_hvm = dominfo.info.is_hvm()
187    if is_hvm:
188        apic = int(dominfo.info['platform'].get('apic', 0))
189        pae  = int(dominfo.info['platform'].get('pae',  0))
190        log.info("restore hvm domain %d, apic=%d, pae=%d",
191                 dominfo.domid, apic, pae)
192    else:
193        apic = 0
194        pae  = 0
195
196    try:
197        shadow = dominfo.info['shadow_memory']
198        log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ",
199                  dominfo.info['shadow_memory'],
200                  dominfo.info['memory_static_max'],
201                  dominfo.info['memory_static_min'])
202
203        balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
204
205        shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow)
206        dominfo.info['shadow_memory'] = shadow_cur
207
208        xc.domain_setmaxmem(dominfo.getDomid(), dominfo.getMemoryMaximum())
209
210        cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
211                        fd, dominfo.getDomid(),
212                        store_port, console_port, int(is_hvm), pae, apic])
213        log.debug("[xc_restore]: %s", string.join(cmd))
214
215        handler = RestoreInputHandler()
216
217        forkHelper(cmd, fd, handler.handler, True)
218
219        # We don't want to pass this fd to any other children -- we
220        # might need to recover ths disk space that backs it.
221        try:
222            flags = fcntl.fcntl(fd, fcntl.F_GETFD)
223            flags |= fcntl.FD_CLOEXEC
224            fcntl.fcntl(fd, fcntl.F_SETFD, flags)
225        except:
226            pass
227
228        if handler.store_mfn is None:
229            raise XendError('Could not read store MFN')
230
231        if not is_hvm and handler.console_mfn is None:
232            raise XendError('Could not read console MFN')       
233
234        # get qemu state and create a tmp file for dm restore
235        if is_hvm:
236            qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
237                                        "invalid device model signature read")
238            if qemu_signature != QEMU_SIGNATURE:
239                raise XendError("not a valid device model state: found '%s'" %
240                                qemu_signature)
241            qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
242                              os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
243            while True:
244                buf = os.read(fd, dm_batch)
245                if len(buf):
246                    write_exact(qemu_fd, buf,
247                                "could not write dm state to tmp file")
248                else:
249                    break
250            os.close(qemu_fd)
251
252
253        os.read(fd, 1)           # Wait for source to close connection
254       
255        dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
256
257        #
258        # We shouldn't hold the domains_lock over a waitForDevices
259        # As this function sometime gets called holding this lock,
260        # we must release it and re-acquire it appropriately
261        #
262        from xen.xend import XendDomain
263
264        lock = True;
265        try:
266            XendDomain.instance().domains_lock.release()
267        except:
268            lock = False;
269
270        try:
271            dominfo.waitForDevices() # Wait for backends to set up
272        except Exception, exn:
273            log.exception(exn)
274
275        if lock:
276            XendDomain.instance().domains_lock.acquire()
277
278        if not paused:
279            dominfo.unpause()
280
281        return dominfo
282    except:
283        dominfo.destroy()
284        raise
285
286
class RestoreInputHandler:
    """Collects the MFNs that the restore helper prints on its stdout.

    After the helper has run, store_mfn and console_mfn hold the integers
    taken from the "store-mfn <n>" and "console-mfn <n>" lines, or None if
    no such line was seen.
    """

    def __init__(self):
        self.console_mfn = None
        self.store_mfn = None


    def handler(self, line, _):
        """Record the MFN announced by line, if it announces one.

        line -- one line of helper output, without trailing whitespace.
        _    -- ignored; present so the method fits the forkHelper callback
                signature.
        """
        store = re.match(r"^(store-mfn) (\d+)$", line)
        if store:
            self.store_mfn = int(store.group(2))
            return

        console = re.match(r"^(console-mfn) (\d+)$", line)
        if console:
            self.console_mfn = int(console.group(2))
301
302
303def forkHelper(cmd, fd, inputHandler, closeToChild):
304    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
305
306    if closeToChild:
307        child.tochild.close()
308
309    thread = threading.Thread(target = slurp, args = (child.childerr,))
310    thread.start()
311
312    try:
313        try:
314            while 1:
315                line = child.fromchild.readline()
316                if line == "":
317                    break
318                else:
319                    line = line.rstrip()
320                    log.debug('%s', line)
321                    inputHandler(line, child.tochild)
322
323        except IOError, exn:
324            raise XendError('Error reading from child process for %s: %s' %
325                            (cmd, exn))
326    finally:
327        child.fromchild.close()
328        if not closeToChild:
329            child.tochild.close()
330        thread.join()
331        child.childerr.close()
332        status = child.wait()
333
334    if status >> 8 == 127:
335        raise XendError("%s failed: popen failed" % string.join(cmd))
336    elif status != 0:
337        raise XendError("%s failed" % string.join(cmd))
338
339
def slurp(infile):
    """Log every line of infile until end-of-file.

    Lines of the form "ERROR: <text>" are logged at error level with the
    prefix removed; every other line is logged at info level.  Each line is
    stripped of surrounding whitespace first.
    """
    # readline() returns "" only at end-of-file.
    for raw in iter(infile.readline, ""):
        text = raw.strip()
        error = re.match(r"^ERROR: (.*)", text)
        if error is not None:
            log.error('%s', error.group(1))
        else:
            log.info('%s', text)
Note: See TracBrowser for help on using the repository browser.