source: trunk/packages/xen-3.1/xen-3.1/xen/arch/ia64/xen/tlb_track.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 18 years ago

Add xen and xen-common

File size: 17.9 KB
/******************************************************************************
 * tlb_track.c
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <asm/tlb_track.h>
#include <asm/p2m_entry.h>
#include <asm/vmx_mm_def.h>  /* for IA64_RR_SHIFT */
#include <asm/vmx_vcpu.h>    /* for VRN7 */
#include <asm/vcpu.h>        /* for PSCB() */
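
/*
 * TLB insert tracking.
 *
 * A tracked p2m entry carries the _PAGE_TLB_TRACK_MASK bits
 * (_PAGE_TLB_INSERTED, _PAGE_TLB_INSERTED_MANY) recording whether a guest
 * translation backed by that entry has been inserted once or possibly many
 * times.  A per-domain hash, keyed on the address of the p2m entry, holds a
 * tlb_track_entry (ptep, pte value, vaddr, rid, dirty pcpu/vcpu masks) for
 * each singly-inserted pte, so the zapping side can flush just that
 * translation instead of the whole TLB; once a pte is marked "inserted many"
 * only a full flush remains possible.
 */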
#define CONFIG_TLB_TRACK_DEBUG
#ifdef CONFIG_TLB_TRACK_DEBUG
# define tlb_track_printd(fmt, ...)     \
    dprintk(XENLOG_DEBUG, fmt, ##__VA_ARGS__)
#else
# define tlb_track_printd(fmt, ...)     do { } while (0)
#endif

static int
tlb_track_allocate_entries(struct tlb_track* tlb_track)
{
    struct page_info* entry_page;
    struct tlb_track_entry* track_entries;
    unsigned int allocated;
    unsigned long i;

    BUG_ON(tlb_track->num_free > 0);
    if (tlb_track->num_entries >= tlb_track->limit) {
        dprintk(XENLOG_WARNING, "%s: num_entries %d limit %d\n",
                __func__, tlb_track->num_entries, tlb_track->limit);
        return -ENOMEM;
    }
    entry_page = alloc_domheap_page(NULL);
    if (entry_page == NULL) {
        dprintk(XENLOG_WARNING,
                "%s: domheap page failed. num_entries %d limit %d\n",
                __func__, tlb_track->num_entries, tlb_track->limit);
        return -ENOMEM;
    }

    list_add(&entry_page->list, &tlb_track->page_list);
    track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
    allocated = PAGE_SIZE / sizeof(track_entries[0]);
    tlb_track->num_entries += allocated;
    tlb_track->num_free += allocated;
    for (i = 0; i < allocated; i++) {
        list_add(&track_entries[i].list, &tlb_track->free_list);
        // tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
    }
    tlb_track_printd("allocated %d num_entries %d num_free %d\n",
                     allocated, tlb_track->num_entries, tlb_track->num_free);
    return 0;
}
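
/*
 * Create the per-domain TLB tracker: set up the free list of
 * tlb_track_entry structures (backed by domheap pages, up to
 * TLB_TRACK_LIMIT_ENTRIES) and a one-page hash table whose size is rounded
 * down to a power of two.  The tracker is published to d->arch.tlb_track
 * only after initialization is complete.
 */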
int
tlb_track_create(struct domain* d)
{
    struct tlb_track* tlb_track = NULL;
    struct page_info* hash_page = NULL;
    unsigned int hash_size;
    unsigned int hash_shift;
    unsigned int i;

    tlb_track = xmalloc(struct tlb_track);
    if (tlb_track == NULL)
        goto out;

    hash_page = alloc_domheap_page(NULL);
    if (hash_page == NULL)
        goto out;

    spin_lock_init(&tlb_track->free_list_lock);
    INIT_LIST_HEAD(&tlb_track->free_list);
    tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
    tlb_track->num_entries = 0;
    tlb_track->num_free = 0;
    INIT_LIST_HEAD(&tlb_track->page_list);
    if (tlb_track_allocate_entries(tlb_track) < 0)
        goto out;

    spin_lock_init(&tlb_track->hash_lock);
    /* XXX hash size optimization */
    hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
    for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
        /* nothing */;
    tlb_track->hash_size = (1 << hash_shift);
    tlb_track->hash_shift = hash_shift;
    tlb_track->hash_mask = (1 << hash_shift) - 1;
    tlb_track->hash = page_to_virt(hash_page);
    for (i = 0; i < tlb_track->hash_size; i++)
        INIT_LIST_HEAD(&tlb_track->hash[i]);

    smp_mb(); /* make initialization visible before use. */
    d->arch.tlb_track = tlb_track;
    dprintk(XENLOG_DEBUG, "hash 0x%p hash_size %d\n",
            tlb_track->hash, tlb_track->hash_size);

    return 0;

out:
    if (hash_page != NULL)
        free_domheap_page(hash_page);

    if (tlb_track != NULL)
        xfree(tlb_track);

    return -ENOMEM;
}
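
/*
 * Tear down the tracker at domain destruction time.  All entries must
 * already have been returned to the free list (num_free == num_entries);
 * the backing domheap pages and the hash page are then freed.
 */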
void
tlb_track_destroy(struct domain* d)
{
    struct tlb_track* tlb_track = d->arch.tlb_track;
    struct page_info* page;
    struct page_info* next;

    spin_lock(&tlb_track->free_list_lock);
    BUG_ON(tlb_track->num_free != tlb_track->num_entries);

    list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
        list_del(&page->list);
        free_domheap_page(page);
    }

    free_domheap_page(virt_to_page(tlb_track->hash));
    xfree(tlb_track);
    // d->tlb_track = NULL;
}
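
/*
 * Pop an entry from the free list, growing the pool with
 * tlb_track_allocate_entries() when it is empty.  Returns NULL if no entry
 * can be obtained (allocation failed or the limit was reached).
 */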
static struct tlb_track_entry*
tlb_track_get_entry(struct tlb_track* tlb_track)
{
    struct tlb_track_entry* entry = NULL;
    spin_lock(&tlb_track->free_list_lock);
    if (tlb_track->num_free == 0)
        (void)tlb_track_allocate_entries(tlb_track);

    if (tlb_track->num_free > 0) {
        BUG_ON(list_empty(&tlb_track->free_list));
        entry = list_entry(tlb_track->free_list.next,
                           struct tlb_track_entry, list);
        tlb_track->num_free--;
        list_del(&entry->list);
    }
    spin_unlock(&tlb_track->free_list_lock);
    return entry;
}

void
tlb_track_free_entry(struct tlb_track* tlb_track,
                     struct tlb_track_entry* entry)
{
    spin_lock(&tlb_track->free_list_lock);
    list_add(&entry->list, &tlb_track->free_list);
    tlb_track->num_free++;
    spin_unlock(&tlb_track->free_list_lock);
}


#include <linux/hash.h>
/* XXX hash function. */
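/* Buckets are keyed on the address of the p2m entry (ptep) itself, so the
   zapping side can look up tracking entries knowing only which p2m entry it
   is modifying. */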
static struct list_head*
tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
{
    unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
    BUG_ON(hash >= tlb_track->hash_size);
    BUG_ON((hash & tlb_track->hash_mask) != hash);
    return &tlb_track->hash[hash];
}
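
/*
 * Return 1 if the pte observed by the cmpxchg (ret_pte) differs from
 * old_pte in its pfn or in any bit outside the tlb-tracking bits,
 * i.e. another thread zapped or replaced the p2m entry underneath us.
 */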
static int
tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
{
    if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
        (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
        (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
        /* Another thread zapped the p2m entry. */
        return 1;
    }
    return 0;
}
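
/*
 * Record that the current vcpu has inserted (or may have inserted) a
 * translation backed by this p2m entry.  The _PAGE_TLB_INSERTED and
 * _PAGE_TLB_INSERTED_MANY bits are updated with ptep_cmpxchg_rel() and kept
 * consistent with the hash under hash_lock.  Possible results:
 *   TLB_TRACK_FOUND      an entry for (ptep, vaddr, rid) already exists;
 *                        only the dirty masks are updated.
 *   TLB_TRACK_NOT_FOUND  no entry existed; a new tracking entry was inserted.
 *   TLB_TRACK_MANY       the page is (now) tracked in "many" mode; only a
 *                        full flush remains possible on the zapping side.
 *   TLB_TRACK_AGAIN      a racing update was detected; the caller retries
 *                        via p2m_entry_set_retry().
 */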
static TLB_TRACK_RET_T
tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
                          volatile pte_t* ptep, pte_t old_pte,
                          unsigned long vaddr, unsigned long rid)
{
    unsigned long mfn = pte_pfn(old_pte);
    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
    struct tlb_track_entry* entry;
    struct tlb_track_entry* new_entry = NULL;
    unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
    pte_t new_pte;
    pte_t ret_pte;

    struct vcpu* v = current;
    TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;

#if 0 /* this is done at vcpu_tlb_track_insert_or_dirty() */
    perfc_incr(tlb_track_iod);
    if (!pte_tlb_tracking(old_pte)) {
        perfc_incr(tlb_track_iod_not_tracked);
        return TLB_TRACK_NOT_TRACKED;
    }
#endif
    if (pte_tlb_inserted_many(old_pte)) {
        perfc_incr(tlb_track_iod_tracked_many);
        return TLB_TRACK_MANY;
    }

    /* vaddr must be normalized so that it is in vrn7 and page aligned. */
    BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7);
    BUG_ON((vaddr & ~PAGE_MASK) != 0);
#if 0
    tlb_track_printd("\n"
                     "\tmfn 0x%016lx\n"
                     "\told_pte 0x%016lx ptep 0x%p\n"
                     "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
                     "\ttlb_track 0x%p head 0x%p\n",
                     mfn,
                     pte_val(old_pte), ptep, pte_val(*ptep),
                     vaddr, rid,
                     tlb_track, head);
#endif

 again:
    /*
     * The zapping side may zap the p2m entry and then remove the tlb track
     * entry non-atomically, so we may see a stale tlb track entry here;
     * p2m_entry_retry() handles such a case.
     * Alternatively, another thread may have zapped the p2m entry, removed
     * its tlb track entry and inserted a new one.
     */
    spin_lock(&tlb_track->hash_lock);
    list_for_each_entry(entry, head, list) {
        if (entry->ptep != ptep)
            continue;

        if (pte_pfn(entry->pte_val) == mfn) {
            // tlb_track_entry_printf(entry);
            if (entry->vaddr == vaddr && entry->rid == rid) {
                // tlb_track_printd("TLB_TRACK_FOUND\n");
                ret = TLB_TRACK_FOUND;
                perfc_incr(tlb_track_iod_found);
#ifdef CONFIG_TLB_TRACK_CNT
                entry->cnt++;
                if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
                    /*
                     * Heuristic:
                     * A page used to transfer data over a device channel is
                     * normally unmapped after only a few accesses (one or
                     * two tlb inserts) once the real device I/O completes,
                     * i.e. within a short period.  This page, however, has
                     * been accessed many times, so we guess it is used as an
                     * I/O ring and tracking this entry is probably useless.
                     */
                     // tlb_track_entry_printf(entry);
                     // tlb_track_printd("cnt = %ld\n", entry->cnt);
                    perfc_incr(tlb_track_iod_force_many);
                    goto force_many;
                }
#endif
                goto found;
            } else {
#ifdef CONFIG_TLB_TRACK_CNT
            force_many:
#endif
                if (!pte_tlb_inserted(old_pte)) {
                    printk("%s:%d racy update\n", __func__, __LINE__);
                    old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
                }
                new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
                ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
                if (pte_val(ret_pte) != pte_val(old_pte)) {
                    // tlb_track_printd("TLB_TRACK_AGAIN\n");
                    ret = TLB_TRACK_AGAIN;
                    perfc_incr(tlb_track_iod_again);
                } else {
                    // tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n",
                    //                  entry);
                    ret = TLB_TRACK_MANY;
                    list_del(&entry->list);
                    // tlb_track_entry_printf(entry);
                    perfc_incr(tlb_track_iod_tracked_many_del);
                }
                goto out;
            }
        }

        /*
         * Another thread changed the p2m entry, removed the old tlb track
         * entry and inserted a new one after we read old_pte but before we
         * took the spinlock.
         */
        // tlb_track_printd("TLB_TRACK_AGAIN\n");
        ret = TLB_TRACK_AGAIN;
        perfc_incr(tlb_track_iod_again);
        goto out;
    }

    entry = NULL; // prevent freeing entry.
    if (pte_tlb_inserted(old_pte)) {
        /* Another thread removed the tlb_track_entry after we read old_pte
           but before we took the spin lock. */
        ret = TLB_TRACK_AGAIN;
        perfc_incr(tlb_track_iod_again);
        goto out;
    }
    if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
        spin_unlock(&tlb_track->hash_lock);
        new_entry = tlb_track_get_entry(tlb_track);
        if (new_entry == NULL) {
            tlb_track_printd("get_entry failed\n");
            /* The entry can't be allocated.
               Fall back to full flush mode. */
            bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
            perfc_incr(tlb_track_iod_new_failed);
        }
        // tlb_track_printd("new_entry 0x%p\n", new_entry);
        perfc_incr(tlb_track_iod_new_entry);
        goto again;
    }

    BUG_ON(pte_tlb_inserted_many(old_pte));
    new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
    ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
    if (pte_val(old_pte) != pte_val(ret_pte)) {
        if (tlb_track_pte_zapped(old_pte, ret_pte)) {
            // tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
            ret = TLB_TRACK_AGAIN;
            perfc_incr(tlb_track_iod_again);
            goto out;
        }

        /* Another thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY */
        if (pte_tlb_inserted_many(ret_pte)) {
            /* Another thread already set _PAGE_TLB_INSERTED_MANY and
               removed the entry. */
            // tlb_track_printd("inserted TLB_TRACK_MANY\n");
            BUG_ON(!pte_tlb_inserted(ret_pte));
            ret = TLB_TRACK_MANY;
            perfc_incr(tlb_track_iod_new_many);
            goto out;
        }
        BUG_ON(pte_tlb_inserted(ret_pte));
        BUG();
    }
    if (new_entry) {
        // tlb_track_printd("inserting new_entry 0x%p\n", new_entry);
        entry = new_entry;
        new_entry = NULL;

        entry->ptep = ptep;
        entry->pte_val = old_pte;
        entry->vaddr = vaddr;
        entry->rid = rid;
        cpus_clear(entry->pcpu_dirty_mask);
        vcpus_clear(entry->vcpu_dirty_mask);
        list_add(&entry->list, head);

#ifdef CONFIG_TLB_TRACK_CNT
        entry->cnt = 0;
#endif
        perfc_incr(tlb_track_iod_insert);
        // tlb_track_entry_printf(entry);
    } else {
        goto out;
    }

 found:
    BUG_ON(v->processor >= NR_CPUS);
    cpu_set(v->processor, entry->pcpu_dirty_mask);
    BUG_ON(v->vcpu_id >= NR_CPUS);
    vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
    perfc_incr(tlb_track_iod_dirtied);

 out:
    spin_unlock(&tlb_track->hash_lock);
    if (ret == TLB_TRACK_MANY && entry != NULL)
        tlb_track_free_entry(tlb_track, entry);
    if (new_entry != NULL)
        tlb_track_free_entry(tlb_track, new_entry);
    return ret;
}
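
/*
 * Wrapper called on the tlb insert / dirtying path: it derives the rid from
 * the vcpu's region registers, normalizes vaddr into vrn7 and page-aligns it
 * as tlb_track_insert_or_dirty() expects, and converts a TLB_TRACK_AGAIN
 * result into a p2m_entry retry.
 */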
void
__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
                                 struct p2m_entry* entry)
{
    unsigned long vrn = vaddr >> IA64_RR_SHIFT;
    unsigned long rid = PSCB(vcpu, rrs[vrn]);
    TLB_TRACK_RET_T ret;

    /* Normalize to vrn7.
       For Linux dom0, vrn7 is the most common case. */
    vaddr |= VRN7 << VRN_SHIFT;
    vaddr &= PAGE_MASK;
    ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
                                    &vcpu->domain->arch.mm,
                                    entry->ptep, entry->used,
                                    vaddr, rid);
    if (ret == TLB_TRACK_AGAIN)
        p2m_entry_set_retry(entry);
}
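
/*
 * Called after a p2m entry has been zapped: look up (and remove) the
 * tracking entry for ptep so the caller can flush only the recorded
 * (vaddr, rid) translation on the dirtied cpus.  TLB_TRACK_MANY and
 * TLB_TRACK_NOT_TRACKED tell the caller that a targeted flush is not
 * possible, while TLB_TRACK_NOT_FOUND means no tracked insertion was
 * recorded for this pte.
 */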
TLB_TRACK_RET_T
tlb_track_search_and_remove(struct tlb_track* tlb_track,
                            volatile pte_t* ptep, pte_t old_pte,
                            struct tlb_track_entry** entryp)
{
    unsigned long mfn = pte_pfn(old_pte);
    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
    struct tlb_track_entry* entry;

    perfc_incr(tlb_track_sar);
    if (!pte_tlb_tracking(old_pte)) {
        perfc_incr(tlb_track_sar_not_tracked);
        return TLB_TRACK_NOT_TRACKED;
    }
    if (!pte_tlb_inserted(old_pte)) {
        BUG_ON(pte_tlb_inserted_many(old_pte));
        perfc_incr(tlb_track_sar_not_found);
        return TLB_TRACK_NOT_FOUND;
    }
    if (pte_tlb_inserted_many(old_pte)) {
        BUG_ON(!pte_tlb_inserted(old_pte));
        perfc_incr(tlb_track_sar_many);
        return TLB_TRACK_MANY;
    }

    spin_lock(&tlb_track->hash_lock);
    list_for_each_entry(entry, head, list) {
        if (entry->ptep != ptep)
            continue;

        if (pte_pfn(entry->pte_val) == mfn) {
            /*
             * PARANOIA
             * We're here after zapping the p2m entry.  However, another
             * pCPU may in theory update the same p2m entry with the same
             * mfn at the same time.  In such a case we can't determine
             * whether this entry is for us or for the racy p2m update.
             * Such racy behaviour by a guest domain doesn't make sense,
             * but it is allowed, so take the very pessimistic path: leave
             * this entry to be found later and do a full flush now.
             *
             * NOTE: Updating the tlb tracking hash is protected by the spin
             *       lock, and setting the _PAGE_TLB_INSERTED and
             *       _PAGE_TLB_INSERTED_MANY bits is serialized by the same
             *       spin lock.
             *       See tlb_track_insert_or_dirty().
             */
            pte_t current_pte = *ptep;
            if (unlikely(pte_pfn(current_pte) == mfn &&
                         pte_tlb_tracking(current_pte) &&
                         pte_tlb_inserted(current_pte))) {
                BUG_ON(pte_tlb_inserted_many(current_pte));
                spin_unlock(&tlb_track->hash_lock);
                perfc_incr(tlb_track_sar_many);
                return TLB_TRACK_MANY;
            }

            list_del(&entry->list);
            spin_unlock(&tlb_track->hash_lock);
            *entryp = entry;
            perfc_incr(tlb_track_sar_found);
            // tlb_track_entry_printf(entry);
#ifdef CONFIG_TLB_TRACK_CNT
            // tlb_track_printd("cnt = %ld\n", entry->cnt);
#endif
            return TLB_TRACK_FOUND;
        }
        BUG();
    }
    BUG();
    spin_unlock(&tlb_track->hash_lock);
    return TLB_TRACK_NOT_TRACKED;
}

/* for debug */
void
__tlb_track_entry_printf(const char* func, int line,
                         const struct tlb_track_entry* entry)
{
    char pcpumask_buf[NR_CPUS + 1];
    char vcpumask_buf[MAX_VIRT_CPUS + 1];
    cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
                      entry->pcpu_dirty_mask);
    vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
                       entry->vcpu_dirty_mask);
    printk("%s:%d\n"
           "\tmfn 0x%016lx\n"
           "\told_pte 0x%016lx ptep 0x%p\n"
           "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
           "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
           "\tentry 0x%p\n",
           func, line,
           pte_pfn(entry->pte_val),
           pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
           entry->vaddr, entry->rid,
           pcpumask_buf, vcpumask_buf,
           entry);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */