Skip to content

Commit

Permalink
mm: add tracepoint for scanning pages
Browse files Browse the repository at this point in the history
This patch series performs swapin readahead, up to a certain number of
pages, to improve THP performance, and adds tracepoints for
khugepaged_scan_pmd, collapse_huge_page and __collapse_huge_page_isolate.

This patch series was written to deal with programs that access most,
but not all, of their memory after they get swapped out.  Currently
these programs do not get their memory collapsed into THPs after the
system swapped their memory out, while they would get THPs before
swapping happened.

This patch series was tested with a test program that allocates 400MB of
memory, writes to it, and then sleeps.  I then force the system to swap
out all of it.  Afterwards, the test program touches the area by writing,
leaving a piece of it unwritten.  This shows how much swapin readahead
the patch performs.

Test results:

                        After swapped out
-------------------------------------------------------------------
              | Anonymous | AnonHugePages | Swap      | Fraction  |
-------------------------------------------------------------------
With patch    | 90076 kB  | 88064 kB      | 309928 kB |    %99    |
-------------------------------------------------------------------
Without patch | 194068 kB | 192512 kB     | 205936 kB |    %99    |
-------------------------------------------------------------------

                        After swapped in
-------------------------------------------------------------------
              | Anonymous | AnonHugePages | Swap      | Fraction  |
-------------------------------------------------------------------
With patch    | 201408 kB | 198656 kB     | 198596 kB |    %98    |
-------------------------------------------------------------------
Without patch | 292624 kB | 192512 kB     | 107380 kB |    %65    |
-------------------------------------------------------------------

This patch (of 3):

Static tracepoints record data from the functions they instrument.  They
make it possible to automate debugging without making many changes to the
source code.

This patch adds tracepoints for khugepaged_scan_pmd, collapse_huge_page
and __collapse_huge_page_isolate.

[dan.carpenter@oracle.com: add a missing tab]
Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Xie XiuQi <xiexiuqi@huawei.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
ebruAkagunduz authored and torvalds committed Jan 15, 2016
1 parent cb5490a commit 7d2eba0
Show file tree
Hide file tree
Showing 2 changed files with 270 additions and 32 deletions.
136 changes: 136 additions & 0 deletions include/trace/events/huge_memory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM huge_memory

#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HUGE_MEMORY_H

#include <linux/tracepoint.h>

#include <trace/events/gfpflags.h>

/*
 * SCAN_STATUS lists the scan result codes that the tracepoints in this
 * header print symbolically, each paired with the string shown for it.
 *
 * The list is an X-macro: it has no meaning on its own and relies on EM()
 * and EMe() being defined at the point of expansion.  EM() wraps every
 * entry except the last; EMe() wraps the final entry so that an expansion
 * can leave off the trailing separator there.
 *
 * When adding an entry, keep EMe() on the last line only.
 */
#define SCAN_STATUS							\
	EM( SCAN_FAIL,			"failed")			\
	EM( SCAN_SUCCEED,		"succeeded")			\
	EM( SCAN_PMD_NULL,		"pmd_null")			\
	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
	EM( SCAN_PAGE_RO,		"no_writable_page")		\
	EM( SCAN_NO_REFERENCED_PAGE,	"no_referenced_page")		\
	EM( SCAN_PAGE_NULL,		"page_null")			\
	EM( SCAN_SCAN_ABORT,		"scan_aborted")			\
	EM( SCAN_PAGE_COUNT,		"not_suitable_page_count")	\
	EM( SCAN_PAGE_LRU,		"page_not_in_lru")		\
	EM( SCAN_PAGE_LOCK,		"page_locked")			\
	EM( SCAN_PAGE_ANON,		"page_not_anon")		\
	EM( SCAN_ANY_PROCESS,		"no_process_for_page")		\
	EM( SCAN_VMA_NULL,		"vma_null")			\
	EM( SCAN_VMA_CHECK,		"vma_check_failed")		\
	EM( SCAN_ADDRESS_RANGE,		"not_suitable_address_range")	\
	EM( SCAN_SWAP_CACHE_PAGE,	"page_swap_cache")		\
	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru") \
	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\
	EMe( SCAN_CGROUP_CHARGE_FAIL,	"cgroup_charge_failed")

/*
 * First expansion of SCAN_STATUS: every entry, including the last, becomes
 * a TRACE_DEFINE_ENUM(<code>); statement.  The string argument is ignored
 * here.
 * NOTE(review): TRACE_DEFINE_ENUM presumably makes the enum values known to
 * the tracing infrastructure so the names in the print format can be
 * resolved -- confirm against the tracepoint documentation.
 */
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);

SCAN_STATUS

/*
 * Second definition: from here on SCAN_STATUS expands to a comma-separated
 * list of {code, "string"} pairs.  EMe() omits the trailing comma for the
 * final pair.  This is the form consumed by the __print_symbolic() calls
 * in the TP_printk() sections below, so these definitions must stay in
 * effect for the rest of the header.
 */
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}

/*
 * mm_khugepaged_scan_pmd - trace event carrying the outcome of a PMD scan.
 *
 * NOTE(review): by its name this is meant to be emitted from
 * khugepaged_scan_pmd(); the call site is not part of this header.
 *
 * Recorded fields (all copied verbatim from the arguments):
 *   mm            - struct mm_struct pointer, printed with %p
 *   pfn           - printed in hex as "scan_pfn"
 *   writable      - bool, printed as 0/1
 *   referenced    - bool, printed as 0/1
 *   none_or_zero  - int (presumably a count of none/zero PTEs seen -- confirm
 *                   against the caller)
 *   status        - int scan result, printed as its SCAN_STATUS string
 */
TRACE_EVENT(mm_khugepaged_scan_pmd,

TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
bool referenced, int none_or_zero, int status),

TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),

TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, pfn)
__field(bool, writable)
__field(bool, referenced)
__field(int, none_or_zero)
__field(int, status)
),

TP_fast_assign(
__entry->mm = mm;
__entry->pfn = pfn;
__entry->writable = writable;
__entry->referenced = referenced;
__entry->none_or_zero = none_or_zero;
__entry->status = status;
),

/* status is stored as the raw int and translated to a string only here. */
TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s",
__entry->mm,
__entry->pfn,
__entry->writable,
__entry->referenced,
__entry->none_or_zero,
__print_symbolic(__entry->status, SCAN_STATUS))
);

/*
 * mm_collapse_huge_page - trace event carrying the outcome of a huge page
 * collapse attempt.
 *
 * NOTE(review): by its name this is meant to be emitted from
 * collapse_huge_page(); the call site is not part of this header.
 *
 * Recorded fields (all copied verbatim from the arguments):
 *   mm        - struct mm_struct pointer, printed with %p
 *   isolated  - int, printed with %d (meaning is defined by the caller)
 *   status    - int result code, printed as its SCAN_STATUS string
 */
TRACE_EVENT(mm_collapse_huge_page,

TP_PROTO(struct mm_struct *mm, int isolated, int status),

TP_ARGS(mm, isolated, status),

TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(int, isolated)
__field(int, status)
),

TP_fast_assign(
__entry->mm = mm;
__entry->isolated = isolated;
__entry->status = status;
),

/* status is stored as the raw int and translated to a string only here. */
TP_printk("mm=%p, isolated=%d, status=%s",
__entry->mm,
__entry->isolated,
__print_symbolic(__entry->status, SCAN_STATUS))
);

/*
 * mm_collapse_huge_page_isolate - trace event carrying the outcome of the
 * page isolation step of a huge page collapse.
 *
 * NOTE(review): by its name this is meant to be emitted from
 * __collapse_huge_page_isolate(); the call site is not part of this header.
 *
 * Recorded fields (all copied verbatim from the arguments):
 *   pfn           - printed in hex as "scan_pfn"
 *   none_or_zero  - int (presumably a count of none/zero PTEs seen -- confirm
 *                   against the caller)
 *   referenced    - bool, printed as 0/1
 *   writable      - bool, printed as 0/1
 *   status        - int result code, printed as its SCAN_STATUS string
 *
 * Unlike mm_khugepaged_scan_pmd, this event takes no mm pointer, and its
 * arguments come in the order none_or_zero, referenced, writable rather
 * than writable, referenced, none_or_zero -- take care at call sites.
 */
TRACE_EVENT(mm_collapse_huge_page_isolate,

TP_PROTO(unsigned long pfn, int none_or_zero,
bool referenced, bool writable, int status),

TP_ARGS(pfn, none_or_zero, referenced, writable, status),

TP_STRUCT__entry(
__field(unsigned long, pfn)
__field(int, none_or_zero)
__field(bool, referenced)
__field(bool, writable)
__field(int, status)
),

TP_fast_assign(
__entry->pfn = pfn;
__entry->none_or_zero = none_or_zero;
__entry->referenced = referenced;
__entry->writable = writable;
__entry->status = status;
),

/* status is stored as the raw int and translated to a string only here. */
TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s",
__entry->pfn,
__entry->none_or_zero,
__entry->referenced,
__entry->writable,
__print_symbolic(__entry->status, SCAN_STATUS))
);

#endif /* __HUGE_MEMORY_H */
#include <trace/define_trace.h>
Loading

0 comments on commit 7d2eba0

Please sign in to comment.