// // Data Prefetching Championship Simulator 2 // Seth Pugsley, seth.h.pugsley@intel.com // /* * #002 Lookahead Prefetching with Signature Path * * Jinchun Kim, cienlux@tamu.edu * Paul V. Gratz, pgratz@gratz1.com * A. L. Narasimha Reddy, reddy@tamu.edu * Department of Electrical and Computer Engineering * Texas A&M University * * Compile command: g++ -Wall -o dpc2sim example_prefetchers/002_kim.cpp lib/dpc2sim.a * * NOTE: Use #define DEBUG to print out detailed info */ #include #include #include extern "C" { #include "../inc/prefetcher.h" } //#define DEBUG #define FILTER_ON #define LOOKAHEAD_ON #define AUX_ON // DEBUG printf #ifdef DEBUG #define D(x) x #else #define D(x) #endif // Signature table configuration #define ST_SET 512 #define ST_WAY 2 #define ST_TAG_BIT 8 #define SIG_LENGTH 12 #define SIG_SHIFT 3 // Pattern table configuration #define PT_SET ( 1 << (SIG_LENGTH) ) #define PT_WAY 4 #define PT_TAG_BIT 7 #define COUNTER_BIT 3 // Filter configuration #define FILTER_SET 256 #define FILTER_WAY 2 // Useful numbers #define PRIME1 509 #define PRIME2 257 #define PRIME3 251 #define MAX_ENTRY 4 #define HISTORY 4 #define PRINTF_TRACK 2048 #define MAX_INFLIGHT 4 #define MAX_CONFIDENCE 95 #define PF_THRESHOLD 50 #define LA_THRESHOLD 75 #define MSHR_THRESHOLD 8 #define RQ_THRESHOLD 16 #define BLOCK_SIZE 64 enum {READ, UPDATE, DEMAND, PREFETCH, EVICT, FILL, CHECK, FROM_ST, FROM_PT, FROM_FILTER}; typedef unsigned long long int Addr; typedef class SPP_class { public: ////////////////////////////////// Components // Common bool valid[MAX_ENTRY]; int p_tag[MAX_ENTRY]; int lru[MAX_ENTRY]; // Signature Table (ST) int signature[MAX_ENTRY]; int last_block[MAX_ENTRY]; // Pattern Table (PT) int counter[MAX_ENTRY]; int max_idx; // Prefetch Engine (PE) int filter_bitmap[FILTER_WAY][BLOCK_SIZE]; // DEBUG int idx; int delta[MAX_ENTRY][HISTORY]; int new_delta; int pf_candidate, la_candidate; /////////////////////////////////// Functions // Common SPP_class(); void update_lru(int match_idx, bool invalid, int type); // ST stage int access_ST(int tag, int curr_block, int type); int update_signature(int assoc, int type); // PT stage void update_PT(int tag); int read_PT(int conf, int la_depth); void update_counter(int match_idx, bool miss); // PE stage bool check_filter(int tag, int block, int type); } SPP_class; SPP_class ST[ST_SET]; // Signature table SPP_class PT[PT_SET]; // Pattern table SPP_class FILTER[FILTER_SET]; // Filter // Statistics int ST_access=0, ST_hit=0, ST_miss=0; int PT_access=0, PT_hit=0, PT_miss=0; int PF_attempt=0, PF_local=0, PF_beyond=0, PF_filtered=0; int PF_issue=0, PF_inflight=0, PF_fail=0; int PF_L2_attempt=0, PF_L2_issue=0, PF_L2_fail=0; int PF_LLC_attempt=0,PF_LLC_issue=0, PF_LLC_fail=0; int AUX_attempt=0, AUX_local=0, AUX_beyond=0, AUX_filtered=0; int AUX_issue=0, AUX_inflight=0, AUX_fail=0; int AUX_L2_attempt=0, AUX_L2_issue=0, AUX_L2_fail=0; int AUX_LLC_attempt=0, AUX_LLC_issue=0, AUX_LLC_fail=0; int LA_attempt=0, LA_prefetch=0; int LA_depth[PRINTF_TRACK]; int depth=0; int l2_hit=0, l2_miss=0, l2_access=0; int remain_mshr=0, remain_rq=0; // Prefetch function void attempt_prefetch(Addr base_addr, int base_block, int PT_idx, int la_candidate, int pf_candidate); // Lookahead function void lookahead(Addr base_addr, int base_block, int prev_conf, int PT_idx, int la_candidate); // Auxiliary next line prefetcher void aux_prefetch(Addr base_addr); void l2_prefetcher_initialize(int cpu_num) { // you can inspect these knob values from your code to see which configuration you're runnig in printf("Knobs visible from prefetcher: %d %d %d\n", knob_scramble_loads, knob_small_llc, knob_low_bandwidth); // Used to measure average lookahead depth for (int i=0; i> 12); int curr_offset = addr & 0xFFF, curr_block = curr_offset/BLOCK_SIZE, ST_idx = curr_page % PRIME1, partial_pnum = curr_page & ((1 << ST_TAG_BIT) - 1), PT_update_idx = 0, PT_pref_idx = 0, la_candidate = 0, pf_candidate = 0, init_conf = 0; int FILTER_idx = curr_page % PRIME2; if (FILTER_idx >= 255) FILTER_idx = curr_page % PRIME3; // Remain L2 resources remain_mshr = 16 - get_l2_mshr_occupancy(0); remain_rq = 32 - get_l2_read_queue_occupancy(0); D(printf("\ncycle: %llu ip: %llx page: %llx ST_idx: %3d block: %2d hit: %d\n", get_current_cycle(0), ip, curr_page, ST_idx, curr_block, cache_hit)); l2_hit += cache_hit; l2_access++; // Set demand block in filter // This demand might be already requested by prefetcher and set to "1" by the filter // However, we cannot drop demand request since participants don't have control over demand request FILTER[FILTER_idx].check_filter(partial_pnum, curr_block, DEMAND); // 1) Signature table stage ST[ST_idx].idx = ST_idx; // DEBUG // 1-1) Read signature and get PT_update_idx (used for PT update) PT_update_idx = ST[ST_idx].access_ST(partial_pnum, curr_block, READ); // 1-2) Update history signature and get PT_pref_index (used for prefetching) PT_pref_idx = ST[ST_idx].access_ST(partial_pnum, curr_block, UPDATE); // 2) Pattern table Stage PT[PT_pref_idx].idx = PT_pref_idx; // DEBUG // 2-1) Update PT with PT_update_idx // PT is not updated if stride is zero if ((PT_update_idx < PT_SET) && (ST[ST_idx].new_delta != 0)) PT[PT_update_idx].update_PT(ST[ST_idx].new_delta); // 2-2) Read prefetch and lookahead candidates from PT if (PT_pref_idx > PT_SET) // Do not request prefetch when stride is zero return; init_conf = PT[PT_pref_idx].read_PT(100, 0); la_candidate = PT[PT_pref_idx].la_candidate; pf_candidate = PT[PT_pref_idx].pf_candidate; // 3) Prefetch engine stage // 3-1) Prefetch if there is possible candidate with enough confidence PF_inflight = 0; depth = 0; if (pf_candidate) // Prefetch with SPP attempt_prefetch(addr, curr_block, PT_pref_idx, la_candidate, pf_candidate); #ifdef LOOKAHEAD_ON // 3-2) Lookahead if (la_candidate) lookahead(addr, curr_block, init_conf, PT_pref_idx, la_candidate); else D(printf("\t[LOOKAHEAD] No more lookahead! next_la_candidate: %d\n", la_candidate)); #endif if (PF_inflight >= MAX_INFLIGHT) { D(printf("\t[PREFETCH] PF_inflight reached threshold %d!\n", MAX_INFLIGHT)); return; } #ifdef AUX_ON // Auxiliary next line prefetcher if ((pf_candidate == 0) && (la_candidate == 0)) { // No prefetch and lookahead candidate D(printf("\t[PT-READ] No pf_candidate: %d curr_block: %d\n", pf_candidate, curr_block)); // Do not activate AUX across the page boundary if ((curr_block+1) > (BLOCK_SIZE - 1)) { D(printf("\t[PREFETCH-AUX] Out of same page boundary!\n")); AUX_beyond++; return; } // Do not activate AUX if the next line has been already demanded or prefetched if (!FILTER[FILTER_idx].check_filter(partial_pnum, curr_block+1, PREFETCH)) { AUX_filtered++; AUX_attempt++; return; } else // Call auxiliary next line prefetcher aux_prefetch(addr); } #endif return; } void l2_cache_fill(int cpu_num, unsigned long long int addr, int set, int way, int prefetch, unsigned long long int evicted_addr) { // uncomment this line to see the information available to you when there is a cache fill event //printf("0x%llx %d %d %d 0x%llx\n", addr, set, way, prefetch, evicted_addr); // L2 FILL Addr curr_page = addr >> 12; int curr_block = (addr >> 6) & 0x3F, FILTER_idx = curr_page % PRIME2, partial_pnum = curr_page & ((1 << ST_TAG_BIT) - 1); if (FILTER_idx >= 255) FILTER_idx = curr_page % PRIME3; D(printf("cycle: %llu [L2-FILL] page: %llx FILTER_idx: %3d block: %3d prefetch: %d set: %4d way: %d\n", get_current_cycle(0), curr_page, FILTER_idx, curr_block, prefetch, set, way)); FILTER[FILTER_idx].check_filter(partial_pnum, curr_block, FILL); // L2 EVICT if (evicted_addr) { curr_page = evicted_addr >> 12; curr_block = (evicted_addr >> 6) & 0x3F; FILTER_idx = curr_page % PRIME2; partial_pnum = curr_page & ((1 << ST_TAG_BIT) - 1); if (FILTER_idx >= 255) FILTER_idx = curr_page % PRIME3; D(printf("cycle: %llu [L2-EVICT] page: %llx FILTER_idx: %3d block: %3d set: %4d way: %d\n", get_current_cycle(0), curr_page, FILTER_idx, curr_block, set , way)); FILTER[FILTER_idx].check_filter(partial_pnum, curr_block, EVICT); } } void l2_prefetcher_heartbeat_stats(int cpu_num) { printf("Prefetcher heartbeat stats\n"); } void l2_prefetcher_warmup_stats(int cpu_num) { printf("Prefetcher warmup complete stats\n\n"); } void l2_prefetcher_final_stats(int cpu_num) { printf("Prefetcher final stats\n"); int sum_depth = 0, num_total = 0, avg_depth = 0; for (int i=1; i Clear history and replacing lru[%d]: %d\n", idx, match_idx, tag, match_idx, lru[match_idx])); for (int j=0; j curr-Signature: %3x\n", signature[assoc])); } else { // Read signature D(printf("\t[ST-READ] Index: %4x prev-History[%d]: ", idx, assoc)); D(for (int i=0; i prev-Signature: %3x\n", signature[assoc])); } return signature[assoc]; } void SPP_class::update_counter(int match_idx, bool miss) { if (miss) { // No matching pattern for (int i=0; i= PF_THRESHOLD) { pf_candidate = pf_candidate | (1 << i); // Mark prefetch candidates D(printf("\t[PT] Index: %4x Prefetch[%d]: %3d prev-conf: %3d check-conf: %3d la_depth: %2d\n", idx, i, p_tag[i], conf, acc_conf, la_depth)); if (acc_conf >= LA_THRESHOLD) { la_candidate = la_candidate | (1 << i); // Mark lookahead candidates // D(printf("\t[PT] Index: %4x Lookahead[%d]: %3d prev-conf: %3d check-conf: %3d la_depth: %2d\n", // idx, i, p_tag[i], conf, acc_conf, la_depth)); if (max_counter < counter[i]) { // Record an entry with max_counter max_idx = i; max_counter = counter[max_idx]; } if ((counter[max_idx] == counter[i]) && (lru[max_idx] < lru[i])) { max_idx = i; // Tie braker for same max_counter, choose most recently used on } } } } if (depth) // During lookahead, choose only one pf_candidate with maximum confidence pf_candidate = 1 << max_idx; if(la_candidate) { max_conf = MAX_CONFIDENCE*counter[max_idx]/(1 << COUNTER_BIT); D(printf("\t[PT] Index: %4x Lookahead[%d]: %3d has Local-MAX-conf: %d la_depth: %2d\n", idx, max_idx, p_tag[max_idx], max_conf, la_depth)); } return conf*max_conf/100; // return accumulated confidence } void attempt_prefetch(Addr base_addr, int base_block, int PT_idx, int la_candidate, int pf_candidate) { Addr pf_addr, curr_page = base_addr >> 12; int num_la = 0, num_pf = 0, pf_block = 0, partial_pnum = curr_page & ((1 << ST_TAG_BIT) - 1); bool pf_issue = false; int FILTER_idx = curr_page % PRIME2; if (FILTER_idx >= 255) FILTER_idx = curr_page % PRIME3; // DEBUG for (int i=0; i> 6) &0x3F, PT_idx, i, PT[PT_idx].p_tag[i], pf_block)); if (pf_block < 0 || pf_block > 63) { PF_beyond++; PF_attempt++; D(printf("\t[PT-READ] Out of same page boundary!\n")); continue; } // Set prefetching address pf_addr = (((base_addr >> 12) << 6) + pf_block) << 6; // Check remaining mshr and read queue to decide where to prefetch if ((remain_mshr > MSHR_THRESHOLD) && (remain_rq > RQ_THRESHOLD)) { #ifdef FILTER_ON // Filtering redundant prefetch requests if (!FILTER[FILTER_idx].check_filter(partial_pnum, pf_block, PREFETCH)) { PF_filtered++; PF_attempt++; continue; } #endif pf_issue = l2_prefetch_line(0, base_addr, pf_addr, FILL_L2); // Issue L2 prefetch if (pf_issue) { // Update statistics PF_inflight++; PF_issue++; PF_local++; PF_attempt++; PF_L2_issue++; PF_L2_attempt++; remain_mshr--; remain_rq--; if (depth) LA_prefetch++; D(printf("\t[PREFETCH-L2] page: %llx pf_block: %llu pf_issue: %d depth: %d remain_mshr: %d remain_rq: %d\n", pf_addr>>12, (pf_addr>>6)&0x3F, pf_issue, depth, remain_mshr, remain_rq)); } else { D(printf("\t[PREFETCH-L2] Prefetch is NOT issued!\n")); PF_fail++; PF_local++; PF_attempt++; PF_L2_fail++; PF_L2_attempt++; } } else { if ((remain_rq > RQ_THRESHOLD)) { #ifdef FILTER_ON // Filtering redundant prefetch requests if (!FILTER[FILTER_idx].check_filter(partial_pnum, pf_block, PREFETCH)) { PF_filtered++; PF_attempt++; continue; } #endif pf_issue = l2_prefetch_line(0, base_addr, pf_addr, FILL_LLC); // Issue LLC prefetch if (pf_issue) { // Update statistics PF_inflight++; PF_issue++; PF_local++; PF_attempt++; PF_LLC_issue++; PF_LLC_attempt++; remain_rq--; if (depth) LA_prefetch++; D(printf("\t[PREFETCH-LLC] page: %llx pf_block: %llu pf_issue: %d depth: %d remain_mshr: %d remain_rq: %d\n", pf_addr>>12, (pf_addr>>6)&0x3F, pf_issue, depth, remain_mshr, remain_rq)); } else { D(printf("\t[PREFETCH-LLC] Prefetch is NOT issued!\n")); PF_fail++; PF_local++; PF_attempt++; PF_LLC_issue++; PF_LLC_attempt++; } } else { D(printf("\t[PREFETCH-LLC] Not enough resource to prefetch!\n")); return; } } if (PF_inflight >= MAX_INFLIGHT) { D(printf("\t[PREFETCH] PF_inflight reached threshold %d!\n", MAX_INFLIGHT)); return; } } } return; } void lookahead(Addr base_addr, int base_block, int prev_conf, int PT_idx, int la_candidate) { // Lookahead stars with the highest confidence int start = PT[PT_idx].max_idx, temp_delta = PT[PT_idx].p_tag[start] & 0x7F, next_sig = ((PT_idx << SIG_SHIFT) ^ temp_delta) & ((1 << SIG_LENGTH) - 1), next_conf = 0, next_la_candidate = 0, next_pf_candidate = 0, next_base_block = base_block + PT[PT_idx].p_tag[start]; // DEBUG D(printf("\t[LOOKAHEAD] next-Signature: %3x => %3x next_base_block: %3d\n", PT_idx, next_sig, next_base_block)); if (next_base_block < 0 || next_base_block > 63) { D(printf("\t[LOOKAHEAD] Next base block is Out of same page boundary!\n")); return; } // Read PT with next signature next_conf = PT[next_sig].read_PT(prev_conf, (++depth)); next_la_candidate = PT[next_sig].la_candidate; next_pf_candidate = PT[next_sig].pf_candidate; if (next_pf_candidate) attempt_prefetch(base_addr, next_base_block, next_sig, next_la_candidate, next_pf_candidate); if (PF_inflight >= MAX_INFLIGHT) { D(printf("\t[LOOKAHEAD] Stop LOOKAHEAD PF_inflight reached %d!\n", MAX_INFLIGHT)); return; } // If there is la_candidate and enough l2 read queue resource, try lookahead further if ((next_la_candidate) && (remain_rq > RQ_THRESHOLD)) { LA_attempt++; LA_depth[depth]++; lookahead(base_addr, next_base_block, next_conf, next_sig, next_la_candidate); } else if (next_la_candidate == 0) { D(printf("\t[LOOKAHEAD] No more lookahead! next_la_candidate: %d\n", next_la_candidate)); return; } return; } // Return false if block is filtered bool SPP_class::check_filter(int tag, int block, int type) { int match_idx = -1, min_lru = FILTER_WAY+1; for (int i=0; i Do nothing // FILL: Block is filled in and page is missing in filter // => Filled cache block is not likely to be prefetched again // => Do nothing instead of replacing a page with useful filtering info else return true; } void SPP_class::update_lru(int match_idx, bool invalid, int type) { int i, num_entry; switch (type) { case FROM_ST: num_entry = ST_WAY; break; case FROM_PT: num_entry = PT_WAY; break; case FROM_FILTER: num_entry = FILTER_WAY; break; } if (invalid) { // One more entry, others decrease by 1 for (i=0; i MSHR_THRESHOLD) && (remain_rq > RQ_THRESHOLD)) { // Try AUX to L2 if (l2_prefetch_line(0, base_addr, ((base_addr>>6)+1)<<6, FILL_L2)) { // Update statistics PF_inflight++; AUX_issue++; AUX_local++; AUX_attempt++; AUX_L2_issue++; AUX_L2_attempt++; remain_mshr--; remain_rq--; D(printf("\t[PREFETCH-L2-AUX] page: %llx pf_block: %d pf_issue: %d remain_mshr: %d remain_rq: %d\n", (base_addr >> 12), ((base_addr >> 6) & 0x3F) + 1, true, remain_mshr, remain_rq)); } else { D(printf("\t[PREFETCH-L2-AUX] Prefetch is NOT issued!\n")); AUX_fail++; AUX_local++; AUX_attempt++; AUX_L2_fail++; AUX_L2_attempt++; } } else { if ((remain_rq > RQ_THRESHOLD)) { if (l2_prefetch_line(0, base_addr, ((base_addr>>6)+1)<<6, FILL_LLC)) { PF_inflight++; AUX_issue++; AUX_local++; AUX_attempt++; AUX_LLC_issue++; AUX_LLC_attempt++; remain_rq--; D(printf("\t[PREFETCH-LLC-AUX] page: %llx pf_block: %d pf_issue: %d remain_mshr: %d remain_rq: %d\n", (base_addr >> 12), ((base_addr >> 6) & 0x3F) + 1, true, remain_mshr, remain_rq)); } else { D(printf("\t[PREFETCH-LLC-AUX] Prefetch is NOT issued!\n")); AUX_fail++; AUX_local++; PF_attempt++; AUX_LLC_fail++; AUX_LLC_fail++; } } else { D(printf("\t[PREFETCH-AUX] Not enough resource to activate AUX remain_mshr: %d remain_rq: %d\n", remain_mshr, remain_rq)); } } }