Skip to content

Commit 113e688

Browse files
authored
Print warning when too few logical cores are available
1 parent 30248fb commit 113e688

File tree

7 files changed

+123
-0
lines changed

7 files changed

+123
-0
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,12 @@ set(SOURCES
193193
"${CMAKE_CURRENT_BINARY_DIR}/src/version.cc"
194194
)
195195

196+
# Select the platform-specific implementation of affinity_cores_available().
if(WIN32)
    list(APPEND SOURCES src/platform_specific/affinity.win.cc)
elseif(UNIX)
    # NOTE(review): affinity.unix.cc relies on pthread_getaffinity_np(), a non-portable GNU extension;
    # confirm that every UNIX target this project supports provides it.
    list(APPEND SOURCES src/platform_specific/affinity.unix.cc)
else()
    # Fail at configure time instead of leaving affinity_cores_available() undefined until link time.
    message(FATAL_ERROR "No affinity implementation available for this platform (expected WIN32 or UNIX).")
endif()
201+
196202
add_library(
197203
celerity_runtime
198204
STATIC

include/affinity.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#pragma once
2+
3+
#include <cstdint>
4+
5+
namespace celerity {
6+
namespace detail {
7+
8+
/**
 * Returns the number of logical cores reported by the platform's affinity mask.
 * The definition is platform-specific and lives in src/platform_specific/.
 */
uint32_t affinity_cores_available();

/* A priori we need 3 threads, plus 1 for parallel-task workers and at least one more for host-task.
   The actual requirement depends on the application invoking celerity.
   Typed as uint32_t to match the return type of affinity_cores_available(), which it is compared against. */
constexpr static uint32_t min_cores_needed = 5;
13+
14+
} // namespace detail
15+
} // namespace celerity

include/utils.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#pragma once
2+
3+
#include <cstdint>
4+
#include <type_traits>
5+
6+
namespace celerity {
7+
namespace detail {
8+
namespace utils {
9+
10+
/**
 * Counts the number of set bits in an unsigned integer bit mask.
 *
 * @tparam BitMaskT unsigned integral type of the mask (enforced at compile time)
 * @param bit_mask  the mask to inspect
 * @return the number of bits set to 1 in bit_mask
 */
template <typename BitMaskT>
constexpr inline uint32_t popcount(const BitMaskT bit_mask) noexcept {
    static_assert(std::is_integral_v<BitMaskT> && std::is_unsigned_v<BitMaskT>, "popcount argument needs to be an unsigned integer type.");

    uint32_t counter = 0;
    // Each step clears the lowest set bit, so the loop runs once per set bit rather than once per bit position.
    // The explicit cast undoes integer promotion for mask types narrower than int and avoids implicit narrowing.
    for(auto remaining = bit_mask; remaining != 0; remaining = static_cast<BitMaskT>(remaining & (remaining - 1))) {
        ++counter;
    }
    return counter;
}
20+
21+
} // namespace utils
22+
} // namespace detail
23+
} // namespace celerity

src/platform_specific/affinity.unix.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
#include <cassert>
3+
#include <cstdint>
4+
5+
#include <pthread.h>
6+
#include <sched.h>
7+
8+
#include "affinity.h"
9+
10+
namespace celerity {
11+
namespace detail {
12+
13+
/**
 * Returns the number of logical cores contained in the calling thread's affinity mask.
 *
 * A failing query trips the assertion in debug builds. With NDEBUG the assertion is compiled out;
 * the mask is cleared before the query so the count is always computed from initialized memory
 * (expected to be 0 when the query fails).
 */
uint32_t affinity_cores_available() {
    cpu_set_t available_cores;
    // Start from an empty set so a failed query can never make CPU_COUNT read indeterminate memory.
    CPU_ZERO(&available_cores);
    // [[maybe_unused]]: the variable is only read by the assertion, which vanishes under NDEBUG.
    [[maybe_unused]] const auto affinity_error = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &available_cores);
    assert(affinity_error == 0 && "Error retrieving affinity mask.");
    // CPU_COUNT yields an int; make the conversion to the unsigned return type explicit.
    return static_cast<uint32_t>(CPU_COUNT(&available_cores));
}
19+
20+
} // namespace detail
21+
} // namespace celerity

src/platform_specific/affinity.win.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include <cassert>
2+
3+
#include <Windows.h>
4+
5+
#include "affinity.h"
6+
#include "utils.h"
7+
8+
namespace celerity {
9+
namespace detail {
10+
11+
uint32_t affinity_cores_available() {
12+
using native_cpu_set = DWORD_PTR;
13+
14+
native_cpu_set available_cores;
15+
[[maybe_unused]] native_cpu_set sys_affinity_mask;
16+
const auto affinity_error = GetProcessAffinityMask(GetCurrentProcess(), &available_cores, &sys_affinity_mask);
17+
assert(affinity_error != FALSE && "Error retrieving affinity mask.");
18+
return utils::popcount(available_cores);
19+
}
20+
21+
} // namespace detail
22+
} // namespace celerity

src/runtime.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <mpi.h>
1414

15+
#include "affinity.h"
1516
#include "buffer.h"
1617
#include "buffer_manager.h"
1718
#include "command_graph.h"
@@ -23,6 +24,7 @@
2324
#include "scheduler.h"
2425
#include "task_manager.h"
2526
#include "user_bench.h"
27+
#include "utils.h"
2628
#include "version.h"
2729

2830
namespace celerity {
@@ -98,8 +100,15 @@ namespace detail {
98100
cfg = std::make_unique<config>(argc, argv, *default_logger);
99101
graph_logger->set_level(cfg->get_log_level());
100102

103+
if(const uint32_t cores = affinity_cores_available(); cores < min_cores_needed) {
104+
default_logger->warn("Celerity has detected that only {} logical cores are available to this process. It is recommended to assign at least {} "
105+
"logical cores. Performance may be negatively impacted.",
106+
cores, min_cores_needed);
107+
}
108+
101109
user_bench = std::make_unique<experimental::bench::detail::user_benchmarker>(*cfg, static_cast<node_id>(world_rank));
102110

111+
103112
h_queue = std::make_unique<host_queue>(*default_logger);
104113
d_queue = std::make_unique<device_queue>(*default_logger);
105114

test/runtime_tests.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,17 @@
55
#include <memory>
66
#include <random>
77

8+
#ifdef _WIN32
9+
#include <Windows.h>
10+
#else
11+
#include <pthread.h>
12+
#endif
13+
814
#include <catch2/catch.hpp>
915

1016
#include <celerity.h>
1117

18+
#include "affinity.h"
1219
#include "ranges.h"
1320
#include "region_map.h"
1421

@@ -2305,5 +2312,25 @@ namespace detail {
23052312
}
23062313
}
23072314

2315+
2316+
// Pins the process (Windows) or calling thread (Posix) to exactly one logical core and verifies that
// affinity_cores_available() reports 1. The previous affinity is captured up front and restored before
// the final assertions, so test cases that run afterwards are not left confined to a single core.
TEST_CASE("affinity check", "[affinity]") {
#ifdef _WIN32
    SECTION("in Windows") {
        DWORD_PTR previous_mask = 0;
        DWORD_PTR system_mask = 0;
        REQUIRE(GetProcessAffinityMask(GetCurrentProcess(), &previous_mask, &system_mask) != FALSE);
        REQUIRE(previous_mask != 0);

        // Isolate the lowest core the process may already use instead of assuming core 0 is allowed.
        const DWORD_PTR cpu_mask = previous_mask & (~previous_mask + 1);
        REQUIRE(SetProcessAffinityMask(GetCurrentProcess(), cpu_mask) != FALSE);

        const auto cores = affinity_cores_available();

        // Restore first: a failing REQUIRE aborts the test case and would otherwise skip the cleanup.
        const auto restored = SetProcessAffinityMask(GetCurrentProcess(), previous_mask);
        REQUIRE(cores == 1);
        REQUIRE(restored != FALSE);
    }
#else
    SECTION("in Posix") {
        cpu_set_t previous_mask;
        CPU_ZERO(&previous_mask);
        REQUIRE(pthread_getaffinity_np(pthread_self(), sizeof(previous_mask), &previous_mask) == 0);

        // Pick the first core the thread may already use instead of assuming core 0 is allowed.
        int first_allowed = 0;
        while(first_allowed < CPU_SETSIZE && !CPU_ISSET(first_allowed, &previous_mask)) {
            ++first_allowed;
        }
        REQUIRE(first_allowed < CPU_SETSIZE);

        cpu_set_t cpu_mask;
        CPU_ZERO(&cpu_mask);
        CPU_SET(first_allowed, &cpu_mask);
        REQUIRE(pthread_setaffinity_np(pthread_self(), sizeof(cpu_mask), &cpu_mask) == 0);

        const auto cores = affinity_cores_available();

        // Restore first: a failing REQUIRE aborts the test case and would otherwise skip the cleanup.
        const auto restore_error = pthread_setaffinity_np(pthread_self(), sizeof(previous_mask), &previous_mask);
        REQUIRE(cores == 1);
        REQUIRE(restore_error == 0);
    }
#endif
}
2333+
2334+
23082335
} // namespace detail
23092336
} // namespace celerity

0 commit comments

Comments
 (0)