File tree Expand file tree Collapse file tree 7 files changed +123
-0
lines changed Expand file tree Collapse file tree 7 files changed +123
-0
lines changed Original file line number Diff line number Diff line change @@ -193,6 +193,12 @@ set(SOURCES
193
193
"${CMAKE_CURRENT_BINARY_DIR} /src/version.cc"
194
194
)
195
195
196
# Select the platform-specific implementation of affinity_cores_available().
# Exactly one of these translation units must be compiled into the runtime.
# NOTE(review): UNIX is also true on macOS, while affinity.unix.cc relies on the
# non-portable pthread_getaffinity_np() - confirm the set of supported platforms.
if(WIN32)
	list(APPEND SOURCES src/platform_specific/affinity.win.cc)
elseif(UNIX)
	list(APPEND SOURCES src/platform_specific/affinity.unix.cc)
else()
	# Fail at configure time instead of with an unresolved symbol at link time.
	message(FATAL_ERROR "No affinity implementation available for this platform (expected WIN32 or UNIX).")
endif()
201
+
196
202
add_library (
197
203
celerity_runtime
198
204
STATIC
Original file line number Diff line number Diff line change
1
+ #pragma once
2
+
3
+ #include < cstdint>
4
+
5
namespace celerity {
namespace detail {

	/**
	 * Returns the number of logical cores the caller is currently allowed to run on,
	 * as reported by the operating system's affinity mask.
	 *
	 * The implementation is platform-specific (see platform_specific/affinity.*.cc):
	 * the POSIX variant counts the CPUs in the calling thread's affinity set, the
	 * Windows variant counts the bits set in the process affinity mask.
	 */
	uint32_t affinity_cores_available();

	/* A priori we need 3 threads, plus 1 for parallel-task workers and at least one more for host-task.
	   This depends on the application invoking celerity.

	   Declared `inline constexpr` so that all translation units including this header share a single definition. */
	inline constexpr uint64_t min_cores_needed = 5;

} // namespace detail
} // namespace celerity
Original file line number Diff line number Diff line change
1
+ #pragma once
2
+
3
+ #include < cstdint>
4
+ #include < type_traits>
5
+
6
namespace celerity {
namespace detail {
namespace utils {

	/**
	 * Counts the number of bits set to 1 in an unsigned integer.
	 *
	 * @tparam BitMaskT Unsigned integral type of the mask (enforced at compile time).
	 * @param bit_mask The value whose set bits are counted.
	 * @return Number of set bits, between 0 and the bit width of BitMaskT.
	 *
	 * Usable in constant expressions.
	 */
	template <typename BitMaskT>
	constexpr uint32_t popcount(const BitMaskT bit_mask) noexcept {
		static_assert(std::is_integral_v<BitMaskT> && std::is_unsigned_v<BitMaskT>, "popcount argument needs to be an unsigned integer type.");

		uint32_t counter = 0;
		// `x & (x - 1)` clears the lowest set bit, so the loop runs once per set bit.
		// The explicit cast undoes integral promotion for types narrower than int and
		// avoids an implicit narrowing conversion into the 32-bit counter.
		for(auto remaining = bit_mask; remaining != 0; remaining &= static_cast<BitMaskT>(remaining - 1)) {
			++counter;
		}
		return counter;
	}

} // namespace utils
} // namespace detail
} // namespace celerity
Original file line number Diff line number Diff line change
1
+
2
+ #include < cassert>
3
+ #include < cstdint>
4
+
5
+ #include < pthread.h>
6
+ #include < sched.h>
7
+
8
+ #include " affinity.h"
9
+
10
namespace celerity {
namespace detail {

	/**
	 * POSIX implementation: returns the number of CPUs contained in the affinity
	 * set of the calling thread.
	 *
	 * A failure to query the affinity set is treated as a programming error and
	 * only checked through assert(). The set is cleared up front so that builds
	 * with NDEBUG never read an uninitialized cpu_set_t if the query fails.
	 */
	uint32_t affinity_cores_available() {
		cpu_set_t available_cores;
		CPU_ZERO(&available_cores);

		// [[maybe_unused]]: the result is only inspected by the assert below, which vanishes under NDEBUG.
		[[maybe_unused]] const auto affinity_error = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &available_cores);
		assert(affinity_error == 0 && "Error retrieving affinity mask.");

		// CPU_COUNT yields an int; the count is never negative.
		return static_cast<uint32_t>(CPU_COUNT(&available_cores));
	}

} // namespace detail
} // namespace celerity
Original file line number Diff line number Diff line change
1
+ #include < cassert>
2
+
3
+ #include < Windows.h>
4
+
5
+ #include " affinity.h"
6
+ #include " utils.h"
7
+
8
+ namespace celerity {
9
+ namespace detail {
10
+
11
+ uint32_t affinity_cores_available () {
12
+ using native_cpu_set = DWORD_PTR;
13
+
14
+ native_cpu_set available_cores;
15
+ [[maybe_unused]] native_cpu_set sys_affinity_mask;
16
+ const auto affinity_error = GetProcessAffinityMask (GetCurrentProcess (), &available_cores, &sys_affinity_mask);
17
+ assert (affinity_error != FALSE && " Error retrieving affinity mask." );
18
+ return utils::popcount (available_cores);
19
+ }
20
+
21
+ } // namespace detail
22
+ } // namespace celerity
Original file line number Diff line number Diff line change 12
12
13
13
#include < mpi.h>
14
14
15
+ #include " affinity.h"
15
16
#include " buffer.h"
16
17
#include " buffer_manager.h"
17
18
#include " command_graph.h"
23
24
#include " scheduler.h"
24
25
#include " task_manager.h"
25
26
#include " user_bench.h"
27
+ #include " utils.h"
26
28
#include " version.h"
27
29
28
30
namespace celerity {
@@ -98,8 +100,15 @@ namespace detail {
98
100
cfg = std::make_unique<config>(argc, argv, *default_logger);
99
101
graph_logger->set_level (cfg->get_log_level ());
100
102
103
+ if (const uint32_t cores = affinity_cores_available (); cores < min_cores_needed) {
104
+ default_logger->warn (" Celerity has detected that only {} logical cores are available to this process. It is recommended to assign at least {} "
105
+ " logical cores. Performance may be negatively impacted." ,
106
+ cores, min_cores_needed);
107
+ }
108
+
101
109
user_bench = std::make_unique<experimental::bench::detail::user_benchmarker>(*cfg, static_cast <node_id>(world_rank));
102
110
111
+
103
112
h_queue = std::make_unique<host_queue>(*default_logger);
104
113
d_queue = std::make_unique<device_queue>(*default_logger);
105
114
Original file line number Diff line number Diff line change 5
5
#include < memory>
6
6
#include < random>
7
7
8
+ #ifdef _WIN32
9
+ #include < Windows.h>
10
+ #else
11
+ #include < pthread.h>
12
+ #endif
13
+
8
14
#include < catch2/catch.hpp>
9
15
10
16
#include < celerity.h>
11
17
18
+ #include " affinity.h"
12
19
#include " ranges.h"
13
20
#include " region_map.h"
14
21
@@ -2305,5 +2312,25 @@ namespace detail {
2305
2312
}
2306
2313
}
2307
2314
2315
+
2316
+ TEST_CASE (" affinity check" , " [affinity]" ) {
2317
+ #ifdef _WIN32
2318
+ SECTION (" in Windows" ) {
2319
+ DWORD_PTR cpu_mask = 1 ;
2320
+ SetProcessAffinityMask (GetCurrentProcess (), cpu_mask);
2321
+ }
2322
+ #else
2323
+ SECTION (" in Posix" ) {
2324
+ cpu_set_t cpu_mask;
2325
+ CPU_ZERO (&cpu_mask);
2326
+ CPU_SET (0 , &cpu_mask);
2327
+ pthread_setaffinity_np (pthread_self (), sizeof (cpu_mask), &cpu_mask);
2328
+ }
2329
+ #endif
2330
+ const auto cores = affinity_cores_available ();
2331
+ REQUIRE (cores == 1 );
2332
+ }
2333
+
2334
+
2308
2335
} // namespace detail
2309
2336
} // namespace celerity
You can’t perform that action at this time.
0 commit comments