Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 86 additions & 86 deletions ci/perf/gpuc2_bench.csv

Large diffs are not rendered by default.

174 changes: 87 additions & 87 deletions ci/perf/gpuc2_bench.md

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions include/command.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ namespace detail {
}
bool is_flushed() const { return m_flushed; }

// TODO: Consider only having this in debug builds
std::string debug_label;

private:
// Should only be possible to add/remove dependencies using command_graph.
using parent_type = intrusive_graph_node<abstract_command>;
Expand Down Expand Up @@ -80,7 +77,7 @@ namespace detail {

class await_push_command final : public abstract_command {
friend class command_graph;
await_push_command(command_id cid, node_id nid, push_command* source) : abstract_command(cid, nid), m_source(source) {}
await_push_command(command_id cid, node_id nid, push_command* source) : abstract_command(cid, nid), m_source(source) { assert(source != nullptr); }

public:
push_command* get_source() const { return m_source; }
Expand Down
3 changes: 2 additions & 1 deletion include/command_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
namespace celerity {
namespace detail {

class reduction_manager;
class task_manager;

// TODO: Could be extended (using SFINAE) to support additional iterator types (e.g. random access)
Expand Down Expand Up @@ -127,7 +128,7 @@ namespace detail {

auto& task_commands(task_id tid) { return m_by_task.at(tid); }

std::optional<std::string> print_graph(size_t max_nodes, const task_manager& tm) const;
std::optional<std::string> print_graph(size_t max_nodes, const task_manager& tm, const reduction_manager& rm) const;

// TODO unify dependency terminology to this
void add_dependency(abstract_command* depender, abstract_command* dependee, dependency_kind kind, dependency_origin origin) {
Expand Down
5 changes: 3 additions & 2 deletions include/print_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ namespace celerity {
namespace detail {

class command_graph;
class reduction_manager;
class task_manager;

std::string print_task_graph(const task_ring_buffer& tdag);
std::string print_command_graph(const command_graph& cdag, const task_manager& tm);
std::string print_task_graph(const task_ring_buffer& tdag, const reduction_manager& rm);
std::string print_command_graph(const command_graph& cdag, const task_manager& tm, const reduction_manager& rm);

} // namespace detail
} // namespace celerity
2 changes: 1 addition & 1 deletion include/reduction_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ namespace detail {
return m_reductions.count(rid) != 0;
}

reduction_info get_reduction(reduction_id rid) {
reduction_info get_reduction(reduction_id rid) const {
std::lock_guard lock{m_mutex};
return m_reductions.at(rid)->get_info();
}
Expand Down
4 changes: 2 additions & 2 deletions src/command_graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace detail {
}
}

std::optional<std::string> command_graph::print_graph(size_t max_nodes, const task_manager& tm) const {
if(command_count() <= max_nodes) { return detail::print_command_graph(*this, tm); }
std::optional<std::string> command_graph::print_graph(size_t max_nodes, const task_manager& tm, const reduction_manager& rm) const {
if(command_count() <= max_nodes) { return detail::print_command_graph(*this, tm, rm); }
return std::nullopt;
}

Expand Down
15 changes: 0 additions & 15 deletions src/graph_generator.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include "graph_generator.h"

#include <allscale/utils/string_utils.h>

#include "access_modes.h"
#include "command.h"
#include "command_graph.h"
Expand Down Expand Up @@ -349,7 +347,6 @@ namespace detail {
// must read that original value in the eventual reduction_command generated by a future buffer requirement. Since whenever a buffer is used as
// a reduction output, we replace its state with a pending_reduction_state, that original value would be lost. To avoid duplicating the buffer,
// we simply include it in the pre-reduced state of a single execution_command.
std::unordered_map<buffer_id, reduction_id> buffer_reduction_map;
for(auto rid : tsk.get_reductions()) {
auto reduction = m_reduction_mngr.get_reduction(rid);

Expand All @@ -365,9 +362,6 @@ namespace detail {

// We need to add a proper requirement here because bid might itself be in pending_reduction_state
requirements[bid][rmode] = GridRegion<3>{{1, 1, 1}};

// TODO fill in debug build only
buffer_reduction_map.emplace(bid, rid);
}

for(auto& it : requirements) {
Expand Down Expand Up @@ -397,13 +391,6 @@ namespace detail {

for(const auto mode : required_modes) {
const auto& req = reqs_by_mode.at(mode);

// Add access mode and range to execution command node label for debugging
if(auto rid_iter = buffer_reduction_map.find(bid); rid_iter != buffer_reduction_map.end()) {
cmd->debug_label += fmt::format("(R{}) ", rid_iter->second);
}
cmd->debug_label += fmt::format("{} {} {}\\n", detail::access::mode_traits::name(mode), bid, toString(req));

if(detail::access::mode_traits::is_consumer(mode)) {
// Store the read access for determining anti-dependencies later on
m_command_buffer_reads[cid][bid] = GridRegion<3>::merge(m_command_buffer_reads[cid][bid], req);
Expand Down Expand Up @@ -602,8 +589,6 @@ namespace detail {
// Simplification: If there are multiple chunks per node, we generate true-dependencies between them in an arbitrary order, when all we really
// need is mutual exclusion (i.e. a bi-directional pseudo-dependency).
nd.host_object_last_effects.insert_or_assign(hoid, cmd->get_cid());

cmd->debug_label += fmt::format("affect host-object {}\\n", hoid);
}
}
}
Expand Down
222 changes: 135 additions & 87 deletions src/print_graph.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include "print_graph.h"

#include <sstream>

#include <spdlog/fmt/fmt.h>
#include <spdlog/fmt/ostr.h>

Expand All @@ -24,133 +22,183 @@ namespace detail {
}
}

std::string get_task_label(const task* tsk) {
switch(tsk->get_type()) {
case task_type::epoch: return fmt::format("Task {} (epoch)", tsk->get_id());
case task_type::host_compute: return fmt::format("Task {} (host-compute)", tsk->get_id());
case task_type::device_compute: return fmt::format("Task {} ({})", tsk->get_id(), tsk->get_debug_name());
case task_type::collective: return fmt::format("Task {} (collective #{})", tsk->get_id(), static_cast<size_t>(tsk->get_collective_group_id()));
case task_type::master_node: return fmt::format("Task {} (master-node)", tsk->get_id());
case task_type::horizon: return fmt::format("Task {} (horizon)", tsk->get_id());
default: assert(false); return fmt::format("Task {} (unknown)", tsk->get_id());
// Maps a task_type enumerator to a short, human-readable name.
// The returned pointer refers to a string literal, so the caller must not free it.
// Any enumerator that is not listed explicitly falls through to "unknown".
const char* task_type_string(const task_type tt) {
switch(tt) {
case task_type::epoch: return "epoch";
case task_type::host_compute: return "host-compute";
case task_type::device_compute: return "device-compute";
case task_type::collective: return "collective host";
case task_type::master_node: return "master-node host";
case task_type::horizon: return "horizon";
default: return "unknown";
}
}

std::string print_task_graph(const task_ring_buffer& tdag) {
std::ostringstream ss;
ss << "digraph G { label=\"Task Graph\" ";
void format_requirements(std::string& label, const task& tsk, subrange<3> execution_range, access_mode reduction_init_mode, const reduction_manager& rm) {
for(auto rid : tsk.get_reductions()) {
auto reduction = rm.get_reduction(rid);

for(auto tsk : tdag) {
std::unordered_map<std::string, std::string> props;
props["label"] = "\"" + get_task_label(tsk) + "\"";
auto rmode = cl::sycl::access::mode::discard_write;
if(reduction.initialize_from_buffer) { rmode = reduction_init_mode; }

const auto bid = reduction.output_buffer_id;
const auto req = GridRegion<3>{{1, 1, 1}};
fmt::format_to(std::back_inserter(label), "<br/>(R{}) <i>{}</i> B{} {}", rid, detail::access::mode_traits::name(rmode), bid, req);
}

ss << tsk->get_id();
ss << "[";
for(const auto& it : props) {
ss << " " << it.first << "=" << it.second;
const auto& bam = tsk.get_buffer_access_map();
for(const auto bid : bam.get_accessed_buffers()) {
for(const auto mode : bam.get_access_modes(bid)) {
const auto req = bam.get_requirements_for_access(bid, mode, tsk.get_dimensions(), execution_range, tsk.get_global_size());
// While uncommon, we do support chunks that don't require access to a particular buffer at all.
if(!req.empty()) { fmt::format_to(std::back_inserter(label), "<br/><i>{}</i> B{} {}", detail::access::mode_traits::name(mode), bid, req); }
}
ss << "];";
}

for(const auto& [hoid, order] : tsk.get_side_effect_map()) {
fmt::format_to(std::back_inserter(label), "<br/><i>affect</i> H{}", hoid);
}
}

/// Builds the label markup for a single task node of the task graph.
///
/// The label is assembled from, in order:
///  - the task id as `T<id>`,
///  - the task's debug name in double quotes, if it is non-empty,
///  - a bold task-type line, followed by the execution range for host-/device-compute tasks
///    or by the collective group id for collective tasks,
///  - whatever format_requirements() appends for this task over its full global range.
///
/// The result uses Graphviz HTML-like markup (`<br/>`, `<b>`), so it has to be emitted inside
/// `label=<...>` rather than inside a quoted DOT string.
///
/// @param tsk The task to describe.
/// @param rm  Reduction manager, forwarded to format_requirements().
/// @return The label markup for `tsk`.
std::string get_task_label(const task& tsk, const reduction_manager& rm) {
	std::string label;
	fmt::format_to(std::back_inserter(label), "T{}", tsk.get_id());

	const auto& debug_name = tsk.get_debug_name();
	if(!debug_name.empty()) {
		// The debug name is free-form text (e.g. a templated kernel name such as "my_kernel<int>"). Inside an HTML-like
		// label, '<', '>' and '&' are markup characters, so they must be written as entities or the DOT output is malformed.
		label += " \"";
		for(const char c : debug_name) {
			switch(c) {
			case '&': label += "&amp;"; break;
			case '<': label += "&lt;"; break;
			case '>': label += "&gt;"; break;
			default: label += c; break;
			}
		}
		label += "\" ";
	}

	// A task label always describes the task's entire global range, not an individual chunk.
	const auto execution_range = subrange<3>{tsk.get_global_offset(), tsk.get_global_size()};

	fmt::format_to(std::back_inserter(label), "<br/><b>{}</b>", task_type_string(tsk.get_type()));
	if(tsk.get_type() == task_type::host_compute || tsk.get_type() == task_type::device_compute) {
		fmt::format_to(std::back_inserter(label), " {}", execution_range);
	} else if(tsk.get_type() == task_type::collective) {
		fmt::format_to(std::back_inserter(label), " in CG{}", tsk.get_collective_group_id());
	}

	// read_write is passed as the access mode to display for reductions that initialize from their output buffer.
	format_requirements(label, tsk, execution_range, access_mode::read_write, rm);

	return label;
}

std::string print_task_graph(const task_ring_buffer& tdag, const reduction_manager& rm) {
std::string dot = "digraph G {label=\"Task Graph\" ";

for(auto tsk : tdag) {
const auto shape = tsk->get_type() == task_type::epoch || tsk->get_type() == task_type::horizon ? "ellipse" : "box style=rounded";
fmt::format_to(std::back_inserter(dot), "{}[shape={} label=<{}>];", tsk->get_id(), shape, get_task_label(*tsk, rm));
for(auto d : tsk->get_dependencies()) {
ss << fmt::format("{} -> {} [{}];", d.node->get_id(), tsk->get_id(), dependency_style(d));
fmt::format_to(std::back_inserter(dot), "{}->{}[{}];", d.node->get_id(), tsk->get_id(), dependency_style(d));
}
}

ss << "}";
return ss.str();
dot += "}";
return dot;
}

std::string get_command_label(const abstract_command* cmd) {
std::string label = fmt::format("[{}] Node {}:\\n", cmd->get_cid(), cmd->get_nid());
if(const auto ecmd = dynamic_cast<const epoch_command*>(cmd)) {
label += "epoch";
std::string get_command_label(const abstract_command& cmd, const task_manager& tm, const reduction_manager& rm) {
const command_id cid = cmd.get_cid();
const node_id nid = cmd.get_nid();

std::string label = fmt::format("C{} on N{}<br/>", cid, nid);

if(const auto ecmd = dynamic_cast<const epoch_command*>(&cmd)) {
label += "<b>epoch</b>";
if(ecmd->get_epoch_action() == epoch_action::barrier) { label += " (barrier)"; }
if(ecmd->get_epoch_action() == epoch_action::shutdown) { label += " (shutdown)"; }
} else if(const auto xcmd = dynamic_cast<const execution_command*>(cmd)) {
label += fmt::format("execution {}\\n{}", subrange_to_grid_box(xcmd->get_execution_range()), cmd->debug_label);
} else if(const auto pcmd = dynamic_cast<const push_command*>(cmd)) {
if(pcmd->get_rid()) { label += fmt::format("(R{}) ", pcmd->get_rid()); }
label += fmt::format("push {} to {}\\n {}", pcmd->get_bid(), pcmd->get_target(), subrange_to_grid_box(pcmd->get_range()));
} else if(const auto apcmd = dynamic_cast<const await_push_command*>(cmd)) {
} else if(const auto xcmd = dynamic_cast<const execution_command*>(&cmd)) {
fmt::format_to(std::back_inserter(label), "<b>execution</b> {}", subrange_to_grid_box(xcmd->get_execution_range()));
} else if(const auto pcmd = dynamic_cast<const push_command*>(&cmd)) {
if(pcmd->get_rid()) { fmt::format_to(std::back_inserter(label), "(R{}) ", pcmd->get_rid()); }
fmt::format_to(
std::back_inserter(label), "<b>push</b> to N{}<br/>B{} {}", pcmd->get_target(), pcmd->get_bid(), subrange_to_grid_box(pcmd->get_range()));
} else if(const auto apcmd = dynamic_cast<const await_push_command*>(&cmd)) {
if(apcmd->get_source()->get_rid()) { label += fmt::format("(R{}) ", apcmd->get_source()->get_rid()); }
label += fmt::format("await push {} from {}\\n {}", apcmd->get_source()->get_bid(), apcmd->get_source()->get_nid(),
fmt::format_to(std::back_inserter(label), "<b>await push</b> from N{}<br/>B{} {}", apcmd->get_source()->get_nid(), apcmd->get_source()->get_bid(),
subrange_to_grid_box(apcmd->get_source()->get_range()));
} else if(const auto rrcmd = dynamic_cast<const reduction_command*>(cmd)) {
label += fmt::format("reduction {}", rrcmd->get_rid());
} else if(const auto hcmd = dynamic_cast<const horizon_command*>(cmd)) {
label += "horizon";
} else if(const auto rrcmd = dynamic_cast<const reduction_command*>(&cmd)) {
const auto reduction = rm.get_reduction(rrcmd->get_rid());
const auto req = GridRegion<3>{{1, 1, 1}};
fmt::format_to(std::back_inserter(label), "<b>reduction</b> R{}<br/>B{} {}", rrcmd->get_rid(), reduction.output_buffer_id, req);
} else if(const auto hcmd = dynamic_cast<const horizon_command*>(&cmd)) {
label += "<b>horizon</b>";
} else {
assert(!"Unkown command");
return fmt::format("[{}] UNKNOWN\\n{}", cmd->get_cid(), cmd->debug_label);
label += "<b>unknown</b>";
}

if(const auto tcmd = dynamic_cast<const task_command*>(&cmd)) {
const auto& tsk = *tm.get_task(tcmd->get_tid());

auto reduction_init_mode = access_mode::discard_write;
auto execution_range = subrange<3>{tsk.get_global_offset(), tsk.get_global_size()};
if(const auto ecmd = dynamic_cast<const execution_command*>(&cmd)) {
if(ecmd->is_reduction_initializer()) { reduction_init_mode = cl::sycl::access::mode::read_write; }
execution_range = ecmd->get_execution_range();
}

format_requirements(label, tsk, execution_range, reduction_init_mode, rm);
}

return label;
}

std::string print_command_graph(const command_graph& cdag, const task_manager& tm) {
std::ostringstream main_ss;
std::unordered_map<task_id, std::ostringstream> task_subgraph_ss;
std::string print_command_graph(const command_graph& cdag, const task_manager& tm, const reduction_manager& rm) {
std::string main_dot;
std::unordered_map<task_id, std::string> task_subgraph_dot;

const auto write_vertex = [&](std::ostream& out, abstract_command* cmd) {
const char* colors[] = {"black", "crimson", "dodgerblue4", "goldenrod", "maroon4", "springgreen2", "tan1", "chartreuse2"};
const auto print_vertex = [&](const abstract_command& cmd) {
static const char* const colors[] = {"black", "crimson", "dodgerblue4", "goldenrod", "maroon4", "springgreen2", "tan1", "chartreuse2"};

std::unordered_map<std::string, std::string> props;
props["label"] = "\"" + get_command_label(cmd) + "\"";
props["fontcolor"] = colors[cmd->get_nid() % (sizeof(colors) / sizeof(char*))];
if(isa<task_command>(cmd)) { props["shape"] = "box"; }

out << cmd->get_cid();
out << "[";
for(const auto& it : props) {
out << " " << it.first << "=" << it.second;
}
out << "];";
const auto name = cmd.get_cid();
const auto label = get_command_label(cmd, tm, rm);
const auto fontcolor = colors[cmd.get_nid() % (sizeof(colors) / sizeof(char*))];
const auto shape = isa<task_command>(&cmd) ? "box" : "ellipse";
return fmt::format("{}[label=<{}> fontcolor={} shape={}];", name, label, fontcolor, shape);
};

const auto write_command = [&](auto* cmd) {
if(const auto tcmd = dynamic_cast<task_command*>(cmd)) {
for(const auto cmd : cdag.all_commands()) {
if(const auto tcmd = dynamic_cast<const task_command*>(cmd)) {
const auto tid = tcmd->get_tid();
// Add to subgraph as well
if(task_subgraph_ss.find(tcmd->get_tid()) == task_subgraph_ss.end()) {
if(task_subgraph_dot.count(tid) == 0) {
std::string task_label;
if(const auto tsk = tm.find_task(tcmd->get_tid())) {
task_label = get_task_label(tsk);
fmt::format_to(std::back_inserter(task_label), "T{} ", tid);
if(const auto tsk = tm.find_task(tid)) {
if(!tsk->get_debug_name().empty()) { fmt::format_to(std::back_inserter(task_label), "\"{}\" ", tsk->get_debug_name()); }
task_label += "(";
task_label += task_type_string(tsk->get_type());
if(tsk->get_type() == task_type::collective) {
fmt::format_to(std::back_inserter(task_label), " on CG{}", tsk->get_collective_group_id());
}
task_label += ")";
} else {
task_label = fmt::format("Task {} (deleted)", tcmd->get_tid());
task_label += "(deleted)";
}
task_subgraph_ss[tcmd->get_tid()] << fmt::format("subgraph cluster_{} {{ label=\"{}\"; color=gray;", tcmd->get_tid(), task_label);
task_subgraph_dot.emplace(
tid, fmt::format("subgraph cluster_{}{{label=<<font color=\"#606060\">{}</font>>;color=darkgray;", tid, task_label));
}
write_vertex(task_subgraph_ss[tcmd->get_tid()], cmd);
task_subgraph_dot[tid] += print_vertex(*cmd);
} else {
write_vertex(main_ss, cmd);
main_dot += print_vertex(*cmd);
}

for(auto d : cmd->get_dependencies()) {
main_ss << fmt::format("{} -> {} [{}];", d.node->get_cid(), cmd->get_cid(), dependency_style(d));
for(const auto& d : cmd->get_dependencies()) {
fmt::format_to(std::back_inserter(main_dot), "{}->{}[{}];", d.node->get_cid(), cmd->get_cid(), dependency_style(d));
}

// Add a dashed line to the corresponding push
if(isa<await_push_command>(cmd)) {
auto await_push = static_cast<await_push_command*>(cmd);
main_ss << fmt::format("{} -> {} [style=dashed color=gray40];", await_push->get_source()->get_cid(), cmd->get_cid());
if(const auto apcmd = dynamic_cast<const await_push_command*>(cmd)) {
fmt::format_to(std::back_inserter(main_dot), "{}->{}[style=dashed color=gray40];", apcmd->get_source()->get_cid(), cmd->get_cid());
}
};

for(auto cmd : cdag.all_commands()) {
write_command(cmd);
}

// Close all subgraphs
for(auto& sg : task_subgraph_ss) {
sg.second << "}";
}

std::ostringstream result_ss;
result_ss << "digraph G { label=\"Command Graph\" ";
for(auto& sg : task_subgraph_ss) {
result_ss << sg.second.str();
std::string result_dot = "digraph G{label=\"Command Graph\" ";
for(auto& [sg_tid, sg_dot] : task_subgraph_dot) {
result_dot += sg_dot;
result_dot += "}";
}
result_ss << main_ss.str();
result_ss << "}";
return result_ss.str();
result_dot += main_dot;
result_dot += "}";
return result_dot;
}

} // namespace detail
Expand Down
Loading