Skip to content

Implement create ethernet map #722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ target_sources(
xy_pair.cpp
utils/lock_manager.cpp
utils/robust_mutex.cpp
topology_discovery.cpp
remote_communication.cpp
)

Expand Down
3 changes: 3 additions & 0 deletions device/api/umd/device/chip/remote_chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ class LocalChip;
class RemoteChip : public Chip {
public:
RemoteChip(tt_SocDescriptor soc_descriptor, eth_coord_t eth_chip_location, LocalChip* local_chip);

RemoteChip(tt_SocDescriptor soc_descriptor, ChipInfo chip_info);

bool is_mmio_capable() const override;

void start_device() override;
Expand Down
86 changes: 86 additions & 0 deletions device/api/umd/device/topology_discovery.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include "umd/device/chip/chip.h"
#include "umd/device/tt_device/tt_device.h"

class tt_ClusterDescriptor;

namespace tt::umd {

// The TopologyDiscovery class creates a cluster descriptor; for now it only covers Wormhole
// configurations with the old routing firmware.
// TODO: Move Blackhole and 6U topology discovery to this class.
// NOTE(review): this header names std::unique_ptr, std::unordered_map, std::vector and std::pair but
// only includes project headers, so it appears to rely on transitive includes - confirm.
class TopologyDiscovery {
public:
// Only public entry point of the class. Hands ownership of a tt_ClusterDescriptor to the caller.
// Presumably drives the private discovery steps declared below; the implementation is not in this
// header - confirm against topology_discovery.cpp.
std::unique_ptr<tt_ClusterDescriptor> create_ethernet_map();

private:
// Bundle of values returned by get_eth_addresses() for a given ethernet firmware version.
// NOTE(review): judging by the names, the uint64_t fields look like addresses/offsets inside the
// ethernet core firmware memory map - the header itself does not state this, verify before relying on it.
// Field order is left untouched because callers may aggregate-initialize this struct.
struct EthAddresses {
uint32_t masked_version;

uint64_t version;
uint64_t boot_params;
uint64_t node_info;
uint64_t eth_conn_info;
uint64_t debug_buf;
uint64_t results_buf;
bool shelf_rack_routing;
uint64_t heartbeat;
uint64_t erisc_app;
uint64_t erisc_app_config;
uint64_t erisc_remote_board_type_offset;
uint64_t erisc_local_board_type_offset;
};

// Static helper: maps an ethernet firmware version number to the matching EthAddresses bundle.
static EthAddresses get_eth_addresses(uint32_t eth_fw_version);

// The three steps below take no arguments and return nothing, so they can only communicate through
// the data members of this class. Their exact effects are defined in the .cpp, not visible here.
void get_pcie_connected_chips();

void discover_remote_chips();

void fill_cluster_descriptor_info();

// Sends an ARC message to the chip identified by eth_coord, going through mmio_chip.
// ret0/ret1 are pointer parameters (presumably outputs for the message's return values - confirm);
// timeout_ms defaults to 5000. The meaning of the uint32_t return value is not documented here.
// TODO: this should be moved to class similar to TTDevice for MMIO devices.
// Covered by the UMD issue https://github.com/tenstorrent/tt-umd/issues/730.
uint32_t remote_arc_msg(
eth_coord_t eth_coord,
uint32_t msg_code,
uint32_t arg0,
uint32_t arg1,
uint32_t* ret0,
uint32_t* ret1,
Chip* mmio_chip,
uint32_t timeout_ms = 5000);

// Returns the ChipInfo of the chip at eth_coord, accessed through mmio_chip.
// TODO: this should be moved to class similar to TTDevice for MMIO devices.
// Covered by the UMD issue https://github.com/tenstorrent/tt-umd/issues/730.
ChipInfo read_non_mmio_chip_info(eth_coord_t eth_coord, Chip* mmio_chip);

// Returns the BoardType of the chip at eth_coord, accessed through mmio_chip.
// TODO: this should be moved to class similar to TTDevice for MMIO devices.
// Covered by the UMD issue https://github.com/tenstorrent/tt-umd/issues/730.
BoardType get_board_type(eth_coord_t eth_coord, Chip* mmio_chip);

// Owning map from chip id to the Chip object created for it.
std::unordered_map<chip_id_t, std::unique_ptr<Chip>> chips;

// Lookup from ethernet coordinate to chip id.
std::unordered_map<eth_coord_t, chip_id_t> eth_coord_to_chip_id;

// Lookup from chip id to ethernet coordinate (by its types, the reverse direction of the map above).
std::unordered_map<chip_id_t, eth_coord_t> eth_coords;

// Remote transfer eth cores for each TTDevice, key of the map is pcie device that we
// create tt device for.
std::unordered_map<uint32_t, std::vector<tt_xy_pair>> remote_transfer_ethernet_cores;

// List of connections, each stored as two (chip_id_t, uint32_t) endpoints.
// NOTE(review): the uint32_t is presumably the ethernet channel on that chip - confirm.
std::vector<std::pair<std::pair<chip_id_t, uint32_t>, std::pair<chip_id_t, uint32_t>>> ethernet_connections;

// Cluster descriptor owned by this object; tt_ClusterDescriptor is only forward-declared in this header.
std::unique_ptr<tt_ClusterDescriptor> cluster_desc;

// Starts at 0. NOTE(review): looks like a running counter for the next chip id to hand out - confirm.
chip_id_t chip_id = 0;

// No in-class initializer: the fields hold indeterminate values until something assigns this member.
EthAddresses eth_addresses;
};

} // namespace tt::umd
2 changes: 2 additions & 0 deletions device/api/umd/device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "umd/device/chip/chip.h"
#include "umd/device/cluster.h"
#include "umd/device/topology_discovery.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"
Expand All @@ -28,6 +29,7 @@ class Node;

class tt_ClusterDescriptor {
friend class tt::umd::Cluster;
friend class tt::umd::TopologyDiscovery;

private:
tt_ClusterDescriptor() = default;
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/tt_device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ class TTDevice {

ArcMessenger *get_arc_messenger() const;

ArcTelemetryReader *get_arc_telemetry_reader() const;

virtual uint32_t get_clock();

virtual uint32_t get_max_clock_freq();
Expand Down
2 changes: 2 additions & 0 deletions device/chip/remote_chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ RemoteChip::RemoteChip(tt_SocDescriptor soc_descriptor, eth_coord_t eth_chip_loc
log_assert(soc_descriptor_.arch != tt::ARCH::BLACKHOLE, "Non-MMIO targets not supported in Blackhole");
}

// Constructs a RemoteChip from a SoC descriptor and an already-populated ChipInfo.
// Both arguments are handed straight to the Chip base constructor (ChipInfo first, then the
// descriptor); the constructor body itself does no additional work.
RemoteChip::RemoteChip(tt_SocDescriptor soc_descriptor, ChipInfo chip_info) :
    Chip(chip_info, soc_descriptor) {
}

bool RemoteChip::is_mmio_capable() const { return false; }

void RemoteChip::start_device() {}
Expand Down
Loading