GT/GTI: fix_hgx_gpu_model_missing_issue

Summary: # Description Per Nv doc#1069815 NVIDIA HGX Baseboard Redfish Design Collaterals and doc#1108844 Blackwell HGX 8-GPU Redfish Design Collateral, the GPU model name should be retrieved with the Redfish command below: curl -X GET http://192.168.31.1/redfish/v1/Chassis/HGX_Chassis_0/Assembly Here are the changes in the PR: 1. Use the above command to retrieve the GPU model 2. Store the GPU model name in the kv file once it is read successfully 3. If the GPU model can't be retrieved, return the default model (H100) Example: root@bmc-oob:~# curl -X GET http://192.168.31.1/redfish/v1/Chassis/HGX_Chassis_0/Assembly { "odata.id": "/redfish/v1/Chassis/HGX_Chassis_0/Assembly", "odata.type": "#Assembly.v1_3_0.Assembly", "Assemblies": [ { "odata.id": "/redfish/v1/Chassis/HGX_Chassis_0/Assembly#/Assemblies/0", "Location": { "PartLocation": { "LocationType": "Embedded" } }, "MemberId": "0", "Model": "P5612-B00", "Name": "Board FRU Assembly", "PartNumber": "699-25612-0002-204", "PhysicalContext": "GPUSubsystem", "ProductionDate": "2022-12-17T08:54:00Z", "SerialNumber": "1665022550432", "Vendor": "NVIDIA" }, { "odata.id": "/redfish/v1/Chassis/HGX_Chassis_0/Assembly#/Assemblies/1", "Location": { "PartLocation": { "LocationType": "Embedded" } }, "MemberId": "1", "Model": "NVIDIA HGX H100 8-GPU", . . . X-link: facebookexternal/openbmc.quanta#4429 Test Plan: Build and test pass on GTI system with FW bundle v1.3, v1.4 and v1.5 root@bmc-oob:~# rm /tmp/cache_store/gpu_model root@bmc-oob:~# fw-util hgx --version erot-fpga Version: 00.02.0150.0000_n00 root@bmc-oob:~# kv get gpu_model "NVIDIA HGX H100 8-GPU" root@bmc-oob:~# sensor-util hgx hgx: HGX_HSC_0_Power_W (0x1) : 88.292 Watts | (ok) HGX_HSC_1_Power_W (0x Reviewed By: amithash Differential Revision: D62107276 fbshipit-source-id: 0cf70a50647340a4124431a589b4574cb00248fe
facebook · Sep 5, 2024 · c6aa0e0 · c6aa0e0
1 parent 4566b37
commit c6aa0e0
Showing 1 changed file with 40 additions and 27 deletions.
diff --git a/meta-facebook/meta-grandteton/recipes-grandteton/hgx/files/hgx.cpp b/meta-facebook/meta-grandteton/recipes-grandteton/hgx/files/hgx.cpp
@@ -316,52 +316,65 @@ std::string updateNonBlocking(const std::string& comp, const std::string& path,
 }
 
 HMCPhase getHMCPhase() {
-  static HMCPhase phase = HMCPhase::HMC_FW_UNKNOWN;
-  // TODO Try to do this with one Get at the root.
+  HMCPhase phase = HMCPhase::BMC_FW_DVT;
+  try {
+    std::string strModel = kv::get("gpu_model");
+    if (containStr(strModel, {"H100", "H200"})) {
+      return HMCPhase::BMC_FW_DVT;
+    }
+    else if (containStr(strModel, {"B100", "B200"})) {
+      return HMCPhase::BMC_FW_B100;
+    }
+    else if (containStr(strModel, {"HGX_DVT"})) {
+      return HMCPhase::HMC_FW_DVT;
+    }
+    else if (containStr(strModel, {"HGX_EVT"})) {
+      return HMCPhase::HMC_FW_EVT;
+    }
+  } catch (std::exception& e) {
+    // Cached gpu_model lookup threw (presumably key not set yet - confirm); fall through to the HMC probes below.
+  }
+
   auto tryPhase = [](const std::string& url) {
     try {
       hgx.get(url);
       return true;
-    } catch (std::exception&) {
+    } catch (std::exception& e) {
       return false;
     }
   };
-  if (phase != HMCPhase::HMC_FW_UNKNOWN) {
-    return phase;
-  }
 
   if (tryPhase(HMC_FW_INVENTORY + "HGX_FW_BMC_0")) {
-    std::vector<std::string> urls = {
-        HMC_URL + "Chassis/HGX_Chassis_0",
-        HMC_URL + "Chassis/HGX_BMC_0"
-    };
-    nlohmann::json jurl;
-
-    for (const auto& url : urls) {
-      jurl = nlohmann::json::parse(hgx.get(url));
-      if (jurl.contains("Model")) {
-        auto model = jurl["Model"].dump();
-        if (containStr(model, {"H100"})) {
-          phase = HMCPhase::BMC_FW_DVT;
-          break;
-        }
-        else if (containStr(model, {"B100"})) {
-          phase = HMCPhase::BMC_FW_B100;
-          break;
+    std::string url = HMC_URL + "Chassis/HGX_Chassis_0/Assembly";
+    phase = HMCPhase::BMC_FW_DVT;
+    json jurl = json::parse(hgx.get(url));
+
+    if (jurl.contains("Assemblies")) {
+      json &tempArray = jurl["Assemblies"];
+      for (auto &x : tempArray) {
+        if (x.contains("Model"))  {
+          auto model = x["Model"].dump();
+          if (containStr(model, {"H100", "H200"})) {
+            kv::set("gpu_model", model);
+            return HMCPhase::BMC_FW_DVT;
+          }
+          else if (containStr(model, {"B100", "B200"})) {
+            kv::set("gpu_model", model);
+            return HMCPhase::BMC_FW_B100;
+          }
         }
       }
-      phase = HMC_FW_UNKNOWN;
     }
   }
   else if (tryPhase(HMC_FW_INVENTORY + "HGX_FW_HMC_0")) {
+    kv::set("gpu_model", "HGX_DVT");
     phase = HMCPhase::HMC_FW_DVT;
   }
   else if (tryPhase(HMC_FW_INVENTORY + "HMC_Firmware")) {
+    kv::set("gpu_model", "HGX_EVT");
     phase = HMCPhase::HMC_FW_EVT;
   }
-  else {
-    phase = HMC_FW_UNKNOWN;
-  }
+
   return phase;
 }