Skip to content

Commit b8e297d

Browse files
committed
[OpenMP][libomptarget] Improve kernel initialization in plugins
This patch modifies the plugins so that the initialization of KernelTy objects is done in the init method. Previously, part of the initialization was done in the constructKernelEntry method; that method is now called constructKernel and only allocates and constructs a KernelTy object. This patch prepares the kernel class for the new implementation of device reductions.

Differential Revision: https://reviews.llvm.org/D156917
1 parent 16f6f19 commit b8e297d

File tree

5 files changed

+126
-114
lines changed

5 files changed

+126
-114
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1899,20 +1899,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
18991899
uint64_t getClockFrequency() const override { return ClockFrequency; }
19001900

19011901
/// Allocate and construct an AMDGPU kernel.
1902-
Expected<GenericKernelTy *>
1903-
constructKernelEntry(const __tgt_offload_entry &KernelEntry,
1904-
DeviceImageTy &Image) override {
1902+
Expected<GenericKernelTy &>
1903+
constructKernel(const __tgt_offload_entry &KernelEntry,
1904+
OMPTgtExecModeFlags ExecMode) override {
1905+
// Allocate and construct the AMDGPU kernel.
1906+
AMDGPUKernelTy *AMDGPUKernel = Plugin::get().allocate<AMDGPUKernelTy>();
1907+
if (!AMDGPUKernel)
1908+
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
19051909

1906-
Expected<OMPTgtExecModeFlags> ExecModeOrErr =
1907-
getExecutionModeForKernel(KernelEntry.name, Image);
1908-
if (!ExecModeOrErr)
1909-
return ExecModeOrErr.takeError();
1910+
new (AMDGPUKernel) AMDGPUKernelTy(KernelEntry.name, ExecMode);
19101911

1911-
// Allocate and initialize the AMDGPU kernel.
1912-
AMDGPUKernelTy *AMDKernel = Plugin::get().allocate<AMDGPUKernelTy>();
1913-
new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeOrErr.get());
1914-
1915-
return AMDKernel;
1912+
return *AMDGPUKernel;
19161913
}
19171914

19181915
/// Set the current context to this device's context. Do nothing since the

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -746,21 +746,25 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
746746
__tgt_offload_entry &DeviceEntry) {
747747
DeviceEntry = KernelEntry;
748748

749+
// Retrieve the execution mode.
750+
auto ExecModeOrErr = getExecutionModeForKernel(KernelEntry.name, Image);
751+
if (!ExecModeOrErr)
752+
return ExecModeOrErr.takeError();
753+
749754
// Create a kernel object.
750-
auto KernelOrErr = constructKernelEntry(KernelEntry, Image);
755+
auto KernelOrErr = constructKernel(KernelEntry, *ExecModeOrErr);
751756
if (!KernelOrErr)
752757
return KernelOrErr.takeError();
753758

754-
GenericKernelTy *Kernel = *KernelOrErr;
755-
assert(Kernel != nullptr && "Invalid kernel");
759+
GenericKernelTy &Kernel = *KernelOrErr;
756760

757761
// Initialize the kernel.
758-
if (auto Err = Kernel->init(*this, Image))
762+
if (auto Err = Kernel.init(*this, Image))
759763
return Err;
760764

761765
// Set the device entry address to the kernel address and store the entry on
762766
// the entry table.
763-
DeviceEntry.addr = (void *)Kernel;
767+
DeviceEntry.addr = (void *)&Kernel;
764768
Image.getOffloadEntryTable().addEntry(DeviceEntry);
765769

766770
return Plugin::success();

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -794,9 +794,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
794794
__tgt_offload_entry &DeviceEntry);
795795

796796
/// Allocate and construct a kernel object.
797-
virtual Expected<GenericKernelTy *>
798-
constructKernelEntry(const __tgt_offload_entry &KernelEntry,
799-
DeviceImageTy &Image) = 0;
797+
virtual Expected<GenericKernelTy &>
798+
constructKernel(const __tgt_offload_entry &KernelEntry,
799+
OMPTgtExecModeFlags ExecMode) = 0;
800800

801801
/// Get and set the stack size and heap size for the device. If not used, the
802802
/// plugin can implement the setters as no-op and setting the output
@@ -837,8 +837,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
837837

838838
protected:
839839
/// Return the execution mode used for kernel \p Name.
840-
Expected<OMPTgtExecModeFlags> getExecutionModeForKernel(StringRef Name,
841-
DeviceImageTy &Image);
840+
virtual Expected<OMPTgtExecModeFlags>
841+
getExecutionModeForKernel(StringRef Name, DeviceImageTy &Image);
842842

843843
/// Environment variables defined by the LLVM OpenMP implementation
844844
/// regarding the initial number of streams and events.

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 68 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -37,31 +37,80 @@ struct CUDAKernelTy;
3737
struct CUDADeviceTy;
3838
struct CUDAPluginTy;
3939

40+
/// Class implementing the CUDA device images properties.
41+
struct CUDADeviceImageTy : public DeviceImageTy {
42+
/// Create the CUDA image with the id and the target image pointer.
43+
CUDADeviceImageTy(int32_t ImageId, const __tgt_device_image *TgtImage)
44+
: DeviceImageTy(ImageId, TgtImage), Module(nullptr) {}
45+
46+
/// Load the image as a CUDA module.
47+
Error loadModule() {
48+
assert(!Module && "Module already loaded");
49+
50+
CUresult Res = cuModuleLoadDataEx(&Module, getStart(), 0, nullptr, nullptr);
51+
if (auto Err = Plugin::check(Res, "Error in cuModuleLoadDataEx: %s"))
52+
return Err;
53+
54+
return Plugin::success();
55+
}
56+
57+
/// Unload the CUDA module corresponding to the image.
58+
Error unloadModule() {
59+
assert(Module && "Module not loaded");
60+
61+
CUresult Res = cuModuleUnload(Module);
62+
if (auto Err = Plugin::check(Res, "Error in cuModuleUnload: %s"))
63+
return Err;
64+
65+
Module = nullptr;
66+
67+
return Plugin::success();
68+
}
69+
70+
/// Getter of the CUDA module.
71+
CUmodule getModule() const { return Module; }
72+
73+
private:
74+
/// The CUDA module that loaded the image.
75+
CUmodule Module;
76+
};
77+
4078
/// Class implementing the CUDA kernel functionalities which derives from the
4179
/// generic kernel class.
4280
struct CUDAKernelTy : public GenericKernelTy {
43-
/// Create a CUDA kernel with a name, an execution mode, and the kernel
44-
/// function.
45-
CUDAKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode,
46-
CUfunction Func)
47-
: GenericKernelTy(Name, ExecutionMode), Func(Func) {}
81+
/// Create a CUDA kernel with a name and an execution mode.
82+
CUDAKernelTy(const char *Name, OMPTgtExecModeFlags ExecMode)
83+
: GenericKernelTy(Name, ExecMode), Func(nullptr) {}
4884

49-
/// Initialize the CUDA kernel
85+
/// Initialize the CUDA kernel.
5086
Error initImpl(GenericDeviceTy &GenericDevice,
5187
DeviceImageTy &Image) override {
88+
CUresult Res;
89+
CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(Image);
90+
91+
// Retrieve the function pointer of the kernel.
92+
Res = cuModuleGetFunction(&Func, CUDAImage.getModule(), getName());
93+
if (auto Err = Plugin::check(Res, "Error in cuModuleGetFunction('%s'): %s",
94+
getName()))
95+
return Err;
96+
97+
// Check that the function pointer is valid.
98+
if (!Func)
99+
return Plugin::error("Invalid function for kernel %s", getName());
100+
52101
int MaxThreads;
53-
CUresult Res = cuFuncGetAttribute(
54-
&MaxThreads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Func);
102+
Res = cuFuncGetAttribute(&MaxThreads,
103+
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Func);
55104
if (auto Err = Plugin::check(Res, "Error in cuFuncGetAttribute: %s"))
56105
return Err;
57106

58-
/// Set the maximum number of threads for the CUDA kernel.
107+
// The maximum number of threads cannot exceed the maximum of the kernel.
59108
MaxNumThreads = std::min(MaxNumThreads, (uint32_t)MaxThreads);
60109

61110
return Plugin::success();
62111
}
63112

64-
/// Launch the CUDA kernel function
113+
/// Launch the CUDA kernel function.
65114
Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads,
66115
uint64_t NumBlocks, KernelArgsTy &KernelArgs, void *Args,
67116
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
@@ -165,44 +214,6 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
165214
HandleTy Event;
166215
};
167216

168-
/// Class implementing the CUDA device images properties.
169-
struct CUDADeviceImageTy : public DeviceImageTy {
170-
/// Create the CUDA image with the id and the target image pointer.
171-
CUDADeviceImageTy(int32_t ImageId, const __tgt_device_image *TgtImage)
172-
: DeviceImageTy(ImageId, TgtImage), Module(nullptr) {}
173-
174-
/// Load the image as a CUDA module.
175-
Error loadModule() {
176-
assert(!Module && "Module already loaded");
177-
178-
CUresult Res = cuModuleLoadDataEx(&Module, getStart(), 0, nullptr, nullptr);
179-
if (auto Err = Plugin::check(Res, "Error in cuModuleLoadDataEx: %s"))
180-
return Err;
181-
182-
return Plugin::success();
183-
}
184-
185-
/// Unload the CUDA module corresponding to the image.
186-
Error unloadModule() {
187-
assert(Module && "Module not loaded");
188-
189-
CUresult Res = cuModuleUnload(Module);
190-
if (auto Err = Plugin::check(Res, "Error in cuModuleUnload: %s"))
191-
return Err;
192-
193-
Module = nullptr;
194-
195-
return Plugin::success();
196-
}
197-
198-
/// Getter of the CUDA module.
199-
CUmodule getModule() const { return Module; }
200-
201-
private:
202-
/// The CUDA module that loaded the image.
203-
CUmodule Module;
204-
};
205-
206217
/// Class implementing the CUDA device functionalities which derives from the
207218
/// generic device class.
208219
struct CUDADeviceTy : public GenericDeviceTy {
@@ -330,32 +341,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
330341
}
331342

332343
/// Allocate and construct a CUDA kernel.
333-
Expected<GenericKernelTy *>
334-
constructKernelEntry(const __tgt_offload_entry &KernelEntry,
335-
DeviceImageTy &Image) override {
336-
CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(Image);
337-
338-
// Retrieve the function pointer of the kernel.
339-
CUfunction Func;
340-
CUresult Res =
341-
cuModuleGetFunction(&Func, CUDAImage.getModule(), KernelEntry.name);
342-
if (auto Err = Plugin::check(Res, "Error in cuModuleGetFunction('%s'): %s",
343-
KernelEntry.name))
344-
return std::move(Err);
345-
346-
DP("Entry point " DPxMOD " maps to %s (" DPxMOD ")\n", DPxPTR(&KernelEntry),
347-
KernelEntry.name, DPxPTR(Func));
348-
349-
Expected<OMPTgtExecModeFlags> ExecModeOrErr =
350-
getExecutionModeForKernel(KernelEntry.name, Image);
351-
if (!ExecModeOrErr)
352-
return ExecModeOrErr.takeError();
353-
354-
// Allocate and initialize the CUDA kernel.
344+
Expected<GenericKernelTy &>
345+
constructKernel(const __tgt_offload_entry &KernelEntry,
346+
OMPTgtExecModeFlags ExecMode) override {
347+
// Allocate and construct the CUDA kernel.
355348
CUDAKernelTy *CUDAKernel = Plugin::get().allocate<CUDAKernelTy>();
356-
new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecModeOrErr.get(), Func);
349+
if (!CUDAKernel)
350+
return Plugin::error("Failed to allocate memory for CUDA kernel");
351+
352+
new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecMode);
357353

358-
return CUDAKernel;
354+
return *CUDAKernel;
359355
}
360356

361357
/// Set the current context to this device's context.

openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Lines changed: 35 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -49,14 +49,27 @@ using llvm::sys::DynamicLibrary;
4949

5050
/// Class implementing kernel functionalities for GenELF64.
5151
struct GenELF64KernelTy : public GenericKernelTy {
52-
/// Construct the kernel with a name, execution mode and a function.
53-
GenELF64KernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode,
54-
void (*Func)(void))
55-
: GenericKernelTy(Name, ExecutionMode), Func(Func) {}
52+
/// Construct the kernel with a name and an execution mode.
53+
GenELF64KernelTy(const char *Name, OMPTgtExecModeFlags ExecMode)
54+
: GenericKernelTy(Name, ExecMode), Func(nullptr) {}
5655

5756
/// Initialize the kernel.
58-
Error initImpl(GenericDeviceTy &GenericDevice,
59-
DeviceImageTy &Image) override {
57+
Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override {
58+
// Functions have zero size.
59+
GlobalTy Global(getName(), 0);
60+
61+
// Get the metadata (address) of the kernel function.
62+
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
63+
if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global))
64+
return Err;
65+
66+
// Check that the function pointer is valid.
67+
if (!Global.getPtr())
68+
return Plugin::error("Invalid function for kernel %s", getName());
69+
70+
// Save the function pointer.
71+
Func = (void (*)())Global.getPtr();
72+
6073
// Set the maximum number of threads to a single.
6174
MaxNumThreads = 1;
6275
return Plugin::success();
@@ -119,23 +132,18 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
119132
Error deinitImpl() override { return Plugin::success(); }
120133

121134
/// Construct the kernel for a specific image on the device.
122-
Expected<GenericKernelTy *>
123-
constructKernelEntry(const __tgt_offload_entry &KernelEntry,
124-
DeviceImageTy &Image) override {
125-
GlobalTy Func(KernelEntry);
126-
127-
// Get the metadata (address) of the kernel function.
128-
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
129-
if (auto Err = GHandler.getGlobalMetadataFromDevice(*this, Image, Func))
130-
return std::move(Err);
131-
132-
// Allocate and create the kernel.
135+
Expected<GenericKernelTy &>
136+
constructKernel(const __tgt_offload_entry &KernelEntry,
137+
OMPTgtExecModeFlags ExecMode) override {
138+
// Allocate and construct the kernel.
133139
GenELF64KernelTy *GenELF64Kernel =
134140
Plugin::get().allocate<GenELF64KernelTy>();
135-
new (GenELF64Kernel) GenELF64KernelTy(
136-
KernelEntry.name, OMP_TGT_EXEC_MODE_GENERIC, (void (*)())Func.getPtr());
141+
if (!GenELF64Kernel)
142+
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
137143

138-
return GenELF64Kernel;
144+
new (GenELF64Kernel) GenELF64KernelTy(KernelEntry.name, ExecMode);
145+
146+
return *GenELF64Kernel;
139147
}
140148

141149
/// Set the current context to this device, which is a no-op.
@@ -312,6 +320,13 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
312320
}
313321
Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); }
314322

323+
protected:
324+
/// Retrieve the execution mode for kernels. All kernels use the generic mode.
325+
Expected<OMPTgtExecModeFlags>
326+
getExecutionModeForKernel(StringRef Name, DeviceImageTy &Image) override {
327+
return OMP_TGT_EXEC_MODE_GENERIC;
328+
}
329+
315330
private:
316331
/// Grid values for Generic ELF64 plugins.
317332
static constexpr GV GenELF64GridValues = {

0 commit comments

Comments
 (0)