Commit b751532

memory: Drop support for managed memory
1 parent 9005240 · commit b751532

12 files changed: +13 -583 lines changed

benchmarks/stdgpu/main.cpp

Lines changed: 0 additions & 7 deletions

@@ -65,13 +65,6 @@ main(int argc, char* argv[])
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
            stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
                    stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
-    printf("| Managed %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
-           ") |\n",
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
-                   stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
-    printf("+---------------------------------------------------------+\n");

     return EXIT_SUCCESS;
 }
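
Not part of the commit: a minimal sketch of querying the allocation counters that remain after this change, assuming they are declared in <stdgpu/memory.h> as used in the benchmark above. The output format is illustrative only.

#include <cstdio>

#include <stdgpu/memory.h>

// Report the allocation balance for the remaining memory types (device and host).
void
print_memory_statistics()
{
    const auto device_allocations = stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::device);
    const auto device_deallocations = stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::device);
    const auto host_allocations = stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host);
    const auto host_deallocations = stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host);

    printf("Device : %lld allocated / %lld deallocated (%lld live)\n",
           static_cast<long long>(device_allocations),
           static_cast<long long>(device_deallocations),
           static_cast<long long>(device_allocations - device_deallocations));
    printf("Host   : %lld allocated / %lld deallocated (%lld live)\n",
           static_cast<long long>(host_allocations),
           static_cast<long long>(host_deallocations),
           static_cast<long long>(host_allocations - host_deallocations));
}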

src/stdgpu/cuda/impl/memory.cpp

Lines changed: 3 additions & 36 deletions

@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     cudaMemcpyKind kind;

-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = cudaMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }

-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on pre-Pascal GPUs
-    int current_device;
-    int hash_concurrent_managed_access;
-    STDGPU_CUDA_SAFE_CALL(cudaGetDevice(&current_device));
-    STDGPU_CUDA_SAFE_CALL(cudaDeviceGetAttribute(&hash_concurrent_managed_access,
-                                                 cudaDevAttrConcurrentManagedAccess,
-                                                 current_device));
-    if (hash_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::cuda::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::cuda
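
Not part of the commit: a minimal sketch of the copy-kind mapping the CUDA backend is left with once managed memory is gone. The enum is a simplified stand-in for stdgpu::dynamic_memory_type, and the host-to-host fallback is an assumption that does not appear in the hunk above.

#include <cuda_runtime.h>

// Simplified stand-in for stdgpu::dynamic_memory_type after managed memory was dropped.
enum class memory_type
{
    device,
    host
};

// Select the cudaMemcpyKind matching a (destination, source) pair of memory types.
inline cudaMemcpyKind
copy_kind(const memory_type destination_type, const memory_type source_type)
{
    if (destination_type == memory_type::device && source_type == memory_type::device)
    {
        return cudaMemcpyDeviceToDevice;
    }
    if (destination_type == memory_type::device && source_type == memory_type::host)
    {
        return cudaMemcpyHostToDevice;
    }
    if (destination_type == memory_type::host && source_type == memory_type::device)
    {
        return cudaMemcpyDeviceToHost;
    }
    return cudaMemcpyHostToHost; // remaining combination: host to host (assumed fallback)
}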

src/stdgpu/cuda/memory.h

Lines changed: 0 additions & 6 deletions

@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);

-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::cuda

 #endif // STDGPU_CUDA_MEMORY_H

src/stdgpu/hip/impl/memory.cpp

Lines changed: 3 additions & 36 deletions

@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     hipMemcpyKind kind;

-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = hipMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }

-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on old GPUs
-    int current_device;
-    int has_concurrent_managed_access;
-    STDGPU_HIP_SAFE_CALL(hipGetDevice(&current_device));
-    STDGPU_HIP_SAFE_CALL(hipDeviceGetAttribute(&has_concurrent_managed_access,
-                                               hipDeviceAttributeConcurrentManagedAccess,
-                                               current_device));
-    if (has_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::hip::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_HIP_SAFE_CALL(hipDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::hip
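
Not part of the commit: with workaround_synchronize_managed_memory() removed, applications that still call hipMallocManaged() on their own would need an equivalent check before touching the data on the host. A minimal sketch mirroring the removed HIP workaround, with error handling reduced to an early return:

#include <hip/hip_runtime.h>

// Synchronize the whole device before host access to managed memory
// on GPUs without concurrent managed access (mirrors the removed workaround).
inline void
synchronize_if_no_concurrent_managed_access()
{
    int current_device = 0;
    int has_concurrent_managed_access = 0;

    if (hipGetDevice(&current_device) != hipSuccess)
    {
        return;
    }
    if (hipDeviceGetAttribute(&has_concurrent_managed_access,
                              hipDeviceAttributeConcurrentManagedAccess,
                              current_device) != hipSuccess)
    {
        return;
    }

    if (has_concurrent_managed_access == 0)
    {
        (void)hipDeviceSynchronize();
    }
}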

src/stdgpu/hip/memory.h

Lines changed: 0 additions & 6 deletions

@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);

-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::hip

 #endif // STDGPU_HIP_MEMORY_H

src/stdgpu/impl/memory.cpp

Lines changed: 2 additions & 27 deletions

@@ -239,12 +239,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
             return manager_host;
         }

-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -255,12 +249,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
     }
 }

-void
-workaround_synchronize_managed_memory()
-{
-    stdgpu::STDGPU_BACKEND_NAMESPACE::workaround_synchronize_managed_memory();
-}
-
 [[nodiscard]] void*
 allocate(index64_t bytes, dynamic_memory_type type)
 {
@@ -310,15 +298,12 @@ memcpy(void* destination,
 {
     if (!external_memory)
     {
-        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed).contains_submemory(destination, bytes))
+        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes))
         {
             printf("stdgpu::detail::memcpy : Copying to unknown destination pointer not possible\n");
             return;
         }
-        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed)
-                    .contains_submemory(const_cast<void*>(source), bytes))
+        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes))
         {
             printf("stdgpu::detail::memcpy : Copying from unknown source pointer not possible\n");
             return;
@@ -345,12 +330,6 @@ dispatch_size_manager(const dynamic_memory_type type)
             return manager_host;
         }

-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -375,10 +354,6 @@ get_dynamic_memory_type(void* array)
     {
         return dynamic_memory_type::host;
     }
-    if (detail::dispatch_size_manager(dynamic_memory_type::managed).contains_memory(array))
-    {
-        return dynamic_memory_type::managed;
-    }

     return dynamic_memory_type::invalid;
 }
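
Not part of the commit: a small sketch of how pointer classification behaves once only device and host allocations are tracked. It assumes <stdgpu/memory.h> and a translation unit compiled with the device compiler so that createDeviceArray() can initialize on the device.

#include <stdgpu/memory.h>

// With managed memory gone, get_dynamic_memory_type() can only report
// device, host, or invalid for untracked pointers.
void
classify_pointers()
{
    float* device_values = createDeviceArray<float>(128, 1.0f);
    float* host_values = createHostArray<float>(128, 1.0f);

    const stdgpu::dynamic_memory_type device_type = stdgpu::get_dynamic_memory_type(device_values);
    const stdgpu::dynamic_memory_type host_type = stdgpu::get_dynamic_memory_type(host_values);

    // device_type == stdgpu::dynamic_memory_type::device
    // host_type   == stdgpu::dynamic_memory_type::host

    destroyDeviceArray<float>(device_values);
    destroyHostArray<float>(host_values);
}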

src/stdgpu/impl/memory_detail.h

Lines changed: 0 additions & 100 deletions

@@ -124,9 +124,6 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)
                             destroy_functor<Iterator>(first));
 }

-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::detail

 template <typename T>
@@ -179,61 +176,6 @@ createHostArray(const stdgpu::index64_t count, const T default_value)
     return host_array;
 }

-template <typename T>
-T*
-createManagedArray(const stdgpu::index64_t count, const T default_value, const Initialization initialize_on)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    T* managed_array = stdgpu::allocator_traits<Allocator>::allocate(managed_allocator, count);
-
-    if (managed_array == nullptr)
-    {
-        printf("createManagedArray : Failed to allocate array. Aborting ...\n");
-        return nullptr;
-    }
-
-    switch (initialize_on)
-    {
-#if STDGPU_DETAIL_IS_DEVICE_COMPILED
-        case Initialization::DEVICE:
-        {
-            stdgpu::uninitialized_fill(stdgpu::execution::device,
-                                       stdgpu::device_begin(managed_array),
-                                       stdgpu::device_end(managed_array),
-                                       default_value);
-        }
-        break;
-#else
-        case Initialization::DEVICE:
-        {
-            // Same as host path
-        }
-        [[fallthrough]];
-#endif
-
-        case Initialization::HOST:
-        {
-            stdgpu::detail::workaround_synchronize_managed_memory();
-
-            stdgpu::uninitialized_fill(stdgpu::execution::host,
-                                       stdgpu::host_begin(managed_array),
-                                       stdgpu::host_end(managed_array),
-                                       default_value);
-        }
-        break;
-
-        default:
-        {
-            printf("createManagedArray : Invalid initialization device. Returning created but uninitialized array "
-                   "...\n");
-        }
-    }
-
-    return managed_array;
-}
-
 template <typename T>
 void
 destroyDeviceArray(T*& device_array)
@@ -275,21 +217,6 @@ destroyHostArray(T*& host_array)
     host_array = nullptr;
 }

-template <typename T>
-void
-destroyManagedArray(T*& managed_array)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    // Call on host since the initialization place is not known
-    stdgpu::allocator_traits<Allocator>::deallocate_filled(stdgpu::execution::host,
-                                                           managed_allocator,
-                                                           managed_array,
-                                                           stdgpu::size(managed_array));
-    managed_array = nullptr;
-}
-
 template <typename T>
 T*
 copyCreateDevice2HostArray(const T* device_array, const stdgpu::index64_t count, const MemoryCopy check_safety)
@@ -565,33 +492,6 @@ safe_host_allocator<T>::deallocate(T* p, index64_t n)
                        memory_type);
 }

-template <typename T>
-template <typename U>
-safe_managed_allocator<T>::safe_managed_allocator([[maybe_unused]] const safe_managed_allocator<U>& other) noexcept
-{
-}
-
-template <typename T>
-[[nodiscard]] T*
-safe_managed_allocator<T>::allocate(index64_t n)
-{
-    T* p = static_cast<T*>(
-            detail::allocate(n * static_cast<index64_t>(sizeof(T)), memory_type)); // NOLINT(bugprone-sizeof-expression)
-    register_memory(p, n, memory_type);
-    return p;
-}
-
-template <typename T>
-void
-safe_managed_allocator<T>::deallocate(T* p, index64_t n)
-{
-    deregister_memory(p, n, memory_type);
-    // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
-    detail::deallocate(static_cast<void*>(const_cast<std::remove_cv_t<T>*>(p)),
-                       n * static_cast<index64_t>(sizeof(T)), // NOLINT(bugprone-sizeof-expression)
-                       memory_type);
-}
-
 template <typename Allocator>
 typename allocator_traits<Allocator>::pointer
 allocator_traits<Allocator>::allocate(Allocator& a, typename allocator_traits<Allocator>::index_type n)
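
Not part of the commit: a possible migration path for code that used createManagedArray() to share one allocation between host and device. The sketch assumes <stdgpu/memory.h> and the array helpers visible in the context lines above; an explicit device array plus an explicit host copy replaces the managed allocation.

#include <stdgpu/memory.h>

// Replace a former createManagedArray<float>(count, 42.0f, Initialization::DEVICE)
// with an explicit device array and, when host access is needed, a host copy.
void
migrate_from_managed_array(const stdgpu::index64_t count)
{
    float* device_values = createDeviceArray<float>(count, 42.0f);

    // Host access now requires an explicit copy instead of relying on page migration.
    float* host_values = copyCreateDevice2HostArray<float>(device_values, count);

    // ... read host_values on the host, use device_values in device code ...

    destroyHostArray<float>(host_values);
    destroyDeviceArray<float>(device_values);
}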
