WIP

2024-08-01 11:50:40 +02:00
parent f8a6574698
commit 609531677b
2 changed files with 130 additions and 0 deletions
--- a/dev.py
+++ b/dev.py
@@ -0,0 +1,46 @@
+import os
+import time
+
+import numpy as np
+import torch
+
+# Benchmark parameters
+filename = "benchmark_data.dat"
+shape = (10000, 10000)  # 1e8 elements; ~400 MB on disk as float32
+dtype = np.float32
+torch_dtype = torch.float32
+
+# Expected size in bytes of the raw (headerless) array file
+element_size = np.dtype(dtype).itemsize
+file_size = shape[0] * shape[1] * element_size
+
+# (Re)create the data file when it is missing or has an unexpected size.
+if not os.path.exists(filename) or os.path.getsize(filename) != file_size:
+    # Sample directly in `dtype` (no float64 intermediate) and let tofile write
+    # the raw C-order bytes, avoiding the extra full copy tobytes() would make.
+    np.random.default_rng().random(shape, dtype=dtype).tofile(filename)
+
+# Benchmark numpy.memmap: time mapping the file and wrapping it as a tensor
+start_time = time.perf_counter()  # monotonic clock meant for short intervals
+data_np = np.memmap(filename, dtype=dtype, mode="r", shape=shape)
+tensor_np = torch.from_numpy(data_np)
+np_load_time = time.perf_counter() - start_time
+print(f"np.memmap load time: {np_load_time:.4f} seconds")
+
+# Benchmark torch.UntypedStorage: time mapping the file and viewing it as a tensor
+start_time = time.perf_counter()  # monotonic clock meant for short intervals
+storage = torch.UntypedStorage.from_file(filename, shared=True, nbytes=file_size)
+tensor = torch.FloatTensor(storage).reshape(shape)  # float32; torch_dtype is not used
+torch_load_time = time.perf_counter() - start_time
+print(f"torch.UntypedStorage load time: {torch_load_time:.4f} seconds")
+
+# Optional: uncomment to limit NumPy's print precision for the samples below
+# np.set_printoptions(precision=4)
+
+# Show the same top-left 5x5 corner of both views so the values can be compared
+corner = (slice(None, 5), slice(None, 5))
+print("NumPy memmap array sample:\n", data_np[corner])
+print("PyTorch tensor sample:\n", tensor[corner].numpy())
+# Timing summary for both loading strategies
+print(f"Numpy memmap load time: {np_load_time:.4f} seconds")
+print(f"Torch UntypedStorage load time: {torch_load_time:.4f} seconds")