Python - TensorFlow: making an op for GPU usage -- demo app doesn't work
I'm using TensorFlow, and after creating the following files I get the error below. I suspect I'm supplying the wrong kind of input, but I don't know how to change it to the proper representation.
dijkstra.py:
self.maze = tf.Variable(tf.zeros([64], dtype=tf.int32), name="grid")
print self.maze
if True:
    self.grid_module = tf.load_op_library('d_grid_gpu.so')
    with tf.Session('') as sess:
        sess.run(tf.initialize_all_variables())
        self.output = self.grid_module.grid_gpu(self.maze).eval()
d_grid_gpu.cc:
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"

using namespace tensorflow;

REGISTER_OP("GridGpu").Input("grid: int32").Output("prev: int32");

void run(int *in);

class DGridGpuOp : public OpKernel {
 public:
  explicit DGridGpuOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    Tensor* prev_tensor = NULL;
    Tensor grid_tensor = context->input(0);
    auto grid = grid_tensor.flat<int32>();

    OP_REQUIRES_OK(context,
                   context->allocate_output(0, TensorShape({64}), &prev_tensor));
    auto prev = prev_tensor->template flat<int32>();

    run(grid.data());
  }
};

REGISTER_KERNEL_BUILDER(Name("GridGpu").Device(DEVICE_GPU), DGridGpuOp);
d_grid_gpu.cu.cc:
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include <stdio.h>

#define SIZE 10

__global__ void VectorAdd(int *in, int n) {
  int i = threadIdx.x;
  if (i < n)
    in[i] = in[i] + i;
}

void run(int *in) {
  VectorAdd<<<1, SIZE>>>(in, SIZE);
  /*
  // these lines cause a segfault
  // for (int i = 0; i < SIZE; ++i) {
  //   printf("%i, ", in[i]);
  // }
  */
}
#endif
build script:
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')

nvcc -std=c++11 -c -o d_grid_gpu.cu.o d_grid_gpu.cu.cc \
  -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr

g++ -std=c++11 -shared -o d_grid_gpu.so d_grid_gpu.cc \
  d_grid_gpu.cu.o -I $TF_INC -fPIC -lcudart -D_GLIBCXX_USE_CXX11_ABI=0 \
  -L /usr/lib/x86_64-linux-gnu/
Edit: removed old output.
I tried the 'add_one' op (from the TF how-to page) and I think I got that to work, which leads me to believe the installation is OK and the example compiles. I just can't get the registration right, I guess -- or something else is off. Any advice is welcome.
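One thing I keep double-checking is the name mapping: if REGISTER_OP("GridGpu") took effect, the loaded module should expose a snake_case wrapper named grid_gpu. A quick way to inspect what actually got registered (just a sketch, assuming the library built as above):

import tensorflow as tf

# Load the compiled op library built by the script above.
grid_module = tf.load_op_library('d_grid_gpu.so')

# The CamelCase op name "GridGpu" should show up as a snake_case
# Python wrapper; 'grid_gpu' is expected to appear in this list.
print([name for name in dir(grid_module) if not name.startswith('_')])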
Edit: I reinstalled TensorFlow and the error is a little different:
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcublas.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcudnn.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcufft.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcuda.so.1 locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcurand.so locally
simple dijkstra tensorflow
<tensorflow.python.ops.variables.Variable object at 0x7fdec57c1b50>
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties:
name: GeForce GTX 850M
major: 5 minor: 0 memoryClockRate (GHz) 0.9015
pciBusID 0000:0a:00.0
Total memory: 3.95GiB
Free memory: 3.64GiB
tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0
tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0:   Y
tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 850M, pci bus id: 0000:0a:00.0)
Traceback (most recent call last):
  File "test_op.py", line 45, in <module>
    d.eval()
  File "/home/dave/workspace/awesome-tf/test_gpu/dijkstra.py", line 57, in eval
    self.maze
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 559, in eval
    return _eval_using_default_session(self, feed_dict, self.graph, session)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3761, in _eval_using_default_session
    return session.run(tensors, feed_dict)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 717, in run
    run_metadata_ptr)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 915, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 965, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 985, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value grid
    [[Node: grid/read = Identity[T=DT_INT32, _class=["loc:@grid"], _device="/job:localhost/replica:0/task:0/cpu:0"](grid)]]
Caused by op u'grid/read', defined at:
  File "test_op.py", line 45, in <module>
    d.eval()
  File "/home/dave/workspace/awesome-tf/test_gpu/dijkstra.py", line 50, in eval
    self.maze = tf.Variable(tf.zeros([64], dtype=tf.int32), name="grid")
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 215, in __init__
    dtype=dtype)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 327, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1128, in identity
    result = _op_def_lib.apply_op("Identity", input=input, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2380, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1298, in __init__
    self._traceback = _extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value grid
    [[Node: grid/read = Identity[T=DT_INT32, _class=["loc:@grid"], _device="/job:localhost/replica:0/task:0/cpu:0"](grid)]]
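For what it's worth, the FailedPreconditionError above can be reproduced without the custom op at all. This is only a minimal sketch showing that evaluating a variable before its initializer has run in that same session produces the same message:

import tensorflow as tf

v = tf.Variable(tf.zeros([64], dtype=tf.int32), name="grid")

with tf.Session() as sess:
    try:
        # Reading the variable before initialization raises
        # "Attempting to use uninitialized value grid".
        print(sess.run(v))
    except tf.errors.FailedPreconditionError as e:
        print(e)

    # Running the initializer first makes the read succeed.
    sess.run(tf.initialize_all_variables())
    print(sess.run(v))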
Sometimes I instead get this output:
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcublas.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcudnn.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcufft.so locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcuda.so.1 locally
tensorflow/stream_executor/dso_loader.cc:111] opened CUDA library libcurand.so locally
simple dijkstra tensorflow
<tensorflow.python.ops.variables.Variable object at 0x7fba5d0dafd0>
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties:
name: GeForce GTX 850M
major: 5 minor: 0 memoryClockRate (GHz) 0.9015
pciBusID 0000:0a:00.0
Total memory: 3.95GiB
Free memory: 3.67GiB
tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0
tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0:   Y
tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 850M, pci bus id: 0000:0a:00.0)
Segmentation fault (core dumped)
This is the case when I use initialize_all_variables().
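One way to check whether the custom op is really being placed on the GPU (rather than something going wrong at placement time) is to turn on device placement logging, roughly like this, reusing the names from dijkstra.py above (an untested sketch):

import tensorflow as tf

grid_module = tf.load_op_library('d_grid_gpu.so')
maze = tf.Variable(tf.zeros([64], dtype=tf.int32), name="grid")
out = grid_module.grid_gpu(maze)

# log_device_placement prints the device each op is assigned to,
# so a missing GPU kernel or unexpected placement shows up in the log.
config = tf.ConfigProto(log_device_placement=True)
with tf.Session(config=config) as sess:
    sess.run(tf.initialize_all_variables())
    print(sess.run(out))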
You want to use tf.initialize_all_variables to initialize the variables, for instance:

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
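Applied to the code in your question, that would look roughly like this (a sketch reusing your d_grid_gpu.so and names; I haven't run it):

import tensorflow as tf

grid_module = tf.load_op_library('d_grid_gpu.so')

maze = tf.Variable(tf.zeros([64], dtype=tf.int32), name="grid")
prev = grid_module.grid_gpu(maze)

with tf.Session() as sess:
    # Initialize the variables in the *same* session that evaluates the op,
    # otherwise grid/read hits the uninitialized value.
    sess.run(tf.initialize_all_variables())
    output = sess.run(prev)
    print(output)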