python - TensorFlow CNN on multiple GPUs
I am trying to parallelize my code so that my TensorFlow model runs on multiple GPUs. For some reason, the code I wrote to parallelize the training works for a standard deep neural net, but throws errors when using a convolutional neural net.
Here is the code that computes the average gradients:
    def average_gradients(tower_grads):
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            # Note that each grad_and_vars looks like the following:
            #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
            grads = []
            for g, _ in grad_and_vars:
                # Add a 0 dimension to the gradients to represent the tower.
                expanded_g = tf.expand_dims(g, 0)
                # Append on a 'tower' dimension which we will average over below.
                grads.append(expanded_g)
            # Average over the 'tower' dimension.
            grad = tf.concat(axis=0, values=grads)
            grad = tf.reduce_mean(grad, 0)
            # Keep in mind that the Variables are redundant because they are shared
            # across towers. So we just return the first tower's pointer to the Variable.
            v = grad_and_vars[0][1]
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        return average_grads
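As a sanity check, this function behaves as expected whenever every (gradient, variable) pair actually carries a gradient. Here is a minimal sketch with two hand-built towers sharing one variable (the names and values are made up purely for illustration):

    import tensorflow as tf

    v = tf.get_variable('v_demo', [2], initializer=tf.random_normal_initializer())
    # Two fake towers, each contributing one (gradient, variable) pair for the same variable.
    tower_grads = [[(tf.constant([1.0, 2.0]), v)],
                   [(tf.constant([3.0, 4.0]), v)]]
    avg = average_gradients(tower_grads)
    with tf.Session() as sess:
        print(sess.run(avg[0][0]))  # element-wise mean across towers: [2.0, 3.0]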
This is the deep neural net architecture (this works):
    def neuralnet(data):
        hl_1 = {'weights': tf.get_variable('weights1', [tf_shape, n_nodes_hl1], initializer=tf.random_normal_initializer()),
                'biases': tf.get_variable('biases1', [n_nodes_hl1], initializer=tf.random_normal_initializer())}
        hl_2 = {'weights': tf.get_variable('weights2', [n_nodes_hl1, n_nodes_hl2], initializer=tf.random_normal_initializer()),
                'biases': tf.get_variable('biases2', [n_nodes_hl2], initializer=tf.random_normal_initializer())}
        hl_3 = {'weights': tf.get_variable('weights3', [n_nodes_hl2, n_nodes_hl3], initializer=tf.random_normal_initializer()),
                'biases': tf.get_variable('biases3', [n_nodes_hl3], initializer=tf.random_normal_initializer())}
        hl_4 = {'weights': tf.get_variable('weights4', [n_nodes_hl3, n_nodes_hl4], initializer=tf.random_normal_initializer()),
                'biases': tf.get_variable('biases4', [n_nodes_hl4], initializer=tf.random_normal_initializer())}
        hl_5 = {'weights': tf.get_variable('weights5', [n_nodes_hl4, n_nodes_hl5], initializer=tf.random_normal_initializer()),
                'biases': tf.get_variable('biases5', [n_nodes_hl5], initializer=tf.random_normal_initializer())}
        output_layer = {'weights': tf.get_variable('weights-outputlayer', [n_nodes_hl5, n_classes], initializer=tf.random_normal_initializer()),
                        'biases': tf.get_variable('biases-outputlayer', [n_classes], initializer=tf.random_normal_initializer())}

        l1 = tf.add(tf.matmul(data, hl_1['weights']), hl_1['biases'])
        l1 = tf.nn.sigmoid(l1, name='op1')
        l2 = tf.add(tf.matmul(l1, hl_2['weights']), hl_2['biases'])
        l2 = tf.nn.sigmoid(l2, name='op2')
        l3 = tf.add(tf.matmul(l2, hl_3['weights']), hl_3['biases'])
        l3 = tf.nn.sigmoid(l3, name='op3')
        l4 = tf.add(tf.matmul(l3, hl_4['weights']), hl_4['biases'])
        l4 = tf.nn.sigmoid(l4, name='op4')
        l5 = tf.add(tf.matmul(l4, hl_5['weights']), hl_5['biases'])
        l5 = tf.nn.sigmoid(l5, name='op5')
        dropout = tf.nn.dropout(l5, keep_prob, name='op6')
        ol = tf.add(tf.matmul(dropout, output_layer['weights']), output_layer['biases'], name='op7')
        return ol
This is the convnet (this does not work):
    def conv2d(x, w):
        return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME')

    def maxpool2d(x):
        return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

    def convneuralnet(x):
        weights = {'w_conv1': tf.get_variable('w_conv1', [7,7,1,2], initializer=tf.random_normal_initializer()),
                   'w_conv2': tf.get_variable('w_conv2', [7,7,2,4], initializer=tf.random_normal_initializer()),
                   'w_conv3': tf.get_variable('w_conv3', [7,7,4,8], initializer=tf.random_normal_initializer()),
                   'w_conv4': tf.get_variable('w_conv4', [7,7,8,16], initializer=tf.random_normal_initializer()),
                   'w_conv5': tf.get_variable('w_conv5', [7,7,16,32], initializer=tf.random_normal_initializer()),
                   'w_conv6': tf.get_variable('w_conv6', [7,7,32,64], initializer=tf.random_normal_initializer()),
                   'w_conv7': tf.get_variable('w_conv7', [7,7,64,128], initializer=tf.random_normal_initializer()),
                   'w_conv8': tf.get_variable('w_conv8', [7,7,128,256], initializer=tf.random_normal_initializer()),
                   'w_conv9': tf.get_variable('w_conv9', [7,7,256,512], initializer=tf.random_normal_initializer()),
                   'w_fc1': tf.get_variable('w_fc1', [512,1024], initializer=tf.random_normal_initializer()),
                   'w_fc2': tf.get_variable('w_fc2', [1024,2048], initializer=tf.random_normal_initializer()),
                   'w_fc3': tf.get_variable('w_fc3', [2048,4096], initializer=tf.random_normal_initializer()),
                   'out': tf.get_variable('w_out', [4096,n_classes], initializer=tf.random_normal_initializer())}
        biases = {'b_conv1': tf.get_variable('b_conv1', [2], initializer=tf.random_normal_initializer()),
                  'b_conv2': tf.get_variable('b_conv2', [4], initializer=tf.random_normal_initializer()),
                  'b_conv3': tf.get_variable('b_conv3', [8], initializer=tf.random_normal_initializer()),
                  'b_conv4': tf.get_variable('b_conv4', [16], initializer=tf.random_normal_initializer()),
                  'b_conv5': tf.get_variable('b_conv5', [32], initializer=tf.random_normal_initializer()),
                  'b_conv6': tf.get_variable('b_conv6', [64], initializer=tf.random_normal_initializer()),
                  'b_conv7': tf.get_variable('b_conv7', [128], initializer=tf.random_normal_initializer()),
                  'b_conv8': tf.get_variable('b_conv8', [256], initializer=tf.random_normal_initializer()),
                  'b_conv9': tf.get_variable('b_conv9', [512], initializer=tf.random_normal_initializer()),
                  'b_fc1': tf.get_variable('b_fc1', [1024], initializer=tf.random_normal_initializer()),
                  'b_fc2': tf.get_variable('b_fc2', [2048], initializer=tf.random_normal_initializer()),
                  'b_fc3': tf.get_variable('b_fc3', [4096], initializer=tf.random_normal_initializer()),
                  'out': tf.get_variable('b_out', [n_classes], initializer=tf.random_normal_initializer())}

        x = tf.reshape(x, shape=[-1,7,len_puzzle,1])

        conv1 = conv2d(x, weights['w_conv1'])
        conv1 = maxpool2d(conv1)
        conv2 = conv2d(conv1, weights['w_conv2'])
        conv2 = maxpool2d(conv2)
        conv3 = conv2d(conv2, weights['w_conv3'])
        conv3 = maxpool2d(conv3)
        conv4 = conv2d(conv3, weights['w_conv4'])
        conv4 = maxpool2d(conv4)
        conv5 = conv2d(conv4, weights['w_conv5'])
        conv5 = maxpool2d(conv5)
        conv6 = conv2d(conv5, weights['w_conv6'])
        conv6 = maxpool2d(conv6)
        conv7 = conv2d(conv6, weights['w_conv7'])
        conv7 = maxpool2d(conv7)
        conv8 = conv2d(conv7, weights['w_conv8'])
        conv8 = maxpool2d(conv8)
        conv9 = conv2d(conv8, weights['w_conv9'])
        conv9 = maxpool2d(conv9)

        fc1 = tf.reshape(conv9, [-1,512])
        fc1 = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['w_fc1']), biases['b_fc1']))
        fc2 = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['w_fc2']), biases['b_fc2']))
        fc3 = tf.nn.sigmoid(tf.add(tf.matmul(fc2, weights['w_fc3']), biases['b_fc3']))
        last = tf.nn.dropout(fc3, keep_prob)
        output = tf.add(tf.matmul(last, weights['out']), biases['out'], name='op7')
        return output
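One note on the reshape to [-1, 512]: with 'SAME' pooling each spatial dimension shrinks as ceil(n/2), so nine stride-2 pools take the height 7 -> 4 -> 2 -> 1 and hold it at 1, while the final width depends on len_puzzle. A quick static shape check (my own debugging addition, meant to be dropped into convneuralnet just before the reshape, not part of the original script):

    # Hypothetical sanity check: the [-1, 512] reshape is only valid
    # if conv9 comes out with shape [batch, 1, 1, 512].
    print('conv9 shape:', conv9.get_shape().as_list())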
This is the code that runs the session:
    def train(x):
        tower_grads = []
        opt = tf.train.AdamOptimizer(learning_rate)
        for i in xrange(2):
            with tf.device('/gpu:%d' % i):
                with tf.variable_scope('nn', reuse=i > 0):
                    prediction = convneuralnet(x)
                    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
                    tf.summary.scalar('cross_entropy', cost)
                    grads = opt.compute_gradients(cost)
                    tower_grads.append(grads)
                    print grads
                    print len(grads)
                    #scope.reuse_variables()

        grads = average_gradients(tower_grads)
        apply_gradient_op = opt.apply_gradients(grads)
        train_op = tf.group(apply_gradient_op)

        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        tf.summary.scalar('accuracy', accuracy)

        num_epochs = ne
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
            saver = tf.train.Saver()
            # uncomment when restarting from a checkpoint
            #saver.restore(sess, tf.train.latest_checkpoint(os.getcwd()+'/models/base/.'))
            sess.run(tf.global_variables_initializer())
            merged_summary = tf.summary.merge_all()
            for epoch in range(num_epochs):
                epoch_loss = 0
                for i in range(int(real_x_9.shape[0])/batch_size):  #mnist.train.num_examples/batch_size  # x.shape[0]
                    randidx = np.random.choice(real_x_9.shape[0], batch_size, replace=False)
                    epoch_x, epoch_y = real_x_9[randidx,:], real_y_9[randidx,:]  #mnist.train.next_batch(batch_size)  # x, y
                    j, c = sess.run([train_op, cost], feed_dict={x: epoch_x, y: epoch_y, keep_prob: train_keep_prob})
                    if i == 0:
                        [ta] = sess.run([accuracy], feed_dict={x: epoch_x, y: epoch_y, keep_prob: train_keep_prob})
                        print 'train accuracy', ta
                    epoch_loss += c
                print '\n', 'epoch', epoch + 1, 'completed out of', num_epochs, '\nloss:', epoch_loss
                #saver.save(sess, os.getcwd()+'/models/base/basednn7')
                #saver.export_meta_graph(os.getcwd()+'/models/base/basednn7.meta')
            print '\n', 'train accuracy', accuracy.eval(feed_dict={x: real_x_9, y: real_y_9, keep_prob: train_keep_prob})
            print '\n', 'test accuracy', accuracy.eval(feed_dict={x: test_real_x, y: test_real_y, keep_prob: 1.0})  #mnist.test.images, mnist.test.labels

    train(x)
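Since the traceback below points at tf.expand_dims(g, 0) inside average_gradients, the print grads above can be tightened up to show exactly which variables come back from compute_gradients without a gradient; compute_gradients returns a (None, var) pair for any variable the cost does not depend on. A sketch of that check (my own debugging addition, run right after the tower loop, not part of the original script):

    # Hypothetical diagnostic: list every variable whose gradient is None.
    for tower_idx, grads_and_vars in enumerate(tower_grads):
        for g, v in grads_and_vars:
            if g is None:
                print('tower %d has no gradient for %s' % (tower_idx, v.name))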
This is the error:
    Traceback (most recent call last):
      File "cnn_gpu.py", line 393, in <module>
        train(x)
      File "cnn_gpu.py", line 311, in train
        grads = average_gradients(tower_grads)
        expanded_g = tf.expand_dims(g, 0)
      File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 170, in expand_dims
        return gen_array_ops._expand_dims(input, axis, name)
      File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 900, in _expand_dims
        result = _op_def_lib.apply_op("ExpandDims", input=input, dim=dim, name=name)
      File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 509, in apply_op
        (input_name, err))
    ValueError: Tried to convert 'input' to a tensor and failed. Error: None values not supported.
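The last frame matches what tf.expand_dims does when handed None instead of a tensor, which can be reproduced in isolation (a minimal repro, not from my script):

    import tensorflow as tf

    tf.expand_dims(None, 0)
    # ValueError: Tried to convert 'input' to a tensor and failed.
    # Error: None values not supported.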
I'm confused. Parallelization across multiple GPUs should work regardless of the type of neural net being used.
Any help here is appreciated.