import os
import time
import sys
import logging
try:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
except ImportError:
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.training import moving_averages
from rid.constants import kbT, beta, N_grid, inverse_f_cvt
tf_precision = tf.float32
logging.basicConfig(
format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
level=os.environ.get("LOGLEVEL", "INFO").upper(),
stream=sys.stdout,
)
logger = logging.getLogger(__name__)
[docs]class Reader(object):
def __init__(self, config):
# copy from config
self.data_path = config.data_path
self.num_epoch = config.num_epoch
self.use_mix = config.use_mix
self.old_ratio = config.old_ratio
assert self.old_ratio >= 0, "the old data ration sould be >= 0"
self.batch_size = config.batch_size
self.batch_size_old = int(
self.batch_size * self.old_ratio / (1. + self.old_ratio))
self.batch_size_new = self.batch_size - self.batch_size_old
self.cv_dim = config.cv_dim
self.drop_out_rate = config.drop_out_rate
self.angular_mask = config.angular_mask
[docs] def prepare(self):
self.n_input = self.cv_dim
self.index_count_all = 0
self.index_count_new = 0
self.index_count_old = 0
if self.use_mix:
tr_data_new = np.load(self.data_path)
tr_data_new = np.reshape(tr_data_new, [-1, self.cv_dim * 2])
tr_data_new[:, self.cv_dim:] *= inverse_f_cvt
self.inputs_train_new = tr_data_new[:, :]
tr_data_old = np.load(self.data_path)
tr_data_old = np.reshape(tr_data_old, [-1, self.cv_dim * 2])
tr_data_old[:, self.cv_dim:] *= inverse_f_cvt
self.inputs_train_old = tr_data_old[:, :]
self.train_size_new = self.inputs_train_new.shape[0]
self.train_size_old = self.inputs_train_old.shape[0]
if self.batch_size_new > self.train_size_new:
self.batch_size_new = self.train_size_new
if self.batch_size_old > self.train_size_old:
self.batch_size_old = self.train_size_old
self.batch_size = self.batch_size_old + self.batch_size_new
print("# batch_size %d mixed by old %d new %d" %
(self.batch_size, self.batch_size_old, self.batch_size_new))
else:
tr_data_all = np.load(self.data_path)
tr_data_all[:, self.cv_dim:] *= inverse_f_cvt
self.inputs_train_all = tr_data_all[:, :]
self.train_size_all = self.inputs_train_all.shape[0]
# print(np.shape(self.inputs_train))
def _sample_train_all(self):
self.index_count_all += self.batch_size
if self.index_count_all > self.train_size_all:
# shuffle the data
self.index_count_all = self.batch_size
ind = np.random.choice(self.train_size_all,
self.train_size_all, replace=False)
self.inputs_train_all = self.inputs_train_all[ind, :]
ind = np.arange(self.index_count_all -
self.batch_size, self.index_count_all)
return self.inputs_train_all[ind, :]
def _sample_train_mix(self, cat=True):
self.index_count_new += self.batch_size_new
if self.index_count_new > self.train_size_new:
# shuffle the data
self.index_count_new = self.batch_size_new
ind = np.random.choice(self.train_size_new,
self.train_size_new, replace=False)
self.inputs_train_new = self.inputs_train_new[ind, :]
self.index_count_old += self.batch_size_old
if self.index_count_old > self.train_size_old:
# shuffle the data
self.index_count_old = self.batch_size_old
ind = np.random.choice(self.train_size_old,
self.train_size_old, replace=False)
self.inputs_train_old = self.inputs_train_old[ind, :]
ind_new = np.arange(self.index_count_new -
self.batch_size_new, self.index_count_new)
ind_old = np.arange(self.index_count_old -
self.batch_size_old, self.index_count_old)
if cat:
return np.concatenate([self.inputs_train_new[ind_new, :], self.inputs_train_old[ind_old, :]], axis=0)
else:
return self.inputs_train_new[ind_new, :], self.inputs_train_old[ind_old, :]
[docs] def sample_train(self, cat=True):
if self.use_mix:
return self._sample_train_mix(cat)
else:
return self._sample_train_all()
[docs] def get_train_size(self):
if self.use_mix:
return int(self.train_size_new * (1. + self.old_ratio))
else:
return self.train_size_all
[docs] def get_batch_size(self):
return self.batch_size
[docs] def get_data(self):
if self.use_mix:
return self.inputs_train_new, self.inputs_train_old
else:
return self.inputs_train_all
[docs]class Model(object):
def __init__(self, config, sess):
self.sess = sess
# copy from config
self.data_path = config.data_path
self.n_neuron = config.n_neuron
self.useBN = config.useBN
self.n_displayepoch = config.n_displayepoch
self.starter_learning_rate = config.starter_learning_rate
self.decay_steps = config.decay_steps
self.decay_steps_inner = config.decay_steps_inner
self.decay_rate = config.decay_rate
self.display_in_training = config.display_in_training
self.restart = config.restart
self.resnet = config.resnet
self.graph_file = config.graph_file
self.cv_dim = int(config.cv_dim)
self.angular_mask = np.array(config.angular_mask)
self.angular_dim = np.sum(self.angular_mask, dtype=int)
self.non_angular_dim = self.cv_dim - self.angular_dim
self.angular_mask_boolean = (self.angular_mask == 1)
self.non_angular_mask_boolean = (self.angular_mask == 0)
if self.graph_file is not None:
self.graph = self.load_graph(self.graph_file, 'load')
else:
self.graph = None
[docs] def test_error(self, inputs_train):
ret = self.sess.run([self.l2_loss, self.rel_error_k],
feed_dict={self.inputs_train: inputs_train,
self.is_training: False,
self.drop_out_rate: 0.0})
error = np.sqrt(ret[0])
error2 = np.mean(np.sqrt(ret[1]))
return error, error2
[docs] def test_error_mix(self, reader):
data_new, data_old = reader.get_data()
ret = self.sess.run([self.l2_loss, self.rel_error_k],
feed_dict={self.inputs_train: data_new,
self.is_training: False,
self.drop_out_rate: 0.0})
error_new = np.sqrt(ret[0])
error_new2 = np.mean(np.sqrt(ret[1]))
ret = self.sess.run([self.l2_loss, self.rel_error_k],
feed_dict={self.inputs_train: data_old,
self.is_training: False,
self.drop_out_rate: 0.0})
error_old = np.sqrt(ret[0])
error_old2 = np.mean(np.sqrt(ret[1]))
return error_new, error_new2, error_old, error_old2
[docs] def train(self, reader):
reader.prepare()
self.n_input = reader.n_input
self.inputs_train = tf.placeholder(
tf_precision, [None, self.n_input + self.cv_dim], name='inputs')
self.is_training = tf.placeholder(tf.bool)
self.drop_out_rate = tf.placeholder(tf.float32, name='drop_out_rate') ### drop out ###
self._extra_train_ops = []
self.global_step = tf.get_variable('global_step', [],
initializer=tf.constant_initializer(
1),
trainable=False, dtype=tf.int32)
self.global_epoch = self.global_step * \
reader.get_batch_size() // reader.get_train_size()
if self.decay_steps_inner == 0:
self.learning_rate = tf.train.exponential_decay(self.starter_learning_rate,
self.global_epoch,
self.decay_steps,
self.decay_rate,
staircase=True)
else:
self.global_epoch_inner = self.global_epoch % self.decay_steps
self.lr_pref = tf.train.exponential_decay(1.0,
self.global_epoch,
self.decay_steps,
self.decay_rate,
staircase=False)
self.learning_rate = tf.train.exponential_decay(self.starter_learning_rate,
self.global_epoch_inner,
self.decay_steps_inner,
self.decay_rate,
staircase=True)
self.learning_rate *= self.lr_pref
self.mv_decay = 1.0 - self.learning_rate/self.starter_learning_rate
avg_input, scl_input = self.compute_statistic(reader)
self.energy, self.l2_loss, self.rel_error_k\
= self.build_force(self.inputs_train, suffix="test", reuse=False, shift=avg_input, scale=scl_input, graph=self.graph)
# train operations
trainable_variables = tf.trainable_variables()
grads = tf.gradients(self.l2_loss, trainable_variables)
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
apply_op = optimizer.apply_gradients(zip(grads, trainable_variables),
global_step=self.global_step, name='train_step')
train_ops = [apply_op] + self._extra_train_ops
self.train_op = tf.group(*train_ops)
saver = tf.train.Saver()
# initialization
if (self.restart == False):
sample_used = 0
epoch_used = 0
self.sess.run(tf.global_variables_initializer())
print('# start training from scratch')
elif self.graph is None:
self.sess.run(tf.global_variables_initializer())
saver.restore(self.sess, "old_model/model.ckpt")
print("Model restored.")
self.global_step = tf.assign(
self.global_step, 0, name='global_step')
cur_step = self.sess.run(self.global_step)
sample_used = cur_step * reader.get_batch_size()
epoch_used = sample_used // reader.get_train_size()
start_time = time.time()
inputs_train = reader.sample_train()
if reader.use_mix:
error, error2, error_old, error_old2 = self.test_error_mix(reader)
else:
error, error2 = self.test_error(inputs_train)
current_lr = self.sess.run(tf.to_double(self.learning_rate))
if self.display_in_training:
if reader.use_mix:
logger.info("epoch: %3u, ab_err_n: %.4e, rel_err_n: %.4e, ab_err_o: %.4e, rel_err_o: %.4e, lr: %.4e"
% (epoch_used, error, error2, error_old, error_old2, current_lr))
else:
logger.info("epoch: %3u, ab_err: %.4e, rel_err: %.4e, lr: %.4e" %
(epoch_used, error, error2, current_lr))
while epoch_used < reader.num_epoch:
# print('# doing training')
inputs_train = reader.sample_train()
self.sess.run([self.train_op],
feed_dict={self.inputs_train: inputs_train,
self.is_training: True,
self.drop_out_rate: reader.drop_out_rate})
sample_used += reader.get_batch_size()
# print(sample_used)
if (sample_used // reader.get_train_size()) > epoch_used:
epoch_used = sample_used // reader.get_train_size()
if epoch_used % self.n_displayepoch == 0:
save_path = saver.save(
self.sess, os.getcwd() + "/" + "model.ckpt")
if reader.use_mix:
error, error2, error_old, error_old2 = self.test_error_mix(
reader)
else:
error, error2 = self.test_error(inputs_train)
current_lr = self.sess.run(
tf.to_double(self.learning_rate))
if self.display_in_training:
if reader.use_mix:
logger.info("epoch: %3u, ab_err_n: %.4e, rel_err_n: %.4e, ab_err_o: %.4e, rel_err_o: %.4e, lr: %.4e"
% (epoch_used, error, error2, error_old, error_old2, current_lr))
else:
logger.info("epoch: %3u, ab_err: %.4e, rel_err: %.4e, lr: %.4e" % (
epoch_used, error, error2, current_lr))
sys.stdout.flush()
end_time = time.time()
logger.info("running time: %.3f s" % (end_time-start_time))
[docs] def compute_statistic(self,
reader):
max_scale = 3.
if reader.use_mix:
dnew, dold = reader.get_data()
else:
dold = reader.get_data()
da = np.average(dold[:, 0:self.cv_dim], axis=0)
ds = np.std(dold[:, 0:self.cv_dim], axis=0)
# da[:self.cv_dih_dim] = 0.0
da[self.angular_mask_boolean] = 0.0
if all(ds != 0):
ds = 1./(ds)
# ds[:self.cv_dih_dim] = 1.0
ds[self.angular_mask_boolean] = 1.0
non_angular_cv = ds[self.non_angular_mask_boolean]
non_angular_cv[non_angular_cv>max_scale] = max_scale
ds[self.non_angular_mask_boolean] = non_angular_cv
# for ii in range(self.cv_dih_dim, self.cv_dim):
# if ds[ii] > max_scale:
# ds[ii] = max_scale
return da, ds
[docs] def build_force(self,
inputs,
suffix,
shift=None,
scale=None,
reuse=None,
graph=None):
cvs = tf.slice(inputs, [0, 0], [-1, self.cv_dim], name='cvs')
if shift is not None:
assert(scale is not None)
t_shift = tf.get_variable('input_shift',
[self.cv_dim],
dtype=tf_precision,
trainable=False,
initializer=tf.constant_initializer(shift))
t_scale = tf.get_variable('input_scale',
[self.cv_dim],
dtype=tf_precision,
trainable=False,
initializer=tf.constant_initializer(scale))
# t_shift = tf.constant(shift, name='input_shift')
# t_scale = tf.constant(scale, name='input_scale')
cvs = (cvs - t_shift) * t_scale
# angles = tf.slice(cvs, [0, 0], [-1, self.cv_dih_dim], name='angles')
angles = tf.boolean_mask(
cvs, self.angular_mask_boolean, axis=1, name='angles'
)
angles = tf.reshape(angles, [-1, self.angular_dim])
dists = tf.boolean_mask(
cvs, self.non_angular_mask_boolean, axis=1, name='dists'
)
dists = tf.reshape(dists, [-1, self.non_angular_dim])
# dists = tf.slice(cvs, [0, self.cv_dih_dim],
# [-1, self.cv_dist_dim], name='dists')
forces_hat = tf.slice(
inputs, [0, self.cv_dim], [-1, self.cv_dim], name='forces')
inputs = tf.concat([tf.cos(angles), tf.sin(angles), dists], 1)
if graph is not None:
init_t = [graph.get_tensor_by_name('load/layer_0/matrix:0'),
graph.get_tensor_by_name('load/layer_0/bias:0')
]
with tf.Session(graph=self.graph) as g_sess:
init = g_sess.run(init_t)
else:
init = None
layer = self._one_layer(
inputs, self.n_neuron[0], drop_out_rate=self.drop_out_rate, name='layer_0', reuse=reuse, init=init)
for ii in range(1, len(self.n_neuron)):
if graph is not None:
init_t = [graph.get_tensor_by_name('load/layer_%s/matrix:0' % str(ii)),
graph.get_tensor_by_name(
'load/layer_%s/bias:0' % str(ii))
]
if self.resnet and self.n_neuron[ii] == self.n_neuron[ii-1]:
init_t += [graph.get_tensor_by_name(
'load/layer_%s/timestep:0' % str(ii))]
with tf.Session(graph=self.graph) as g_sess:
init = g_sess.run(init_t)
else:
init = None
if self.resnet and self.n_neuron[ii] == self.n_neuron[ii-1]:
layer += self._one_layer(layer, self.n_neuron[ii], drop_out_rate=self.drop_out_rate, name='layer_'+str(
ii), reuse=reuse, with_timestep=True, init=init)
else:
layer = self._one_layer(
layer, self.n_neuron[ii], drop_out_rate=self.drop_out_rate, name='layer_'+str(ii), reuse=reuse, with_timestep=False, init=init)
if graph is not None:
init_t = graph.get_tensor_by_name('load/energy/matrix:0')
with tf.Session(graph=self.graph) as g_sess:
init = g_sess.run(init_t)
else:
init = None
energy_ = self._final_layer(
layer, 1, activation_fn=None, name='energy', reuse=reuse, init=init)
energy = tf.identity(energy_, name='o_energy')
forces = - tf.reshape(tf.stack(tf.gradients(energy, cvs)),
[-1, self.cv_dim], name='o_forces')
force_dif = forces_hat - forces
forces_norm = tf.reshape(tf.reduce_sum(
forces * forces, axis=1), [-1, 1])
forces_dif_norm = tf.reshape(tf.reduce_sum(
force_dif * force_dif, axis=1), [-1, 1])
l2_loss = tf.reduce_mean(forces_dif_norm, name='l2_loss')
rel_error_k = forces_dif_norm / (1E-8 + forces_norm)
return energy, l2_loss, rel_error_k
[docs] def load_graph(self,
frozen_graph_filename,
prefix='load'):
# We load the protobuf file from the disk and parse it to retrieve the
# unserialized graph_def
with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
# Then, we can use again a convenient built-in function to import a graph_def into the
# current default Graph
with tf.Graph().as_default() as graph:
tf.import_graph_def(
graph_def,
input_map=None,
return_elements=None,
name=prefix,
producer_op_list=None
)
# for op in graph.get_operations():
# print(op.name)
return graph
def _one_layer(self,
inputs,
outputs_size,
drop_out_rate,
activation_fn=tf.nn.tanh,
stddev=1.0,
bavg=0.0,
init=None,
name='linear',
reuse=None,
seed=None,
with_timestep=False):
with tf.variable_scope(name, reuse=reuse):
shape = inputs.get_shape().as_list()
if init is not None:
a_i_w = init[0]
a_i_b = init[1]
i_w_s = a_i_w.shape
i_b_s = a_i_b.shape
a_e_w = np.random.normal(
scale=stddev/np.sqrt(shape[1]+outputs_size), size=[shape[1], outputs_size])
a_e_b = np.random.normal(
scale=stddev, loc=bavg, size=[outputs_size])
a_e_w[:i_w_s[0], :i_w_s[1]] = a_i_w
a_e_b[:i_b_s[0]] = a_i_b
initer_w = tf.constant_initializer(a_e_w)
initer_b = tf.constant_initializer(a_e_b)
else:
initer_w = tf.random_normal_initializer(
stddev=stddev/np.sqrt(shape[1]+outputs_size), seed=seed)
initer_b = tf.random_normal_initializer(
stddev=stddev, mean=bavg, seed=seed)
w = tf.get_variable('matrix',
[shape[1], outputs_size],
tf_precision,
initer_w)
b = tf.get_variable('bias',
[outputs_size],
tf_precision,
initer_b)
hidden = tf.matmul(inputs, w) + b
if activation_fn != None and with_timestep:
if init is not None:
a_i_t = init[2]
i_t_s = a_i_t.shape
a_e_t = np.random.normal(
scale=0.001, loc=0.1, size=[outputs_size])
a_e_t[0:i_t_s[0]] = a_i_t
initer_t = tf.constant_initializer(a_e_t)
else:
initer_t = tf.random_normal_initializer(
stddev=0.001, mean=0.1, seed=seed)
timestep = tf.get_variable('timestep',
[outputs_size],
tf_precision,
initer_t)
if activation_fn != None:
if self.useBN:
# None
hidden_bn = self._batch_norm(
hidden, name=name+'_normalization', reuse=reuse)
layer_out = activation_fn(hidden_bn)
else:
if with_timestep:
layer_out = activation_fn(hidden) * timestep
else:
layer_out = activation_fn(hidden)
else:
if self.useBN:
# None
layer_out = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse)
else:
layer_out = hidden
return tf.nn.dropout(layer_out, rate=drop_out_rate)
def _final_layer(self,
inputs,
outputs_size,
activation_fn=tf.nn.tanh,
stddev=1.0,
bavg=0.0,
init=None,
name='linear',
reuse=None,
seed=None):
with tf.variable_scope(name, reuse=reuse):
shape = inputs.get_shape().as_list()
if init is not None:
a_i_w = init
i_w_s = a_i_w.shape
a_e_w = np.random.normal(
scale=stddev/np.sqrt(shape[1]+outputs_size), size=[shape[1], outputs_size])
a_e_w[:i_w_s[0], :i_w_s[1]] = a_i_w
initer_w = tf.constant_initializer(a_e_w)
else:
initer_w = tf.random_normal_initializer(
stddev=stddev/np.sqrt(shape[1]+outputs_size), seed=seed)
w = tf.get_variable('matrix',
[shape[1], outputs_size],
tf_precision,
initer_w)
hidden = tf.matmul(inputs, w)
if activation_fn != None:
if self.useBN:
# None
hidden_bn = self._batch_norm(
hidden, name=name+'_normalization', reuse=reuse)
return activation_fn(hidden_bn)
else:
return activation_fn(hidden)
else:
if self.useBN:
# None
return self._batch_norm(hidden, name=name+'_normalization', reuse=reuse)
else:
return hidden
def _batch_norm(self, x, name, reuse):
"""Batch normalization"""
with tf.variable_scope(name, reuse=reuse):
params_shape = [x.get_shape()[-1]]
beta = tf.get_variable('beta', params_shape, tf_precision,
initializer=tf.random_normal_initializer(0.0, stddev=0.1, dtype=tf_precision))
gamma = tf.get_variable('gamma', params_shape, tf_precision,
initializer=tf.random_uniform_initializer(0.1, 0.5, dtype=tf_precision))
with tf.variable_scope(name+'moving', reuse=False):
moving_mean = tf.get_variable('moving_mean', params_shape, tf_precision,
initializer=tf.constant_initializer(
0.0, tf_precision),
trainable=False)
moving_variance = tf.get_variable('moving_variance', params_shape, tf_precision,
initializer=tf.constant_initializer(
1.0, tf_precision),
trainable=False)
# These ops will only be preformed when training
mean, variance = tf.nn.moments(x, [0], name='moments')
self._extra_train_ops.append(
moving_averages.assign_moving_average(moving_mean, mean, self.mv_decay))
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_variance, variance, self.mv_decay))
mean, variance = control_flow_ops.cond(self.is_training,
lambda: (mean, variance),
lambda: (moving_mean, moving_variance))
# # elipson used to be 1e-5. Maybe 0.001 solves NaN problem in deeper net.
y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-6)
y.set_shape(x.get_shape())
return y