import codecs
import copy
import random
import time

import numpy as np

# global id lookup tables filled by dataloader
entities2id = {}
relations2id = {}


def dataloader(file1, file2, file3):
    print("")
    entity = []
    relation = []
    with open(file2, 'r') as f1, open(file3, 'r') as f2:
        lines1 = f1.readlines()
        lines2 = f2.readlines()
        for line in lines1:
            line = line.strip().split('\t')
            if len(line) != 2:
                continue
            entities2id[line[0]] = line[1]
            entity.append(line[1])

        for line in lines2:
            line = line.strip().split('\t')
            if len(line) != 2:
                continue
            relations2id[line[0]] = line[1]
            relation.append(line[1])
    triple_list = []
    with codecs.open(file1, 'r') as f:
        content = f.readlines()
        for line in content:
            triple = line.strip().split("\t")
            if len(triple) != 3:
                continue
            h_ = entities2id[triple[0]]
            r_ = relations2id[triple[1]]
            t_ = entities2id[triple[2]]
            triple_list.append([h_, r_, t_])

    print("Complete load. entity : %d , relation : %d , triple : %d" % (
        len(entity), len(relation), len(triple_list)))

    return entity, relation, triple_list
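
# Expected input formats (inferred from the parsing above): file2 and file3 are
# tab-separated "<name>\t<id>" mappings for entities and relations, and file1
# holds the training triples as "<head>\t<relation>\t<tail>" lines.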
def norm_l1(h, r, t):
    return np.sum(np.fabs(h + r - t))


def norm_l2(h, r, t):
    return np.sum(np.square(h + r - t))
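
# TransE models a plausible triple (h, r, t) as a translation h + r ≈ t, so a
# triple is scored by the distance between h + r and t: norm_l1 is the L1
# distance and norm_l2 the squared L2 distance; smaller means more plausible.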
class TransE:
    def __init__(self, entity, relation, triple_list, embedding_dim=50, lr=0.01, margin=1.0, norm=1):
        self.entities = entity
        self.relations = relation
        self.triples = triple_list
        self.dimension = embedding_dim
        self.learning_rate = lr
        self.margin = margin
        self.norm = norm
        self.loss = 0.0
    def data_initialise(self):
        entityVectorList = {}
        relationVectorList = {}
        for entity in self.entities:
            entity_vector = np.random.uniform(-6.0 / np.sqrt(self.dimension), 6.0 / np.sqrt(self.dimension),
                                              self.dimension)
            entityVectorList[entity] = entity_vector

        for relation in self.relations:
            relation_vector = np.random.uniform(-6.0 / np.sqrt(self.dimension), 6.0 / np.sqrt(self.dimension),
                                                self.dimension)
            relation_vector = self.normalization(relation_vector)
            relationVectorList[relation] = relation_vector

        # replace the id lists with the {id: vector} dicts used during training
        self.entities = entityVectorList
        self.relations = relationVectorList
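
    # The uniform(-6/sqrt(k), 6/sqrt(k)) range is the initialisation prescribed in
    # the TransE paper (Bordes et al., 2013) for k-dimensional embeddings; relation
    # vectors are normalised once here, while entity vectors are re-normalised at
    # the start of every epoch in training_run.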
    def normalization(self, vector):
        return vector / np.linalg.norm(vector)
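
    # Dividing by np.linalg.norm projects a vector onto the unit L2 sphere, which
    # is how the paper enforces the ||e|| = 1 constraint on entity embeddings.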
    def training_run(self, epochs=1, nbatches=100, out_file_title=''):
        batch_size = int(len(self.triples) / nbatches)
        print("batch size: ", batch_size)
        for epoch in range(epochs):
            start = time.time()
            self.loss = 0.0

            # normalise the embeddings of the entities to 1
            for entity in self.entities.keys():
                self.entities[entity] = self.normalization(self.entities[entity])

            for batch in range(nbatches):
                batch_samples = random.sample(self.triples, batch_size)

                Tbatch = []
                for sample in batch_samples:
                    corrupted_sample = copy.deepcopy(sample)
                    pr = np.random.random(1)[0]
                    if pr > 0.5:
                        # change the head entity
                        corrupted_sample[0] = random.sample(list(self.entities.keys()), 1)[0]
                        while corrupted_sample[0] == sample[0]:
                            corrupted_sample[0] = random.sample(list(self.entities.keys()), 1)[0]
                    else:
                        # change the tail entity
                        corrupted_sample[2] = random.sample(list(self.entities.keys()), 1)[0]
                        while corrupted_sample[2] == sample[2]:
                            corrupted_sample[2] = random.sample(list(self.entities.keys()), 1)[0]

                    if (sample, corrupted_sample) not in Tbatch:
                        Tbatch.append((sample, corrupted_sample))

                self.update_triple_embedding(Tbatch)
            end = time.time()
            print("epoch: ", epoch, "cost time: %s" % (round((end - start), 3)))
            print("running loss: ", self.loss)
        with codecs.open(out_file_title + "TransE_entity_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f1:
            for e in self.entities.keys():
                # f1.write("\t")
                # f1.write(e + "\t")
                f1.write(str(self.entities[e].tolist()))
                f1.write("\n")

        with codecs.open(out_file_title + "TransE_relation_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f2:
            for r in self.relations.keys():
                # f2.write("\t")
                # f2.write(r + "\t")
                f2.write(str(self.relations[r].tolist()))
                f2.write("\n")
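
    # Note on the output files: with the id-writing lines left commented out (as in
    # the original), each line holds only one vector as a Python list literal, in
    # the dicts' insertion order; uncomment the "e + \t" / "r + \t" writes to keep
    # the ids alongside the vectors.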
    def update_triple_embedding(self, Tbatch):
        # deepcopy gives every nesting level its own address, even for lists nested
        # in lists, so the elements of copy_entity and self.entities are all
        # distinct objects
        copy_entity = copy.deepcopy(self.entities)
        copy_relation = copy.deepcopy(self.relations)

        for correct_sample, corrupted_sample in Tbatch:
            correct_copy_head = copy_entity[correct_sample[0]]
            correct_copy_tail = copy_entity[correct_sample[2]]
            relation_copy = copy_relation[correct_sample[1]]

            corrupted_copy_head = copy_entity[corrupted_sample[0]]
            corrupted_copy_tail = copy_entity[corrupted_sample[2]]

            correct_head = self.entities[correct_sample[0]]
            correct_tail = self.entities[correct_sample[2]]
            relation = self.relations[correct_sample[1]]

            corrupted_head = self.entities[corrupted_sample[0]]
            corrupted_tail = self.entities[corrupted_sample[2]]

            # calculate the distances of the correct and corrupted triples
            if self.norm == 1:
                correct_distance = norm_l1(correct_head, relation, correct_tail)
                corrupted_distance = norm_l1(corrupted_head, relation, corrupted_tail)
            else:
                correct_distance = norm_l2(correct_head, relation, correct_tail)
                corrupted_distance = norm_l2(corrupted_head, relation, corrupted_tail)

            loss = self.margin + correct_distance - corrupted_distance
            if loss > 0:
                self.loss += loss
                print(loss)

                correct_gradient = 2 * (correct_head + relation - correct_tail)
                corrupted_gradient = 2 * (corrupted_head + relation - corrupted_tail)

                # for the L1 norm the gradient reduces to the sign of each component
                if self.norm == 1:
                    for i in range(len(correct_gradient)):
                        if correct_gradient[i] > 0:
                            correct_gradient[i] = 1
                        else:
                            correct_gradient[i] = -1

                        if corrupted_gradient[i] > 0:
                            corrupted_gradient[i] = 1
                        else:
                            corrupted_gradient[i] = -1

                correct_copy_head -= self.learning_rate * correct_gradient
                relation_copy -= self.learning_rate * correct_gradient
                correct_copy_tail -= -1 * self.learning_rate * correct_gradient

                relation_copy -= -1 * self.learning_rate * corrupted_gradient
                if correct_sample[0] == corrupted_sample[0]:
                    # if the corrupted triple replaces the tail entity, the head entity's embedding is updated twice
                    correct_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    corrupted_copy_tail -= self.learning_rate * corrupted_gradient
                elif correct_sample[2] == corrupted_sample[2]:
                    # if the corrupted triple replaces the head entity, the tail entity's embedding is updated twice
                    corrupted_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    correct_copy_tail -= self.learning_rate * corrupted_gradient

                # normalise only the embedding vectors that were just updated,
                # instead of normalising all the embeddings together
                copy_entity[correct_sample[0]] = self.normalization(correct_copy_head)
                copy_entity[correct_sample[2]] = self.normalization(correct_copy_tail)
                if correct_sample[0] == corrupted_sample[0]:
                    # if the corrupted triple replaces the tail entity, update that tail entity's embedding
                    copy_entity[corrupted_sample[2]] = self.normalization(corrupted_copy_tail)
                elif correct_sample[2] == corrupted_sample[2]:
                    # if the corrupted triple replaces the head entity, update that head entity's embedding
                    copy_entity[corrupted_sample[0]] = self.normalization(corrupted_copy_head)

                # the paper mentions that relation embeddings do not need to be normalised
                copy_relation[correct_sample[1]] = relation_copy
                # copy_relation[correct_sample[1]] = self.normalization(relation_copy)

        self.entities = copy_entity
        self.relations = copy_relation
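
# Each Tbatch pair contributes the margin ranking loss
#     L = max(0, margin + d(h + r, t) - d(h' + r, t'))
# and for the squared L2 distance d = ||h + r - t||^2 the gradient with respect
# to h (and r) is 2 * (h + r - t), which is exactly correct_gradient above; the
# head, relation and tail of the correct triple move along -/-/+ that direction,
# and the signs flip for the corrupted triple because its distance enters the
# loss with a minus sign.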
if __name__ == '__main__':
    file1 = "/"
    file2 = "/"
    file3 = "/"

    entity_set, relation_set, triple_list = dataloader(file1, file2, file3)

    # modify the hyperparameters by yourself
    transE = TransE(entity_set, relation_set, triple_list, embedding_dim=30, lr=0.01, margin=1.0, norm=2)

    transE.data_initialise()
    transE.training_run()
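
    # --- Usage sketch (an addition, not part of the original script) ---
    # After training, transE.entities and transE.relations map ids to vectors, so
    # a link-prediction query (h, r, ?) can rank every entity as a candidate tail
    # with the same distance used in training. predict_tail is a hypothetical helper.
    def predict_tail(model, h, r, k=5):
        # rank every entity as a candidate tail by the training distance
        dist = norm_l1 if model.norm == 1 else norm_l2
        scores = []
        for e, t_vec in model.entities.items():
            scores.append((dist(model.entities[h], model.relations[r], t_vec), e))
        scores.sort()
        return [e for _, e in scores[:k]]

    # e.g. predict_tail(transE, some_entity_id, some_relation_id), where both ids
    # are placeholders for keys present in transE.entities / transE.relations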
