Labeled LDA can directly learn the correspondence between topics and tags.
```
# @source code: example/example.py

import model.labeled_lda as llda

# initialize data; note the trailing space inside each string so that
# repetition does not fuse adjacent words together
labeled_documents = [("example example example example example "*10, ["example"]),
                     ("test llda model test llda model test llda model "*10, ["test", "llda_model"]),
                     ("example test example test example test example test "*10, ["example", "test"]),
                     ("good perfect good good perfect good good perfect good "*10, ["positive"]),
                     ("bad bad down down bad bad down "*10, ["negative"])]

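# each item pairs a raw document string with its list of labels (tags);
# every label becomes a topic the sampler may assign to that document's words
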
# create a Labeled LDA model
# llda_model = llda.LldaModel(labeled_documents=labeled_documents, alpha_vector="50_div_K", eta_vector=0.001)
# llda_model = llda.LldaModel(labeled_documents=labeled_documents, alpha_vector=0.02, eta_vector=0.002)
llda_model = llda.LldaModel(labeled_documents=labeled_documents, alpha_vector=0.01)
print(llda_model)
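
# alpha_vector is the Dirichlet prior on per-document topic mixtures and
# eta_vector the prior on per-topic word distributions; "50_div_K" presumably
# sets alpha to 50/K, a common topic-modeling heuristic (interpretation assumed)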

# training
# llda_model.training(iteration=10, log=True)
while True:
    print("iteration %s sampling..." % (llda_model.iteration + 1))
    llda_model.training(1)
    print("after iteration: %s, perplexity: %s" % (llda_model.iteration, llda_model.perplexity()))
    print("delta beta: %s" % llda_model.delta_beta)
    if llda_model.is_convergent(method="beta", delta=0.01):
        break
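
# delta_beta measures how much the topic-word distribution (beta) changed in
# the last iteration; is_convergent(method="beta", delta=0.01) presumably stops
# training once that change falls below the threshold (inferred from the names)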

# update
print("before updating: ", llda_model)
update_labeled_documents = [("new example test example test example test example test", ["example", "test"])]
llda_model.update(labeled_documents=update_labeled_documents)
print("after updating: ", llda_model)

# train again
# llda_model.training(iteration=10, log=True)
while True:
    print("iteration %s sampling..." % (llda_model.iteration + 1))
    llda_model.training(1)
    print("after iteration: %s, perplexity: %s" % (llda_model.iteration, llda_model.perplexity()))
    print("delta beta: %s" % llda_model.delta_beta)
    if llda_model.is_convergent(method="beta", delta=0.01):
        break

# inference
# note: the resulting topics may differ between runs, because Gibbs sampling is a randomized algorithm
document = "example llda model example example good perfect good perfect good perfect " * 100

topics = llda_model.inference(document=document, iteration=100, times=10)
print(topics)
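# topics is expected to be a list of (label, probability) pairs sorted by
# probability (assumed output format; see the repository docs for the exact shape)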

# perplexity
# calculate perplexity on test data
perplexity = llda_model.perplexity(documents=["example example example example example",
                                              "test llda model test llda model test llda model",
                                              "example test example test example test example test",
                                              "good perfect good good perfect good good perfect good",
                                              "bad bad down down bad bad down"],
                                   iteration=30,
                                   times=10)
print("perplexity on test data: %s" % perplexity)
# calculate perplexity on training data
print("perplexity on training data: %s" % llda_model.perplexity())
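# lower perplexity indicates a better fit; called with no arguments,
# perplexity() scores the training corpus itself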

# save to disk
save_model_dir = "../data/model"
# llda_model.save_model_to_dir(save_model_dir, save_derivative_properties=True)
llda_model.save_model_to_dir(save_model_dir)
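# save_derivative_properties=True presumably also persists derived statistics
# (such as beta) so they need not be recomputed after loading; this reading is
# inferred from the parameter name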

# load from disk
llda_model_new = llda.LldaModel()
llda_model_new.load_model_from_dir(save_model_dir, load_derivative_properties=False)
print("llda_model_new", llda_model_new)
print("llda_model", llda_model)
```
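
For intuition about how Labeled LDA ties topics to tags, here is a minimal, self-contained sketch of the label-restricted collapsed Gibbs sampling step at the heart of the model (Ramage et al., 2009): each word may only be assigned to topics from its own document's label set. The toy data and all names below are illustrative assumptions, not this library's internals.

```
import random
from collections import defaultdict

# toy corpus: word lists plus the topic ids each document is labeled with
docs = [["example", "example", "test"], ["bad", "down", "bad"]]
doc_labels = [[0, 1], [2]]              # allowed topics per document (its tags)
K, alpha, eta = 3, 0.01, 0.001          # topic count and Dirichlet priors
vocab = sorted({w for doc in docs for w in doc})
V = len(vocab)
word_id = {w: i for i, w in enumerate(vocab)}

# random initialization, already restricted to each document's labels
z = [[random.choice(doc_labels[d]) for _ in doc] for d, doc in enumerate(docs)]
n_dk = [defaultdict(int) for _ in docs]         # doc-topic counts
n_kw = [defaultdict(int) for _ in range(K)]     # topic-word counts
n_k = [0] * K                                   # topic totals
for d, doc in enumerate(docs):
    for i, w in enumerate(doc):
        k = z[d][i]
        n_dk[d][k] += 1
        n_kw[k][word_id[w]] += 1
        n_k[k] += 1

for _ in range(100):                            # collapsed Gibbs sweeps
    for d, doc in enumerate(docs):
        for i, w in enumerate(doc):
            wid, k_old = word_id[w], z[d][i]
            # remove the current assignment from the counts
            n_dk[d][k_old] -= 1
            n_kw[k_old][wid] -= 1
            n_k[k_old] -= 1
            # score candidate topics, restricted to this document's label set
            cands = doc_labels[d]
            wts = [(n_dk[d][k] + alpha) * (n_kw[k][wid] + eta) / (n_k[k] + eta * V)
                   for k in cands]
            r, k_new = random.uniform(0, sum(wts)), cands[-1]
            for k, wt in zip(cands, wts):
                r -= wt
                if r <= 0:
                    k_new = k
                    break
            # record the new assignment
            z[d][i] = k_new
            n_dk[d][k_new] += 1
            n_kw[k_new][wid] += 1
            n_k[k_new] += 1

print(z)  # every assignment stays inside its document's label set
```

Restricting the candidate topics to `doc_labels[d]` is the only change from vanilla LDA's collapsed Gibbs step, and it is exactly what forces each learned topic to align with one tag.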