
Commit a236587

ADD: some logs
1 parent 0aff082 commit a236587

2 files changed: +13 -4 lines changed


example/example.py

Lines changed: 1 addition & 1 deletion
@@ -51,6 +51,6 @@
 
 # load from disk
 llda_model_new = llda.LldaModel()
-llda_model_new.load_model_from_dir(save_model_dir)
+llda_model_new.load_model_from_dir(save_model_dir, load_derivative_properties=True)
 print "llda_model_new", llda_model_new
 print "llda_model", llda_model

model/labeled_lda.py

Lines changed: 12 additions & 3 deletions
@@ -258,6 +258,11 @@ def _gibbs_sample_training(self):
 
             p_vector = beta_vector * theta_vector
             # print p_vector
+            """
+            for some special document m (one that has only a single word), p_vector may be all zeros here,
+            and sum(p_vector) will be zero too, so 1.0 * p_vector / sum(p_vector) will be [...nan...];
+            we should therefore avoid feeding such a document into the sampler
+            """
             p_vector = 1.0 * p_vector / sum(p_vector)
             # print p_vector
             sample_z = LldaModel._multinomial_sample(p_vector)
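
The new comment describes a real failure mode: if every entry of p_vector is zero, the normalization divides by zero and the sampler receives NaNs. A small standalone sketch of that effect, plus one hypothetical guard (not the approach taken in this commit, which simply avoids such documents):

# illustrative only: what happens when an all-zero probability vector is normalized
import numpy as np

p_vector = np.zeros(4)                    # stands in for beta_vector * theta_vector collapsing to zeros
print 1.0 * p_vector / sum(p_vector)      # -> [ nan  nan  nan  nan ], with a divide-by-zero warning

# one possible guard: fall back to a uniform distribution before sampling
if sum(p_vector) == 0:
    p_vector = np.ones_like(p_vector) / len(p_vector)
print 1.0 * p_vector / sum(p_vector)      # -> [ 0.25  0.25  0.25  0.25 ]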
@@ -513,6 +518,8 @@ def theta_m(self, m):
     @property
     def beta(self):
         """
+        This name "beta" comes from
+        "Labeled LDA: A supervised topic model for credit attribution in multi-labeled corpora, Daniel Ramage..."
         topic-term distribution
         beta[k, t] is the probability of term t (word) to be generated from topic k
         :return: a matrix, shape is K * T
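
As the docstring says, beta[k, t] is a probability and the matrix has shape K * T, so each row should be a distribution over the vocabulary. A self-contained toy illustration with made-up numbers (not a trained model):

# beta[k, t] = P(term t | topic k); rows sum to 1
import numpy as np

K, T = 2, 3                                  # 2 topics, 3 vocabulary terms
beta = np.array([[0.5, 0.3, 0.2],
                 [0.1, 0.1, 0.8]])
print beta.shape                             # -> (2, 3), i.e. K * T
print np.allclose(beta.sum(axis=1), 1.0)     # -> True: each topic is a distribution over terms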
@@ -542,25 +549,27 @@ def theta(self):
     def log_perplexity(self):
         """
         log perplexity of LDA topic model
+        Reference: Parameter estimation for text analysis, Gregor Heinrich.
         :return: a float value
         """
         beta = self.beta
         # theta = self.theta
         log_likelihood = 0
-        word_count = 0
+        # word_count = 0
         for m, theta_m in enumerate(self.theta):
             for t in self.W[m]:
                 likelihood_t = np.inner(beta[:, t], theta_m)
                 # print likelihood_t
                 log_likelihood += -np.log(likelihood_t)
-                word_count += 1
-        assert word_count == self.WN, "word_count: %s\tself.WN: %s" % (word_count, self.WN)
+                # word_count += 1
+        # assert word_count == self.WN, "word_count: %s\tself.WN: %s" % (word_count, self.WN)
         return 1.0 * log_likelihood / self.WN
 
     @property
     def perplexity(self):
         """
         perplexity of LDA topic model
+        Reference: Parameter estimation for text analysis, Gregor Heinrich.
         :return: a float value, perplexity = exp{log_perplexity}
         """
         return np.exp(self.log_perplexity)
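
As the referenced Heinrich notes define it, log perplexity here is the negative average log-likelihood per observed word, and perplexity is its exponential. A toy numeric sketch with made-up per-word likelihoods:

# illustrative only: perplexity = exp(-sum_w log p(w) / WN)
import numpy as np

word_likelihoods = np.array([0.1, 0.05, 0.2])   # hypothetical p(word) values for WN = 3 observed words
log_perplexity = -np.log(word_likelihoods).sum() / len(word_likelihoods)
print "log perplexity:", log_perplexity          # ~ 2.30
print "perplexity:", np.exp(log_perplexity)      # ~ 10.0, i.e. exp{log_perplexity}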
