"""Constants to configure the rest of Enso."""

# The docstring must be the first statement in the file to become the
# module's __doc__; placed after the imports it is only a discarded string.
import indicoio
from enso.mode import ModeKeys
import multiprocessing
6+
# Where data is stored
DATA_DIRECTORY = "Data"

# Where results are stored
RESULTS_DIRECTORY = "Results"

# Where features are stored
FEATURES_DIRECTORY = "Features"

# Experiment name; per the original note this is the directory used for
# storing experiment results
EXPERIMENT_NAME = "Rationales"

# File name of the csv that results are stored in
RESULTS_CSV_NAME = "Results.csv"
21+
# Datasets to featurize or run experiments on.
# Uncomment an entry to enable it. Every entry, enabled or not, ends with a
# comma so that entries can be toggled independently of their neighbours.
DATA = {
    # --- Classify ---
    # "Classify/AirlineComplaints",
    # "Classify/AirlineNegativity",
    # "Classify/IMDB",
    # "Classify/Irony",
    # "Classify/MPQA",
    # "Classify/MovieReviews",
    # "Classify/NewYearsResolutions",
    # "Classify/PoliticalTweetAlignment",
    # "Classify/PoliticalTweetBias",
    # "Classify/PoliticalTweetClassification",
    # "Classify/PoliticalTweetSubjectivity",
    # "Classify/PoliticalTweetTarget",
    # "Classify/ReligiousTexts",
    # "Classify/ShortAnswer",
    # "Classify/SocialMediaDisasters",
    # "Classify/Subjectivity",
    # "Classify/TextSpam",
    # "Classify/SST-binary",
    # --- Sequence ---
    # "SequenceLabeling/Reuters-128",
    # "SequenceLabeling/table_synth",
    # "SequenceLabeling/bonds_new",
    # "SequenceLabeling/tables",
    # "SequenceLabeling/typed_cols",
    # "SequenceLabeling/brown_all",
    # "SequenceLabeling/brown_nouns",
    # "SequenceLabeling/brown_verbs",
    # "SequenceLabeling/brown_pronouns",
    # "SequenceLabeling/brown_adverbs",
    # --- RationalizedClassify ---
    # "RationalizedClassify/short_bank_qualified",
    # "RationalizedClassify/bank_qualified",
    # "RationalizedClassify/evidence_inference",
    # "RationalizedClassify/federal_tax",
    # "RationalizedClassify/short_federal_tax",
    # "RationalizedClassify/interest_frequency",
    # "RationalizedClassify/short_interest_frequency",
    "RationalizedClassify/aviation",
    # "RationalizedClassify/movie_reviews",
    # "RationalizedClassify/mining_rationales",
    # "RationalizedClassify/mining_extractions",
    # "RationalizedClassify/insurance_rationales",
    # "RationalizedClassify/insurance_extractions",
    # "RationalizedClassify/mining",
    # "RationalizedClassify/insurance_rationales_precise",
    # "RationalizedClassify/short_bank_qualified",
    # "RationalizedClassify/bank_qualified",
    # "RationalizedClassify/short_bank_qualified_fixed",
    # "RationalizedClassify/bank_qualified_fixed",
    # "RationalizedClassify/short_bank_qualified_precise",
    # "RationalizedClassify/bank_qualified_precise",
}
75+
# Featurizers to activate (uncomment an entry to enable it)
FEATURIZERS = {
    "PlainTextFeaturizer",
    "SpacyGloveFeaturizer",
    # Currently disabled:
    # "TextContextFeaturizer",
    # "IndicoStandard",
    # "IndicoFastText",
    # "IndicoSentiment",
    # "IndicoElmo",
    # "IndicoTopics",
    # "IndicoFinance",
    # "IndicoTransformer",
    # "IndicoEmotion",
    # "IndicoFastText",
    # "SpacyCNNFeaturizer",
}
92+
# Experiments to run (uncomment an entry to enable it).
# Every entry, enabled or not, ends with a comma. Without the comma,
# enabling a neighbouring entry makes Python join the two adjacent string
# literals into a single bogus experiment name instead of raising an error.
EXPERIMENTS = {
    # "FinetuneSequenceLabel",
    # "Proto",
    # "IndicoSequenceLabel",
    "LRBaselineNonRationalized",
    "DistReweightedGloveClassifierCV",
    "DistReweightedGloveByClassClassifierCV",
    # "RationaleInformedLRCV",
    # "FinetuneSeqBaselineRationalized",
    # "FinetuneClfBaselineNonRationalized",
    # "LogisticRegressionCV",
    # "KNNCV",
    # "TfidfKNN",
    # "TfidfLogisticRegression",
    # "KCenters",
    # "TfidfKCenters",
    # "SupportVectorMachineCV",
}
112+
# Metrics to compute (uncomment an entry to enable it)
METRICS = {
    "AccuracyRationalized",
    "MacroRocAucRationalized",
    # Currently disabled:
    # "Accuracy",
    # "MacroRocAuc",
    # "MacroCharF1",
    # "MacroCharRecall",
    # "MacroCharPrecision",
}
123+
# Test setup metadata.
# The commented-out lines are alternatives for the key they name. Replace the
# active line rather than enabling both: a dict literal accepts duplicate
# keys silently and keeps only the last one. Every item ends with a comma so
# that a line can be swapped without touching its neighbours.
TEST_SETUP = {
    "train_sizes": [20, 40, 60, 80, 100, 150, 200, 300, 400, 500],
    "n_splits": 5,
    # "samplers": ["RandomRationalized"],
    # "samplers": ["ImbalanceSampler"],
    "samplers": ["RandomRationalized"],
    "sampling_size": 0.2,
    "resamplers": ["NoResampler"],
    # "resamplers": ["RandomOverSampler"],
}
135+
# Visualizations to display
VISUALIZATIONS = {
    "FacetGridVisualizer",
}
138+
# kwargs handed straight to the visualizations. The top-level "display" and
# "save" flags sit beside one nested dict keyed by the visualizer's name.
VISUALIZATION_OPTIONS = {
    "display": True,
    "save": True,
    "FacetGridVisualizer": {
        # Tiling of the grid
        "x_tile": "Metric",
        "y_tile": "Dataset",
        # Axes within each tile
        "x_axis": "TrainSize",
        "y_axis": "Result",
        "lines": ["Experiment", "Featurizer", "Sampler", "Resampler"],
        "category": "merge",
        "cv": "mean",
        "filename": "TestResult",
    },
}
154+
# Mode this configuration runs in: the RATIONALIZED member of ModeKeys
MODE = ModeKeys.RATIONALIZED
156+
# NOTE(review): presumably the number of GPUs to use -- confirm with the
# code that reads this value.
N_GPUS = 0
# Fixed at 1; multiprocessing.cpu_count() is the alternative the original
# author left commented out here.
N_CORES = 1

# NOTE(review): meaning not documented in this file -- confirm with the code
# that reads this flag.
FIX_REQUIREMENTS = True

# NOTE(review): presumably fractions in [0, 1]; what they are fractions of is
# not shown in this file -- confirm with the code that reads them.
GOLD_FRAC = 0.05
CORRUPTION_FRAC = 0.4
164+
# Runs at import time: sets the indicoio client's API key to an empty string
# (no real key is stored in this file).
indicoio.config.api_key = ""
166+
# Experiment hyperparameters to modify; left empty when there are none we
# hope to change.
EXPERIMENT_PARAMS = {}
0 commit comments