-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpublications.bib
564 lines (514 loc) · 31 KB
/
publications.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
%%% OUR PUBLICATIONS %%%
%%% Publications concerning EMW project %%%
% Journal article on random sampling for protest-event corpus design.
% Names are separated by " and " only; accents use the escaped forms found elsewhere in this file.
@article{abs-yoruk-2021,
  author  = {Y{\"o}r{\"u}k, Erdem and H{\"u}rriyeto{\u{g}}lu, Ali and Duru{\c{s}}an, F{\i}rat and Yoltar, {\c{C}}a{\u{g}}r{\i}},
  title   = {Random Sampling in Corpus Design: Cross-Context Generalizability in Automated Multi-Country Protest Event Collection},
  journal = {American Behavioral Scientist},
  year    = {2021},
}
% Data Intelligence article describing the cross-context protest news corpus.
% Author names are in "Last, First" form with LaTeX-escaped accents so classic BibTeX sorts and labels them correctly.
@article{10.1162/dint_a_00092,
  author  = {H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Mutlu, Osman and Duru{\c{s}}an, F{\i}rat and Yoltar, {\c{C}}a{\u{g}}r{\i} and Y{\"u}ret, Deniz and G{\"u}rel, Burak},
  title   = {Cross-Context News Corpus for Protest Event-Related Knowledge Base Construction},
  journal = {Data Intelligence},
  volume  = {3},
  number  = {2},
  year    = {2021},
  issn    = {2641-435X},
  doi     = {10.1162/dint_a_00092},
}
% W-NUT 2020 Task 2 system description paper.
@inproceedings{hurriyetoglu-etal-2020-covcor20,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Safaya, Ali and Mutlu, Osman and Oostdijk, Nelleke and Y{\"o}r{\"u}k, Erdem},
  title     = {{COVCOR}20 at {WNUT}-2020 Task 2: An Attempt to Combine Deep Learning and Expert rules},
  booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {495--498},
  doi       = {10.18653/v1/2020.wnut-1.75},
  url       = {https://www.aclweb.org/anthology/2020.wnut-1.75},
  abstract  = {In the scope of WNUT-2020 Task 2, we developed various text classification systems, using deep learning models and one using linguistically informed rules. While both of the deep learning systems outperformed the system using the linguistically informed rules, we found that through the integration of (the output of) the three systems a better performance could be achieved than the standalone performance of each approach in a cross-validation setting. However, on the test data the performance of the integration was slightly lower than our best performing deep learning model. These results hardly indicate any progress in line of integrating machine learning and expert rules driven systems. We expect that the release of the annotation manuals and gold labels of the test data after this workshop will shed light on these perplexing results.},
}
% Conference version of the cross-context protest news corpus paper.
@inproceedings{hurriyetoglu2020crosscontext,
  author    = {Ali H{\"u}rriyeto{\u{g}}lu and Erdem Y{\"o}r{\"u}k and Deniz Y{\"u}ret and Osman Mutlu and {\c{C}}a{\u{g}}r{\i} Yoltar and F{\i}rat Duru{\c{s}}an and Burak G{\"u}rel},
  title     = {Cross-context News Corpus for Protest Events related Knowledge Base Construction},
  booktitle = {Automated Knowledge Base Construction},
  year      = {2020},
  doi       = {10.24432/C5D59R},
  url       = {https://openreview.net/forum?id=7NZkNhLCjp}
}
% AESPEN 2020 workshop paper comparing ELMo and DistilBERT.
@inproceedings{buyukoz-etal-2020-analyzing,
  author    = {B{\"u}y{\"u}k{\"o}z, Berfu and H{\"u}rriyeto{\u{g}}lu, Ali and {\"O}zg{\"u}r, Arzucan},
  title     = {Analyzing {ELM}o and {D}istil{BERT} on Socio-political News Classification},
  booktitle = {Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille, France},
  month     = may,
  year      = {2020},
  pages     = {9--18},
  url       = {https://www.aclweb.org/anthology/2020.aespen-1.4},
  language  = {English},
  isbn      = {979-10-95546-50-4},
  abstract  = {This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A {''}cross-context{''} setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30{\%} smaller and 83{\%} faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations.},
}
% AESPEN 2020 workshop and shared task report.
@inproceedings{Hurriyetoglu+20b,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Zavarella, Vanni and Tanev, Hristo and Y{\"o}r{\"u}k, Erdem and Safaya, Ali and Mutlu, Osman},
  title     = {Automated Extraction of Socio-political Events from News ({AESPEN}): Workshop and Shared Task Report},
  booktitle = {Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille, France},
  month     = may,
  year      = {2020},
  pages     = {1--6},
  url       = {https://www.aclweb.org/anthology/2020.aespen-1.1},
  language  = {English},
  isbn      = {979-10-95546-50-4},
  abstract  = {We describe our effort on automated extraction of socio-political events from news in the scope of a workshop and a shared task we organized at Language Resources and Evaluation Conference (LREC 2020). We believe the event extraction studies in computational linguistics and social and political sciences should further support each other in order to enable large scale socio-political event information collection across sources, countries, and languages. The event consists of regular research papers and a shared task, which is about event sentence coreference identification (ESCI), tracks. All submissions were reviewed by five members of the program committee. The workshop attracted research papers related to evaluation of machine learning methodologies, language resources, material conflict forecasting, and a shared task participation report in the scope of socio-political event information collection. It has shown us the volume and variety of both the data sources and event information collection approaches related to socio-political events and the need to fill the gap between automated text processing techniques and requirements of social and political sciences.},
}
% Overview paper of the CLEF 2019 ProtestNews lab.
% The acronym and the lab name are braced so sentence-casing styles keep their capitalisation.
@inproceedings{Hurriyetoglu+19b,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Y{\"u}ret, Deniz and Yoltar, {\c{C}}a{\u{g}}r{\i} and G{\"u}rel, Burak and Duru{\c{s}}an, F{\i}rat and Mutlu, Osman and Akdemir, Arda},
  editor    = {Crestani, Fabio and Braschler, Martin and Savoy, Jacques and Rauber, Andreas and M{\"u}ller, Henning and Losada, David E. and Heinatz B{\"u}rki, Gundula and Cappellato, Linda and Ferro, Nicola},
  title     = {Overview of {CLEF} 2019 Lab {ProtestNews}: Extracting Protests from News in a Cross-Context Setting},
  booktitle = {Experimental IR Meets Multilinguality, Multimodality, and Interaction},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {425--432},
  abstract  = {We present an overview of the CLEF-2019 Lab ProtestNews on Extracting Protests from News in the context of generalizable natural language processing. The lab consists of document, sentence, and token level information classification and extraction tasks that were referred as task 1, task 2, and task 3 respectively in the scope of this lab. The tasks required the participants to identify protest relevant information from English local news at one or more aforementioned levels in a cross-context setting, which is cross-country in the scope of this lab. The training and development data were collected from India and test data was collected from India and China. The lab attracted 58 teams to participate in the lab. 12 and 9 of these teams submitted results and working notes respectively. We have observed neural networks yield the best results and the performance drops significantly for majority of the submissions in the cross-country setting, which is China.},
  isbn      = {978-3-030-28577-7}
}
% Task-set proposal paper published in the "Advances in Information Retrieval" volume.
@inproceedings{Hurriyetoglu+19,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Y{\"u}ret, Deniz and Yoltar, {\c{C}}a{\u{g}}r{\i} and G{\"u}rel, Burak and Duru{\c{s}}an, F{\i}rat and Mutlu, Osman},
  editor    = {Azzopardi, Leif and Stein, Benno and Fuhr, Norbert and Mayr, Philipp and Hauff, Claudia and Hiemstra, Djoerd},
  title     = {A Task Set Proposal for Automatic Protest Information Collection Across Multiple Countries},
  booktitle = {Advances in Information Retrieval},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2019},
  pages     = {316--323},
  isbn      = {978-3-030-15719-7},
  abstract  = {We propose a coherent set of tasks for protest information collection in the context of generalizable natural language processing. The tasks are news article classification, event sentence detection, and event extraction. Having tools for collecting event information from data produced in multiple countries enables comparative sociology and politics studies. We have annotated news articles in English from a source and a target country in order to be able to measure the performance of the tools developed using data from one country on data from a different country. Our preliminary experiments have shown that the performance of the tools developed using English texts from India drops to a level that are not usable when they are applied on English texts from China. We think our setting addresses the challenge of building generalizable NLP tools that perform well independent of the source of the text and will accelerate progress in line of developing generalizable NLP systems.}
}
% GIR'18 workshop paper on place name recognition.
% Accents are written as braced special characters; Yoltar's given name uses the dotless i, as in the other entries of this file.
@inproceedings{Akdemir+2018,
  author    = {Akdemir, Arda and H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and G{\"u}rel, Burak and Yoltar, {\c{C}}a{\u{g}}r{\i} and Y{\"u}ret, Deniz},
  title     = {{Towards Generalizable Place Name Recognition Systems: Analysis and Enhancement of NER Systems on English News from India}},
  booktitle = {Proceedings of the 12th Workshop on Geographic Information Retrieval},
  series    = {GIR'18},
  year      = {2018},
  isbn      = {978-1-4503-6034-0},
  location  = {Seattle, WA, USA},
  pages     = {8:1--8:10},
  articleno = {8},
  numpages  = {10},
  url       = {http://doi.acm.org/10.1145/3281354.3281363},
  doi       = {10.1145/3281354.3281363},
  acmid     = {3281363},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {Entity Extraction, Machine Learning, Named Entity Recognition, Natural Language Processing, Place Name Recognition},
}
% CLIN29 contribution; accents are written as braced special characters so BibTeX treats each as one letter.
@inproceedings{Buyukoz+2019,
  author    = {B{\"u}y{\"u}k{\"o}z, Berfu and H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Y{\"u}ret, Deniz},
  title     = {{Examining Existing Information Extraction Tools on Manually-Annotated Protest Events in Indian News}},
  booktitle = {{Proceedings of Computational Linguistics in Netherlands (CLIN)}},
  series    = {CLIN29},
  year      = {2019},
  location  = {Groningen, the Netherlands},
  numpages  = {1}
}
%%% Publications of individual team members %%%
%% Erdem Yoruk %%
% SIGHUM workshop paper; title and booktitle carry no trailing whitespace, so no stray space precedes the closing punctuation.
@inproceedings{Sonmez+16,
  author    = {S{\"o}nmez, {\c{C}}a{\u{g}}{\i}l and {\"O}zg{\"u}r, Arzucan and Y{\"o}r{\"u}k, Erdem},
  title     = {Towards Building a Political Protest Database to Explain Changes in the Welfare State},
  booktitle = {Proceedings of the 10th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities},
  year      = {2016},
  publisher = {Association for Computational Linguistics},
  pages     = {106--110},
  location  = {Berlin, Germany},
  doi       = {10.18653/v1/W16-2113},
  url       = {http://www.aclweb.org/anthology/W16-2113}
}
% Doctoral dissertation: typed as a thesis with the university in "school".
% NOTE(review): dissertation status is inferred from the university being listed as publisher; confirm.
@phdthesis{Yoruk12,
  author = {Y{\"o}r{\"u}k, Erdem},
  title  = {The politics of the {Turkish} welfare system transformation in the neoliberal era: Welfare as mobilization and containment},
  school = {The Johns Hopkins University},
  year   = {2012}
}
%% Ali Hurriyetoglu %%
% DIR 2013 workshop paper; names are in "von Last, First" form with braced accents.
@inproceedings{Hurriyetoglu+13,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Kunneman, Florian and van den Bosch, Antal},
  title     = {{Estimating the Time between Twitter Messages and Future Events}},
  booktitle = {{Proceedings of the 13th Dutch-Belgian Workshop on Information Retrieval}},
  year      = {2013},
  pages     = {20--23},
  url       = {http://ceur-ws.org/Vol-986/paper_23.pdf},
}
% LASM 2014 workshop paper; month uses the standard three-letter macro so styles can localise it.
@inproceedings{Hurriyetoglu+14,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Oostdijk, Nelleke and van den Bosch, Antal},
  title     = {Estimating Time to Event from Tweets Using Temporal Expressions},
  booktitle = {{Proceedings of the 5th Workshop on Language Analysis for Social Media (LASM)}},
  month     = apr,
  year      = {2014},
  address   = {Gothenburg, Sweden},
  publisher = {Association for Computational Linguistics},
  pages     = {8--16},
  url       = {http://www.aclweb.org/anthology/W14-1302}
}
% KDWeb 2016 workshop paper with four authors, each separated by " and ".
@inproceedings{Hurriyetoglu+16a,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Wagemaker, Jurjen and van den Bosch, Antal and Oostdijk, Nelleke},
  title     = {{Analysing the Role of Key Term Inflections in Knowledge Discovery on Twitter}},
  booktitle = {{Proceedings of the 1st International Workshop on Knowledge Discovery on the WEB}},
  address   = {Cagliari, Italy},
  month     = sep,
  year      = {2016},
  url       = {http://www.iascgroup.it/kdweb2016-program/accepted-papers.html}
}
% Chapter in the SocInfo 2016 proceedings volume.
% Typed as incollection because the record has chapter authors, volume editors and a booktitle.
@incollection{Hurriyetoglu+16b,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Gudehus, Christian and Oostdijk, Nelleke and van den Bosch, Antal},
  editor    = {Spiro, Emma and Ahn, Yong-Yeol},
  title     = {{Relevancer: Finding and Labeling Relevant Information in Tweet Collections}},
  booktitle = {{Social Informatics: 8th International Conference, SocInfo 2016, Bellevue, WA, USA, November 11-14, 2016, Proceedings, Part II}},
  year      = {2016},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {210--224},
  isbn      = {978-3-319-47874-6},
  doi       = {10.1007/978-3-319-47874-6_15},
  url       = {http://dx.doi.org/10.1007/978-3-319-47874-6_15}
}
% FIRE 2016 working notes paper; month uses the standard three-letter macro.
@inproceedings{Hurriyetoglu+16c,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and van den Bosch, Antal and Oostdijk, Nelleke},
  title     = {{Using Relevancer to Detect Relevant Tweets: The Nepal Earthquake Case}},
  booktitle = {{Working notes of FIRE 2016 - Forum for Information Retrieval Evaluation}},
  address   = {Kolkata, India},
  month     = dec,
  year      = {2016},
  url       = {http://ceur-ws.org/Vol-1737/T2-6.pdf}
}
% SMERP 2017 workshop paper; month uses the standard three-letter macro.
@inproceedings{Hurriyetoglu+17,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Oostdijk, Nelleke},
  title     = {{Extracting Humanitarian Information from Tweets}},
  booktitle = {{Proceedings of the First International Workshop on Exploitation of Social Media for Emergency Relief and Preparedness}},
  address   = {Aberdeen, United Kingdom},
  month     = apr,
  year      = {2017},
  url       = {http://ceur-ws.org/Vol-1832/SMERP-2017-DC-RU-Retrieval.pdf}
}
% Chapter in the NLDB 2017 proceedings volume, typed as incollection (chapter authors plus volume editors).
% The first author's g carries a breve, matching every other entry for this author.
% NOTE(review): "Erkan" is treated as a second given name of Mustafa Erkan Basar; confirm the surname split.
@incollection{Hurriyetoglu+17b,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Oostdijk, Nelleke and Ba{\c{s}}ar, Mustafa Erkan and van den Bosch, Antal},
  editor    = {Frasincar, Flavius and Ittoo, Ashwin and Nguyen, Le Minh and M{\'e}tais, Elisabeth},
  title     = {{Supporting Experts to Handle Tweet Collections About Significant Events}},
  booktitle = {{Natural Language Processing and Information Systems: 22nd International Conference on Applications of Natural Language to Information Systems, NLDB 2017, Li{\`e}ge, Belgium, June 21-23, 2017, Proceedings}},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {138--141},
  abstract  = {We introduce Relevancer that processes a tweet set and enables generating an automatic classifier from it. Relevancer satisfies information needs of experts during significant events. Enabling experts to combine automatic procedures with expertise is the main contribution of our approach and the added value of the tool. Even a small amount of feedback enables the tool to distinguish between relevant and irrelevant information effectively. Thus, Relevancer facilitates the quick understanding of and proper reaction to events presented on Twitter.},
  isbn      = {978-3-319-59569-6},
  doi       = {10.1007/978-3-319-59569-6_14},
  url       = {https://doi.org/10.1007/978-3-319-59569-6_14}
}
% Chapter in an edited Springer volume, typed as incollection (chapter authors plus volume editors).
% The title carries no final period; styles add their own punctuation.
@incollection{Hurriyetoglu+18,
  author    = {H{\"u}rriyeto{\u{g}}lu, Ali and Oostdijk, Nelleke and van den Bosch, Antal},
  editor    = {Shaalan, Khaled and Hassanien, Aboul Ella and Tolba, Fahmy},
  title     = {{Estimating Time to Event based on Linguistic Cues on Twitter}},
  booktitle = {{Intelligent Natural Language Processing: Trends and Applications}},
  year      = {2018},
  publisher = {Springer International Publishing},
  series    = {Studies in Computational Intelligence},
  volume    = {740},
  isbn      = {978-3-319-67056-0},
  url       = {http://www.springer.com/cn/book/9783319670553}
}
%% Osman Mutlu %%
% SemEval-2019 Task 4 system description paper.
@inproceedings{mutlu-etal-2019-team,
  author    = {Mutlu, Osman and Can, Ozan Arkan and Dayanik, Erenay},
  title     = {Team Howard {B}eale at {S}em{E}val-2019 Task 4: Hyperpartisan News Detection with {BERT}},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota, USA},
  month     = jun,
  year      = {2019},
  pages     = {1007--1011},
  doi       = {10.18653/v1/S19-2175},
  url       = {https://www.aclweb.org/anthology/S19-2175},
  abstract  = {This paper describes our system for SemEval-2019 Task 4: Hyperpartisan News Detection (Kiesel et al., 2019). We use pretrained BERT (Devlin et al., 2018) architecture and investigate the effect of different fine tuning regimes on the final classification task. We show that additional pretraining on news domain improves the performance on the Hyperpartisan News Detection task. Our system ranked 8th out of 42 teams with 78.3{\%} accuracy on the held-out test dataset.},
}
%%% OTHER PUBLICATIONS %%%
% Handbook chapter on automatic event extraction.
% The DOI and URL are stored bare (no backslash before the underscore), like the other Springer records in this file.
@incollection{Boschee+13,
  author    = {Boschee, Elizabeth and Natarajan, Premkumar and Weischedel, Ralph},
  editor    = {Subrahmanian, V.S.},
  title     = {{Automatic Extraction of Events from Open Source Text for Predictive Forecasting}},
  booktitle = {Handbook of Computational Approaches to Counterterrorism},
  year      = {2013},
  publisher = {Springer New York},
  address   = {New York, NY},
  pages     = {51--67},
  abstract  = {Automated analysis of news reports is a significant empowering technology for predictive models of political instability. To date, the standard approach to this analytic task has been embodied in systems such as KEDS/TABARI [1], which use manually-generated rules and shallow parsing techniques to identify events and their participants in text. In this chapter we explore an alternative to event extraction based on BBN SERIFTM, and BBN OnTopicTM, two state-of-the-art statistical natural language processing engines. We empirically compare this new approach to existing event extraction techniques on five dimensions: (1) Accuracy: when an event is reported by the system, how often is it correct? (2) Coverage: how many events are correctly reported by the system? (3) Filtering of historical events: how well are historical events (e.g. 9/11) correctly filtered out of the current event data stream? (4) Topic-based event filtering: how well do systems filter out red herrings based on document topic, such as sports documents mentioning ``clashes'' between two countries on the playing field? (5) Domain shift: how well do event extraction models perform on data originating from diverse sources? In all dimensions we show significant improvement to the state-of-the-art by applying statistical natural language processing techniques. It is our hope that these results will lead to greater acceptance of automated coding by creators and consumers of social science models that depend on event data and provide a new way to improve the accuracy of those predictive models.},
  isbn      = {978-1-4614-5311-6},
  doi       = {10.1007/978-1-4614-5311-6_3},
  url       = {https://doi.org/10.1007/978-1-4614-5311-6_3}
}
% KDD '09 paper on domain adaptation in text mining.
@inproceedings{Chen+09,
  author    = {Chen, Bo and Lam, Wai and Tsang, Ivor and Wong, Tak-Lam},
  title     = {{Extracting Discriminative Concepts for Domain Adaptation in Text Mining}},
  booktitle = {Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  series    = {KDD '09},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Paris, France},
  year      = {2009},
  pages     = {179--188},
  numpages  = {10},
  isbn      = {978-1-60558-495-9},
  doi       = {10.1145/1557019.1557045},
  url       = {http://doi.acm.org/10.1145/1557019.1557045},
  acmid     = {1557045},
  keywords  = {domain adaptation, feature extraction, text mining},
}
% Journal of Peace Research article introducing NAVCO 2.0; the page range uses a double hyphen (en dash).
@article{Chenoweth+13,
  author   = {Erica Chenoweth and Orion A Lewis},
  title    = {{Unpacking nonviolent campaigns: Introducing the NAVCO 2.0 dataset}},
  journal  = {Journal of Peace Research},
  volume   = {50},
  number   = {3},
  pages    = {415--423},
  year     = {2013},
  doi      = {10.1177/0022343312471551},
  url      = {https://doi.org/10.1177/0022343312471551},
  eprint   = {https://doi.org/10.1177/0022343312471551},
  abstract = { Recent studies indicate that strategic nonviolent campaigns have been more successful over time in achieving their political objectives than violent insurgencies. But additional research has been limited by a lack of time-series data on nonviolent and violent campaigns, as well as a lack of more nuanced and detailed data on the attributes of the campaigns. In this article, we introduce the Nonviolent and Violent Campaigns and Outcomes (NAVCO) 2.0 dataset, which compiles annual data on 250 nonviolent and violent mass movements for regime change, anti-occupation, and secession from 1945 to 2006. NAVCO 2.0 also includes features of each campaign, such as participation size and diversity, the behavior of regime elites, repression and its effects on the campaign, support (or lack thereof) from external actors, and progress toward the campaign outcomes. After describing the data generation process and the dataset itself, we demonstrate why studying nonviolent resistance may yield novel insights for conflict scholars by replicating an influential study of civil war onset. This preliminary study reveals strikingly divergent findings regarding the systematic drivers of nonviolent campaign onset. Nonviolent campaign onset may be driven by separate – and in some cases, opposing – processes relative to violent campaigns. This finding underscores the value-added of the dataset, as well as the importance of evaluating methods of conflict within a unified research design. }
}
% arXiv preprint. The identifier is also exposed through the eprint fields so eprint-aware styles can link it;
% "journal" is kept so classic styles still print a venue. The model name is braced against down-casing.
@article{devlin2018bert,
  author        = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title         = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  journal       = {arXiv preprint arXiv:1810.04805},
  eprint        = {1810.04805},
  archiveprefix = {arXiv},
  year          = {2018}
}
% Workshop and shared task overview on linguistically generalizable NLP systems.
@inproceedings{Ettinger+17,
  author    = {Ettinger, Allyson and Rao, Sudha and Daum{\'e} III, Hal and Bender, Emily M.},
  title     = {{Towards Linguistically Generalizable NLP Systems: A Workshop and Shared Task}},
  booktitle = {Proceedings of the First Workshop on Building Linguistically Generalizable NLP Systems},
  publisher = {Association for Computational Linguistics},
  location  = {Copenhagen, Denmark},
  year      = {2017},
  pages     = {1--10},
  url       = {http://aclweb.org/anthology/W17-5401}
}
% Annual Review of Sociology article on the outcomes of social movements.
@article{Giugni98,
  author    = {Marco G. Giugni},
  title     = {{Was It Worth the Effort? The Outcomes and Consequences of Social Movements}},
  journal   = {Annual Review of Sociology},
  volume    = {24},
  pages     = {371--393},
  year      = {1998},
  publisher = {Annual Reviews},
  issn      = {03600572, 15452115},
  url       = {http://www.jstor.org/stable/223486},
  abstract  = {Research on social movements has usually addressed issues of movement emergence and mobilization, yet has paid less attention to their outcomes and consequences. Although there exists a considerable amount of work on this aspect, little systematic research has been done so far. Most existing work focuses on political and policy outcomes of movements, whereas few studies address their broader cultural and institutional effects. Furthermore, we still know little about the indirect and unintended consequences produced by movements. Early studies have dealt with the effectiveness of disruptive and violent actions and with the role of several organizational variables for movement success. More recently, scholars have begun to analyze movement outcomes in their political context by looking at the role of public opinion, allies, and state structures. A comparative perspective promises to be a fruitful avenue of research in this regard.}
}
% Research and Politics article assessing machine-coded event data; "pages" holds the article identifier.
@article{Hammond+14,
  author   = {Jesse Hammond and Nils B Weidmann},
  title    = {{Using machine-coded event data for the micro-level study of political violence}},
  journal  = {Research \& Politics},
  year     = {2014},
  volume   = {1},
  number   = {2},
  pages    = {2053168014539924},
  doi      = {10.1177/2053168014539924},
  url      = {https://doi.org/10.1177/2053168014539924},
  eprint   = {https://doi.org/10.1177/2053168014539924},
  abstract = { Machine-coded datasets likely represent the future of event data analysis. We assess the use of one of these datasets—Global Database of Events, Language and Tone (GDELT)—for the micro-level study of political violence by comparing it to two hand-coded conflict event datasets. Our findings indicate that GDELT should be used with caution for geo-spatial analyses at the subnational level: its overall correlation with hand-coded data is mediocre, and at the local level major issues of geographic bias exist in how events are reported. Overall, our findings suggest that due to these issues, researchers studying local conflict processes may want to wait for a more reliable geocoding method before relying too heavily on this set of machine-coded data. }
}
% EMNLP 2016 paper; the corpus name is braced in its camel-case spelling so styles keep it intact.
@inproceedings{Huang+16,
  author    = {Huang, Ruihong and Cases, Ignacio and Jurafsky, Dan and Condoravdi, Cleo and Riloff, Ellen},
  title     = {Distinguishing past, on-going, and future events: The {EventStatus} corpus},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  pages     = {44--54},
  year      = {2016}
}
% ISA convention paper introducing GDELT; the acronym is written in capitals.
% NOTE(review): volume, issue and organization look like scholar-export residue; verify against the source.
@inproceedings{Leetaru+13,
  author       = {Leetaru, Kalev and Schrodt, Philip A},
  title        = {{GDELT: Global data on events, location, and tone, 1979--2012}},
  booktitle    = {ISA annual convention},
  volume       = {2},
  issue        = {4},
  pages        = {1--49},
  year         = {2013},
  organization = {Citeseer}
}
% Conference presentation on automatically coded protest events from newswire text.
@inproceedings{Lorenzini+16,
  author    = {Lorenzini, Jasmine and Makarov, Peter and Kriesi, Hanspeter and Wueest, Bruno},
  title     = {{Towards a Dataset of Automatically Coded Protest Events from English-language Newswire Documents}},
  booktitle = {Paper presented at the Amsterdam Text Analysis Conference},
  year      = {2016}
}
% Sociological Methodology article on the SPEED project; the page range uses a double hyphen (en dash).
@article{Nardulli+15,
  author   = {Peter F. Nardulli and Scott L. Althaus and Matthew Hayes},
  title    = {{A Progressive Supervised-learning Approach to Generating Rich Civil Strife Data}},
  journal  = {Sociological Methodology},
  volume   = {45},
  number   = {1},
  pages    = {148--183},
  year     = {2015},
  doi      = {10.1177/0081175015581378},
  url      = {https://doi.org/10.1177/0081175015581378},
  eprint   = {https://doi.org/10.1177/0081175015581378},
  abstract = { “Big data” in the form of unstructured text pose challenges and opportunities to social scientists committed to advancing research frontiers. Because machine-based and human-centric approaches to content analysis have different strengths for extracting information from unstructured text, the authors argue for a collaborative, hybrid approach that combines their comparative advantages. The notion of a progressive supervised-learning approach that combines data science techniques and human coders is developed and illustrated using the Social, Political and Economic Event Database (SPEED) project’s Societal Stability Protocol. SPEED’s rich event data on civil strife reveal that conventional machine-based approaches for generating event data miss a great deal of within-category variance, while conventional human-based efforts to categorize periods of civil war or political instability routinely misspecify periods of calm and unrest. To demonstrate the potential of hybrid data collection methods, SPEED data on event intensities and origins are used to trace the changing role of political, socioeconomic, and sociocultural factors in generating global civil strife in the post–World War II era. }
}
% JOSS article describing PETRARCH2; month is the bare three-letter macro rather than a braced string.
@article{Norris2017,
  author    = {Clayton Norris and Philip Schrodt and John Beieler},
  title     = {{PETRARCH}2: Another Event Coding Program},
  journal   = {The Journal of Open Source Software},
  volume    = {2},
  number    = {9},
  month     = jan,
  year      = {2017},
  publisher = {The Open Journal},
  doi       = {10.21105/joss.00133},
  url       = {http://dx.doi.org/10.21105/joss.00133}
}
% Survey article on transfer learning.
@article{Pan+10,
  author     = {Pan, Sinno Jialin and Yang, Qiang},
  title      = {{A Survey on Transfer Learning}},
  journal    = {IEEE Trans. on Knowl. and Data Eng.},
  volume     = {22},
  number     = {10},
  issue_date = {October 2010},
  month      = oct,
  year       = {2010},
  pages      = {1345--1359},
  numpages   = {15},
  issn       = {1041-4347},
  doi        = {10.1109/TKDE.2009.191},
  url        = {http://dx.doi.org/10.1109/TKDE.2009.191},
  acmid      = {1850545},
  publisher  = {IEEE Educational Activities Department},
  address    = {Piscataway, NJ, USA},
  keywords   = {Transfer learning, Transfer learning, survey, machine learning, data mining., data mining., machine learning, survey},
}
% International Interactions article introducing a social conflict database; the proper noun in the title is braced against down-casing.
@article{Salehyan+12,
  author    = {Salehyan, Idean and Hendrix, Cullen S and Hamner, Jesse and Case, Christina and Linebarger, Christopher and Stull, Emily and Williams, Jennifer},
  title     = {Social conflict in {Africa}: A new database},
  journal   = {International Interactions},
  volume    = {38},
  number    = {4},
  pages     = {503--511},
  year      = {2012},
  publisher = {Taylor \& Francis}
}
% ISA convention paper on open event data coding; title restored from a garbled export, with the tool and alliance names braced.
% NOTE(review): the capitalisation of EL:DIABLO and PETRARCH follows common usage; confirm against the paper.
@inproceedings{Schrodt+14,
  author    = {Schrodt, Philip A and Beieler, John and Idris, Muhammed},
  title     = {Three's a Charm?: Open Event Data Coding with {EL:DIABLO}, {PETRARCH}, and the {Open Event Data Alliance}},
  booktitle = {ISA Annual Convention},
  year      = {2014}
}
% SIGIR Forum workshop report. Each author is "Last, First" with no trailing comma, because an extra comma
% makes BibTeX read the given name as a "Jr" part. The page range uses a double hyphen.
@article{Soboroff+18,
  author  = {Soboroff, Ian and Ferro, Nicola and Fuhr, Norbert},
  title   = {{Report on GLARE 2018: 1st Workshop on Generalization in Information Retrieval: Can We Predict Performance in New Domains?}},
  journal = {SIGIR Forum},
  volume  = {52},
  number  = {2},
  pages   = {132--137},
  year    = {2018},
  url     = {http://sigir.org/wp-content/uploads/2019/01/p132.pdf}
}
% Monograph on social movements; the author's given name is stored in full so styles can abbreviate it themselves.
% NOTE(review): given name expanded from the initial "S."; confirm against the title page.
@book{Tarrow94,
  author    = {Tarrow, Sidney},
  title     = {{Power in Movement: Social Movements, Collective Action and Politics}},
  series    = {Cambridge Studies in Comparative Politics},
  publisher = {Cambridge University Press},
  year      = {1994},
  isbn      = {9780521422710},
  lccn      = {lc93044901},
  url       = {https://books.google.com.tr/books?id=hN5nQgAACAAJ}
}
% Science article on global monitoring of societal events.
@article{Wang+16,
  author    = {Wang, Wei and Kennedy, Ryan and Lazer, David and Ramakrishnan, Naren},
  title     = {{Growing pains for global monitoring of societal events}},
  journal   = {Science},
  volume    = {353},
  number    = {6307},
  pages     = {1502--1503},
  year      = {2016},
  publisher = {American Association for the Advancement of Science},
  issn      = {0036-8075},
  doi       = {10.1126/science.aaf6758},
  url       = {http://science.sciencemag.org/content/353/6307/1502},
  eprint    = {http://science.sciencemag.org/content/353/6307/1502.full.pdf}
}
% Doctoral thesis on event detection and extraction from news.
@phdthesis{Wang18,
  author = {Wang, Wei},
  title  = {Event Detection and Extraction from News Articles},
  school = {Virginia Tech},
  year   = {2018}
}
% Book chapter; the "chapter" field holds the chapter title and "title" the book title, as in the original record.
@inbook{Weidman+forthcoming,
  author    = {Weidmann, Nils B. and Espen Geelmuyden R{\o}d},
  title     = {The Internet and Political Protest in Autocracies},
  chapter   = {Coding Protest Events in Autocracies},
  publisher = {Oxford University Press},
  address   = {Oxford},
  year      = 2019,
}
% CoNLL-2003 shared task overview, cited via its arXiv record.
% The first author's compound surname is given in full and the shared-task name is braced against down-casing.
% The arXiv identifier is also exposed through the eprint fields; "journal" is kept for classic styles.
@article{sang2003introduction,
  author        = {Tjong Kim Sang, Erik F. and De Meulder, Fien},
  title         = {Introduction to the {CoNLL-2003} shared task: Language-independent named entity recognition},
  journal       = {arXiv preprint cs/0306050},
  eprint        = {cs/0306050},
  archiveprefix = {arXiv},
  year          = {2003}
}