Skip to content

Commit c45dd7c

Browse files
committed
Add a simple example model that operates on webcompat issues
First model enabled by the work on #259
1 parent 5d1fcbd commit c45dd7c

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

bugbug/models/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"annotateignore": "bugbug.models.annotate_ignore.AnnotateIgnoreModel",
1313
"assignee": "bugbug.models.assignee.AssigneeModel",
1414
"backout": "bugbug.models.backout.BackoutModel",
15+
"browsername": "bugbug.models.browsername.BrowserNameModel",
1516
"bug": "bugbug.model.BugModel",
1617
"bugtype": "bugbug.models.bugtype.BugTypeModel",
1718
"component": "bugbug.models.component.ComponentModel",

bugbug/models/browsername.py

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# -*- coding: utf-8 -*-
2+
# This Source Code Form is subject to the terms of the Mozilla Public
3+
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
4+
# You can obtain one at http://mozilla.org/MPL/2.0/.
5+
6+
import logging
7+
8+
import xgboost
9+
from sklearn.compose import ColumnTransformer
10+
from sklearn.feature_extraction import DictVectorizer
11+
from sklearn.pipeline import Pipeline
12+
13+
from bugbug import feature_cleanup, github, issue_features, utils
14+
from bugbug.model import IssueModel
15+
16+
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
17+
18+
19+
class BrowserNameModel(IssueModel):
    """Binary issue model keyed on the ``browser-firefox`` label.

    An issue is assigned class ``1`` when one of its labels is named
    ``browser-firefox`` and class ``0`` otherwise (see ``get_labels``).
    Features come from a two-step pipeline: an ``IssueExtractor`` followed by
    a ``ColumnTransformer`` over the ``data``, ``title`` and ``first_comment``
    columns.
    """

    def __init__(self, lemmatization=False):
        """Build the feature-extraction pipeline and the classifier.

        Args:
            lemmatization: Passed through unchanged to ``IssueModel.__init__``.
        """
        IssueModel.__init__(self, lemmatization)

        # Extractors and cleanup callables handed to the IssueExtractor.
        extractors = [issue_features.comment_count()]
        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]
        issue_extractor = issue_features.IssueExtractor(extractors, cleaners)

        # Both text columns use a vectorizer configured with the same min_df.
        text_min_df = 0.0001
        column_union = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(min_df=text_min_df), "title"),
                (
                    "first_comment",
                    self.text_vectorizer(min_df=text_min_df),
                    "first_comment",
                ),
            ]
        )

        self.extraction_pipeline = Pipeline(
            [
                ("issue_extractor", issue_extractor),
                ("union", column_union),
            ]
        )

        self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf.set_params(predictor="cpu_predictor")

    def get_labels(self):
        """Assign a 0/1 class to every issue returned by ``github.get_issues()``.

        Returns:
            A ``(classes, [0, 1])`` tuple. ``classes`` maps each issue's
            ``"number"`` to ``1`` if any of its labels is named
            ``browser-firefox`` and to ``0`` otherwise; the second element is
            the list of possible class values.
        """
        classes = {}

        for issue in github.get_issues():
            number = issue["number"]
            is_firefox = any(
                label["name"] == "browser-firefox" for label in issue["labels"]
            )

            if is_firefox:
                classes[number] = 1
            elif number not in classes:
                # Never downgrade a number that was already recorded.
                classes[number] = 0

        logger.info(
            f"{sum(1 for label in classes.values() if label == 1)} issues belong to Firefox"
        )
        logger.info(
            f"{sum(1 for label in classes.values() if label == 0)} issues do not belong to Firefox"
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names reported by the pipeline's ``union`` step."""
        # NOTE(review): newer scikit-learn releases expose
        # ``get_feature_names_out`` instead of ``get_feature_names`` on
        # ColumnTransformer -- confirm against the pinned version.
        union_step = self.extraction_pipeline.named_steps["union"]
        return union_step.get_feature_names()

0 commit comments

Comments
 (0)