15
15
import java .util .List ;
16
16
import java .util .Locale ;
17
17
import java .util .Map ;
18
+ import java .util .Objects ;
18
19
import java .util .Optional ;
19
20
import java .util .logging .Level ;
20
21
import java .util .logging .Logger ;
22
+ import java .util .stream .Collectors ;
21
23
22
24
/**
23
25
* The javadoc.io site relies on someone accessing the page for an artifact version in order to
26
28
* pages on the javadoc.io site to trigger updates.
27
29
*/
28
30
public final class JavaDocsCrawler {
29
- private static final String GROUP = "io.opentelemetry" ;
31
+ // Track list of groups and the minimum artifact versions that should be crawled. Update to the
32
+ // latest periodically to avoid crawling artifacts that stopped being published.
33
+ private static final Map <String , String > GROUPS_AND_MIN_VERSION =
34
+ Map .of (
35
+ "io.opentelemetry" , "1.49.0" ,
36
+ "io.opentelemetry.instrumentation" , "2.15.0" ,
37
+ "io.opentelemetry.contrib" , "1.46.0" ,
38
+ "io.opentelemetry.semconv" , "1.32.0" ,
39
+ "io.opentelemetry.proto" , "1.3.2" );
40
+
30
41
private static final String MAVEN_CENTRAL_BASE_URL =
31
42
"https://search.maven.org/solrsearch/select?q=g:" ;
32
43
private static final String JAVA_DOCS_BASE_URL = "https://javadoc.io/doc/" ;
@@ -41,23 +52,34 @@ public final class JavaDocsCrawler {
41
52
42
53
public static void main (String [] args ) throws Exception {
43
54
HttpClient client = HttpClient .newHttpClient ();
44
- List <Artifact > artifacts = getArtifacts (client );
45
- if (artifacts .isEmpty ()) {
46
- logger .log (Level .SEVERE , "No artifacts found" );
47
- return ;
48
- }
49
- logger .info (String .format (Locale .ROOT , "Found %d artifacts" , artifacts .size ()));
50
55
51
- List <String > updated = crawlJavaDocs (client , artifacts );
52
- if (updated .isEmpty ()) {
53
- logger .info ("No updates were needed" );
54
- return ;
55
- }
56
+ for (Map .Entry <String , String > groupAndMinVersion : GROUPS_AND_MIN_VERSION .entrySet ()) {
57
+ String group = groupAndMinVersion .getKey ();
58
+
59
+ List <Artifact > artifacts = getArtifacts (client , group );
60
+ if (artifacts .isEmpty ()) {
61
+ logger .log (Level .SEVERE , "No artifacts found for group " + group );
62
+ continue ;
63
+ }
64
+ logger .info (
65
+ String .format (Locale .ROOT , "Found %d artifacts for group " + group , artifacts .size ()));
56
66
57
- logger .info ("Artifacts that triggered updates:\n " + String .join ("\n " , updated ));
67
+ List <Artifact > updated = crawlJavaDocs (client , groupAndMinVersion .getValue (), artifacts );
68
+ if (updated .isEmpty ()) {
69
+ logger .info ("No updates were needed for group " + group );
70
+ continue ;
71
+ }
72
+
73
+ logger .info (
74
+ "Artifacts that triggered updates for group "
75
+ + group
76
+ + ":\n "
77
+ + updated .stream ().map (Artifact ::toString ).collect (Collectors .joining ("\n " )));
78
+ }
58
79
}
59
80
60
- static List <Artifact > getArtifacts (HttpClient client ) throws IOException , InterruptedException {
81
+ static List <Artifact > getArtifacts (HttpClient client , String group )
82
+ throws IOException , InterruptedException {
61
83
int start = 0 ;
62
84
Integer numFound ;
63
85
List <Artifact > result = new ArrayList <>();
@@ -67,7 +89,7 @@ static List<Artifact> getArtifacts(HttpClient client) throws IOException, Interr
67
89
Thread .sleep (THROTTLE_MS ); // try not to DDoS the site, it gets knocked over easily
68
90
}
69
91
70
- Map <?, ?> map = queryMavenCentral (client , start );
92
+ Map <?, ?> map = queryMavenCentral (client , group , start );
71
93
72
94
numFound =
73
95
Optional .ofNullable (map )
@@ -93,26 +115,26 @@ private static List<Artifact> convertToArtifacts(Map<?, ?> map) {
93
115
List <Artifact > artifacts = new ArrayList <>();
94
116
for (Object doc : docs ) {
95
117
Map <?, ?> docMap = (Map <?, ?>) doc ;
96
- String artifact = ( String ) docMap .get ("a " );
97
- String version = ( String ) docMap .get ("latestVersion " );
98
- if ( artifact != null && version != null ) {
99
- artifacts . add ( new Artifact ( artifact , version ) );
100
- }
118
+ String group = Objects . requireNonNull (( String ) docMap .get ("g" ), "g " );
119
+ String artifact = Objects . requireNonNull (( String ) docMap .get ("a" ), "a " );
120
+ String version =
121
+ Objects . requireNonNull (( String ) docMap . get ( "latestVersion" ), "latestVersion" );
122
+ artifacts . add ( new Artifact ( Objects . requireNonNull ( group ), artifact , version ));
101
123
}
102
124
return artifacts ;
103
125
})
104
126
.orElseGet (ArrayList ::new );
105
127
}
106
128
107
- private static Map <?, ?> queryMavenCentral (HttpClient client , int start )
129
+ private static Map <?, ?> queryMavenCentral (HttpClient client , String group , int start )
108
130
throws IOException , InterruptedException {
109
131
URI uri =
110
132
URI .create (
111
133
String .format (
112
134
Locale .ROOT ,
113
135
"%s%s&rows=%d&start=%d&wt=json" ,
114
136
MAVEN_CENTRAL_BASE_URL ,
115
- GROUP ,
137
+ group ,
116
138
PAGE_SIZE ,
117
139
start ));
118
140
@@ -122,21 +144,35 @@ private static List<Artifact> convertToArtifacts(Map<?, ?> map) {
122
144
if (response .statusCode () != 200 ) {
123
145
logger .log (
124
146
Level .SEVERE ,
125
- "Unexpected response code: " + response .statusCode () + ": " + response .body ());
147
+ "Unexpected response code "
148
+ + response .statusCode ()
149
+ + " for uri: "
150
+ + uri .toASCIIString ()
151
+ + "\n "
152
+ + response .body ());
126
153
throw new IOException ("Unable to pull Maven central artifacts list" );
127
154
}
128
155
return objectMapper .readValue (response .body (), Map .class );
129
156
}
130
157
131
- static List <String > crawlJavaDocs (HttpClient client , List <Artifact > artifacts )
158
+ static List <Artifact > crawlJavaDocs (
159
+ HttpClient client , String minVersion , List <Artifact > artifacts )
132
160
throws IOException , InterruptedException {
133
- List <String > updatedArtifacts = new ArrayList <>();
161
+ List <Artifact > updatedArtifacts = new ArrayList <>();
134
162
135
163
for (Artifact artifact : artifacts ) {
164
+ if (artifact .getVersion ().compareTo (minVersion ) < 0 ) {
165
+ logger .info (
166
+ String .format (
167
+ "Skipping crawling %s due to version %s being less than minVersion %s" ,
168
+ artifact , artifact .getVersion (), minVersion ));
169
+ continue ;
170
+ }
171
+
136
172
String [] parts = artifact .getName ().split ("-" );
137
173
StringBuilder path = new StringBuilder ();
138
174
path .append (JAVA_DOCS_BASE_URL )
139
- .append (GROUP )
175
+ .append (artifact . getGroup () )
140
176
.append ("/" )
141
177
.append (artifact .getName ())
142
178
.append ("/" )
@@ -146,6 +182,7 @@ static List<String> crawlJavaDocs(HttpClient client, List<Artifact> artifacts)
146
182
.append ("/package-summary.html" );
147
183
148
184
HttpRequest crawlRequest = HttpRequest .newBuilder (URI .create (path .toString ())).GET ().build ();
185
+ logger .info (String .format ("Crawling %s at: %s" , artifact , path ));
149
186
HttpResponse <String > crawlResponse =
150
187
client .send (crawlRequest , HttpResponse .BodyHandlers .ofString ());
151
188
@@ -156,15 +193,15 @@ static List<String> crawlJavaDocs(HttpClient client, List<Artifact> artifacts)
156
193
String .format (
157
194
Locale .ROOT ,
158
195
"Crawl failed for %s with status code %d at URL %s\n Response: %s" ,
159
- artifact . getName () ,
196
+ artifact ,
160
197
crawlResponse .statusCode (),
161
198
path ,
162
199
crawlResponse .body ()));
163
200
continue ;
164
201
}
165
202
166
203
if (crawlResponse .body ().contains (JAVA_DOC_DOWNLOADED_TEXT )) {
167
- updatedArtifacts .add (artifact . getName () );
204
+ updatedArtifacts .add (artifact );
168
205
}
169
206
170
207
Thread .sleep (THROTTLE_MS ); // some light throttling
0 commit comments