Replace preq with fetch in tests #109

Merged
merged 2 commits into from
Jan 28, 2025
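The change is mechanical: everywhere the tests fetched a page with the promisified request library (preq) and read callRes.body, they now use the built-in fetch API and read the response text. A minimal standalone sketch of the pattern, assuming Node 18+ where fetch is global; the example URL and console.log call are illustrative only, not part of the PR:

'use strict';

// Replaces preq.get( url ).then( ( callRes ) => callRes.body )
function fetchBody( url ) {
	return fetch( url ).then( ( res ) => {
		// res.body is a ReadableStream of a Uint8Array; res.text() resolves to the page as a string
		return res.text();
	} );
}

fetchBody( 'http://example.com' ).then( ( body ) => {
	console.log( body.length );
} );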
test/errors.js: 62 changes (34 additions, 28 deletions)
@@ -6,7 +6,6 @@

const cheerio = require( 'cheerio' );
const meta = require( '../index' );
-const preq = require( 'preq' ); // Promisified Request library
const assert = require( './utils/assert.js' );
const fs = require( 'fs' );

@@ -17,91 +16,98 @@ describe( 'errors', function () {

this.timeout( 40000 );

+function fetchBody( url ) {
+return fetch( url ).then( ( res ) => {
+// res.body is a ReadableStream of a Uint8Array, but we just want the string
+return res.text();
+} );
+}

it( 'should not find schema.org metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseSchemaOrgMicrodata( $ );
return assert.fails( prom );
} );
} );

it( 'should not find BE Press metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseBEPress( $ );
return assert.fails( prom );
} );
} );

it( 'should not find coins metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseCOinS( $ );
return assert.fails( prom );
} );
} );

it( 'should not find dublin core metadata, reject promise', () => {
const url = 'http://www.laprovence.com/article/actualites/3411272/marseille-un-proche-du-milieu-corse-abattu-par-balles-en-plein-jour.html';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseDublinCore( $ );
return assert.fails( prom );
} );
} );

it( 'should not find highwire press metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseHighwirePress( $ );
return assert.fails( prom );
} );
} );

it( 'should not find open graph metadata, reject promise', () => {
const url = 'http://www.example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseOpenGraph( $ );
return assert.fails( prom );
} );
} );

it( 'should not find eprints metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseEprints( $ );
return assert.fails( prom );
} );
} );

it( 'should not find twitter metadata, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseTwitter( $ );
return assert.fails( prom );
} );
} );

it( 'should not find JSON-LD, reject promise', () => {
const url = 'http://example.com';
-return preq.get( url )
-.then( ( callRes ) => {
-const $ = cheerio.load( callRes.body );
+return fetchBody( url )
+.then( ( body ) => {
+const $ = cheerio.load( body );
const prom = meta.parseJsonLd( $ );
return assert.fails( prom );
} );
test/scraping.js: 33 changes (17 additions, 16 deletions)
@@ -2,7 +2,6 @@

const meta = require( '../index' );
const assert = require( 'assert' );
-const preq = require( 'preq' );
const cheerio = require( 'cheerio' );

// mocha defines to avoid eslint breakage
@@ -16,12 +15,15 @@ describe( 'scraping', function () {
const acceptHeader = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8';

function getWithHeaders( url ) {
-return preq.get( {
-uri: url,
+return fetch( url, {
+method: 'GET',
headers: {
'User-Agent': userAgent,
Accept: acceptHeader
}
+} ).then( ( res ) => {
+// res.body is a ReadableStream of a Uint8Array, but we just want the string
+return res.text();
} );
}

@@ -54,11 +56,10 @@
describe( 'parseBEPress function', () => {
it( 'should get BE Press metadata tags', () => {
const url = 'http://biostats.bepress.com/harvardbiostat/paper154/';
-return getWithHeaders( url ).then( ( callRes ) => {
+return getWithHeaders( url ).then( ( body ) => {
const expectedAuthors = [ 'Claggett, Brian', 'Xie, Minge', 'Tian, Lu' ];
const expectedAuthorInstitutions = [ 'Harvard', 'Rutgers University - New Brunswick/Piscataway', 'Stanford University School of Medicine' ];
-const chtml = cheerio.load( callRes.body );
-
+const chtml = cheerio.load( body );
return meta.parseBEPress( chtml )
.then( ( results ) => {
assert.deepStrictEqual( results.author, expectedAuthors );
@@ -78,8 +79,8 @@
describe( 'parseCOinS function', () => {
it( 'should get COinS metadata', () => {
const url = 'https://en.wikipedia.org/wiki/Viral_phylodynamics';
-return getWithHeaders( url ).then( ( callRes ) => {
-const chtml = cheerio.load( callRes.body );
+return getWithHeaders( url ).then( ( body ) => {
+const chtml = cheerio.load( body );
return meta.parseCOinS( chtml )
.then( ( results ) => {
assert.ok( Array.isArray( results ), `Expected Array, got ${ typeof results }` );
@@ -93,8 +94,8 @@
describe( 'parseEPrints function', () => {
it( 'should get EPrints metadata', () => {
const url = 'http://eprints.gla.ac.uk/113711/';
-return getWithHeaders( url ).then( ( callRes ) => {
-const chtml = cheerio.load( callRes.body );
+return getWithHeaders( url ).then( ( body ) => {
+const chtml = cheerio.load( body );
const expectedAuthors = [ 'Gatherer, Derek', 'Kohl, Alain' ];

return meta.parseEprints( chtml )
@@ -112,8 +113,8 @@
it( 'should get html lang parameter', () => {
const expected = 'fr';
const url = 'http://www.lemonde.fr';
-return getWithHeaders( url ).then( ( callRes ) => {
-const chtml = cheerio.load( callRes.body );
+return getWithHeaders( url ).then( ( body ) => {
+const chtml = cheerio.load( body );
return meta.parseGeneral( chtml ).then( ( results ) => {
assert.strictEqual( results.lang, expected );
} );
@@ -123,8 +124,8 @@
it( 'should get html dir parameter', () => {
const expected = 'rtl';
const url = 'https://www.iranrights.org/fa/';
-return getWithHeaders( url ).then( ( callRes ) => {
-const chtml = cheerio.load( callRes.body );
+return getWithHeaders( url ).then( ( body ) => {
+const chtml = cheerio.load( body );
return meta.parseGeneral( chtml ).then( ( results ) => {
assert.strictEqual( results.dir, expected );
} );
@@ -134,8 +135,8 @@

it( 'should not have any undefined values', () => {
const url = 'http://web.archive.org/web/20220127144804/https://www.cnet.com/special-reports/vr101/';
-return getWithHeaders( url ).then( ( callRes ) => {
-const chtml = cheerio.load( callRes.body );
+return getWithHeaders( url ).then( ( body ) => {
+const chtml = cheerio.load( body );
return meta.parseAll( chtml )
.then( ( results ) => {
Object.keys( results ).forEach( ( metadataType ) => {
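For the scraping tests, the same pattern carries the request headers through fetch's init object. A small self-contained sketch of that variant; the acceptHeader value is the one shown in the diff above, while the userAgent string here is a placeholder, not the one the test file defines:

'use strict';

// Placeholder user agent; the real tests define their own value above this hunk.
const userAgent = 'example-user-agent/1.0';
const acceptHeader = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8';

function getWithHeaders( url ) {
	return fetch( url, {
		method: 'GET',
		headers: {
			'User-Agent': userAgent,
			Accept: acceptHeader
		}
	} ).then( ( res ) => res.text() );
}

getWithHeaders( 'https://en.wikipedia.org/wiki/Viral_phylodynamics' ).then( ( body ) => {
	console.log( body.slice( 0, 80 ) );
} );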