瀏覽代碼

Scrape expanded

master
diligent 4 年之前
父節點
當前提交
2b3b0b719c
共有 2 個文件被更改，包括 7 次插入和 1 次刪除
  1. 1
    0
      apartments.js
  2. 6
    1
      app.js

+ 1
- 0
apartments.js 查看文件

@@ -21,6 +21,7 @@ module.exports.apartment = function($) {
phone: $('.contactInfo .phoneNumber span').html(),
language: $('.contactInfo .languages span').html(),
url: $('.contactInfo .mortar-wrapper a').attr('href'),
todayHours: $('.todaysHoursContactContainer .todaysHours>span').text(),
officeHours: $('.daysHoursContainer').map(function () {
return {
days: $(this).find('.days').text(),

+ 6
- 1
app.js 查看文件

@@ -21,7 +21,8 @@ agenda.define('scrape', async function (job, done) {
let collection = dbo.collection('scrapes');
let scrape = await collection.findOne({ _id: _id });

for (var page = 1; page <= scrape.pageCount; page++) {
// for (var page = 1; page <= scrape.pageCount; page++) {
for (var page = 1; page <= 1; page++) {
console.log("scrapping page " + page)

const filterPage = await axios(scrape.sourceUrl + `/${page}`);
@@ -33,13 +34,17 @@ agenda.define('scrape', async function (job, done) {
}).get();

var properties = [];
// var limit = 0;
for (const link of propertyLinks) {
// if(limit == 1) break;
try {
var response = await axios(link);

var property = apartments.apartment(cheerio.load(response.data));
property.url = link;
properties.push(property);
console.log(`${link} scraped.`);
limit++;
}catch(err){
console.error(`${link} scrape failed.`);
}

Loading…
取消
儲存