| @@ -1,14 +1,14 @@ | |||
| var request = require('request').defaults({ encoding: null }); | |||
| module.exports.apartment = function($) { | |||
| var result = { | |||
| // images: request.get($('.aspectRatioImage').find('img')[0].attribs.src, function (error, response, body) { | |||
| // if (!error && response.statusCode == 200) { | |||
| // data = "data:" + response.headers["content-type"] + ";base64," + Buffer.from(body).toString('base64'); | |||
| // return data; | |||
| // } | |||
| // }), | |||
| var result = { | |||
| images: $('.aspectRatioImage').map(function(){ | |||
| return { src : $(this).find('img').attr("src") }; | |||
| }).get(), | |||
| name: $('#propertyName').text(), | |||
| description: $('#descriptionSection>p').html(), | |||
| rentInfo : $('#priceBedBathAreaInfoWrapper .rentInfoDetail').html(), | |||
| review: $('.propertyReviewContainer .reviewRating').html(), | |||
| lastUpdate : $('.freshnessUserActionsContainer .lastUpdated>span').text(), | |||
| address: { | |||
| street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | |||
| city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), | |||
| @@ -8,6 +8,7 @@ const ObjectID = require('mongodb').ObjectID; | |||
| var apartments = require('./apartments.js'); | |||
| var houses = require('./houses.js'); | |||
| var condos = require('./condos.js'); | |||
| // jobs | |||
| var mongoUrl = config.get("mongo"); | |||
| @@ -20,7 +21,7 @@ agenda.define('scrape', async function (job, done) { | |||
| const dbo = client.db(database); | |||
| let collection = dbo.collection('scrapes'); | |||
| let scrape = await collection.findOne({ _id: _id }); | |||
| scrape.sourceUrl = "https://www.apartments.com/new-york-ny/"; | |||
| scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/"; | |||
| const filterPage = await axios(scrape.sourceUrl); | |||
| const html = filterPage.data; | |||
| @@ -34,7 +35,12 @@ agenda.define('scrape', async function (job, done) { | |||
| for (const link of propertyLins){ | |||
| var response = await axios(link); | |||
| var property = apartments.apartment(cheerio.load(response.data)); | |||
| //apartments | |||
| //var property = apartments.apartment(cheerio.load(response.data)); | |||
| //houses | |||
| //var property = houses.house(cheerio.load(response.data)); | |||
| //condos | |||
| var property = condos.condo(cheerio.load(response.data)); | |||
| properties.push(property); | |||
| console.log(`${link} scraped.`); | |||
| } | |||
| @@ -0,0 +1,34 @@ | |||
/**
 * Extracts the fields of a condo listing page into a plain record.
 *
 * @param {Function} $ - cheerio-style selector function for the loaded page;
 *   it is called with CSS selectors and with the elements passed to `.map()`.
 * @returns {Object} Record with `images`, `name`, `description`, `lastUpdate`,
 *   `address`, `rentInfo`, `features` and `contact`. Every string value has
 *   its whitespace runs (spaces, tabs, newlines) collapsed to a single space
 *   and is trimmed; non-string values such as `null` pass through unchanged.
 */
module.exports.condo = function ($) {
    // Normalises whitespace inside each string instead of editing the
    // serialised JSON text: deleting every space glued words together, and
    // stripping the two characters `\n` from JSON could break escaped
    // backslashes and make JSON.parse throw.
    const tidy = (value) => {
        if (typeof value === 'string') {
            return value.replace(/\s+/g, ' ').trim();
        }
        if (Array.isArray(value)) {
            // A JSON round-trip turns undefined array slots into null; keep that.
            return value.map((item) => (item === undefined ? null : tidy(item)));
        }
        if (value !== null && typeof value === 'object') {
            const cleaned = {};
            for (const [key, inner] of Object.entries(value)) {
                // A JSON round-trip drops undefined properties; keep that too.
                if (inner !== undefined) {
                    cleaned[key] = tidy(inner);
                }
            }
            return cleaned;
        }
        return value;
    };

    const addressBox = '.propertyAddressContainer';
    const priceBox = '#priceBedBathAreaInfoWrapper';

    const result = {
        // One { src } entry per image wrapper found on the page.
        images: $('.aspectRatioImage').map(function () {
            return { src: $(this).find('img').attr('src') };
        }).get(),
        name: $('#propertyName').text(),
        description: $('#descriptionSection>p').html(),
        lastUpdate: $('.freshnessUserActionsContainer .lastUpdated>span').text(),
        address: {
            street: $(`${addressBox} h2>span:nth-child(1)`).text(),
            city: $(`${addressBox} h2>span:nth-child(2)`).text(),
            zip: {
                state: $(`${addressBox} .stateZipContainer>span:nth-child(1)`).text(),
                code: $(`${addressBox} .stateZipContainer>span:nth-child(2)`).text(),
            },
        },
        // The first column feeds rentInfo; every remaining column is a feature.
        rentInfo: $(`${priceBox} .column:eq(0)`).find('.rentInfoDetail').html(),
        features: $(`${priceBox} .column:not(:first)`).map(function () {
            return $(this).find('.rentInfoDetail').text();
        }).get(),
        contact: {
            phone: $('.contactInfo .phoneNumber span').html(),
            agentFullName: $('.agentFullName').text(),
        },
    };

    return tidy(result);
};
| @@ -1,7 +1,11 @@ | |||
| module.exports.house = function($) { | |||
| var result = { | |||
| images: $('.aspectRatioImage').map(function(){ | |||
| return { src : $(this).find('img').attr("src") }; | |||
| }).get(), | |||
| name: $('#propertyName').text(), | |||
| description: $('#descriptionSection>p').html(), | |||
| lastUpdate : $('.freshnessUserActionsContainer .lastUpdated>span').text(), | |||
| address: { | |||
| street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | |||
| city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), | |||
| @@ -10,16 +14,14 @@ module.exports.house = function($) { | |||
| code: $('.propertyAddressContainer .stateZipContainer>span:nth-child(2)').text(), | |||
| } | |||
| }, | |||
| rentInfo : $('#priceBedBathAreaInfoWrapper .column:eq(0)').find(".rentInfoDetail").html(), | |||
| features: $('#priceBedBathAreaInfoWrapper .column:not(:first)').map(function () { | |||
| return $(this).find('.rentInfoDetail').text(); | |||
| }).get(), | |||
| contact: { | |||
| phone: $('.contactInfo .phoneNumber span').html(), | |||
| //language: $('.contactInfo .languages span').html(), | |||
| url: $('.contactInfo .mortar-wrapper a').attr('href'), | |||
| // officeHours: $('.daysHoursContainer').map(function () { | |||
| // return { | |||
| // days: $(this).find('.days').text(), | |||
| // hours: $(this).find('.hours').text(), | |||
| // }; | |||
| // }).get() | |||
| agentFullName: $('.agentFullName').text(), | |||
| }, | |||
| }; | |||
| var data = JSON.parse(JSON.stringify(result) | |||