| var request = require('request').defaults({ encoding: null }); | var request = require('request').defaults({ encoding: null }); | ||||
| module.exports.apartment = function($) { | module.exports.apartment = function($) { | ||||
| var result = { | |||||
| // images: request.get($('.aspectRatioImage').find('img')[0].attribs.src, function (error, response, body) { | |||||
| // if (!error && response.statusCode == 200) { | |||||
| // data = "data:" + response.headers["content-type"] + ";base64," + Buffer.from(body).toString('base64'); | |||||
| // return data; | |||||
| // } | |||||
| // }), | |||||
| var result = { | |||||
| images: $('.aspectRatioImage').map(function(){ | |||||
| return { src : $(this).find('img').attr("src") }; | |||||
| }).get(), | |||||
| name: $('#propertyName').text(), | name: $('#propertyName').text(), | ||||
| description: $('#descriptionSection>p').html(), | description: $('#descriptionSection>p').html(), | ||||
| rentInfo : $('#priceBedBathAreaInfoWrapper .rentInfoDetail').html(), | |||||
| review: $('.propertyReviewContainer .reviewRating').html(), | |||||
| lastUpdate : $('.freshnessUserActionsContainer .lastUpdated>span').text(), | |||||
| address: { | address: { | ||||
| street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | ||||
| city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), | city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), |
| var apartments = require('./apartments.js'); | var apartments = require('./apartments.js'); | ||||
| var houses = require('./houses.js'); | var houses = require('./houses.js'); | ||||
| var condos = require('./condos.js'); | |||||
| // jobs | // jobs | ||||
| var mongoUrl = config.get("mongo"); | var mongoUrl = config.get("mongo"); | ||||
| const dbo = client.db(database); | const dbo = client.db(database); | ||||
| let collection = dbo.collection('scrapes'); | let collection = dbo.collection('scrapes'); | ||||
| let scrape = await collection.findOne({ _id: _id }); | let scrape = await collection.findOne({ _id: _id }); | ||||
| scrape.sourceUrl = "https://www.apartments.com/new-york-ny/"; | |||||
| scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/"; | |||||
| const filterPage = await axios(scrape.sourceUrl); | const filterPage = await axios(scrape.sourceUrl); | ||||
| const html = filterPage.data; | const html = filterPage.data; | ||||
| for (const link of propertyLins){ | for (const link of propertyLins){ | ||||
| var response = await axios(link); | var response = await axios(link); | ||||
| var property = apartments.apartment(cheerio.load(response.data)); | |||||
| //apartments | |||||
| //var property = apartments.apartment(cheerio.load(response.data)); | |||||
| //houses | |||||
| //var property = houses.house(cheerio.load(response.data)); | |||||
| //condos | |||||
| var property = condos.condo(cheerio.load(response.data)); | |||||
| properties.push(property); | properties.push(property); | ||||
| console.log(`${link} scraped.`); | console.log(`${link} scraped.`); | ||||
| } | } |
/**
 * Removes every space character and every line feed from one scraped string.
 *
 * NOTE(review): stripping *all* spaces also glues words together
 * ("Oak Park" -> "OakPark"). The previous code ran four identical
 * `.split(' ').join('')` passes, which suggests the literals were originally
 * runs of several spaces (indentation) — confirm the intended widths before
 * relaxing this.
 *
 * @param {string} text - Raw text or HTML taken from the page.
 * @returns {string} The text without ' ' and '\n' characters.
 */
function stripSpacesAndNewlines(text) {
    return text.split(' ').join('').split('\n').join('');
}

/**
 * Recursively cleans a scraped value and returns a plain copy of it.
 *
 * The result has the same shape the former JSON.stringify/JSON.parse round
 * trip produced for this data: properties whose value is `undefined` are
 * dropped, `undefined` array items become `null`, and `null` is kept.
 * Working on the values themselves (instead of on the serialized JSON text)
 * means a literal backslash followed by "n" inside a value can no longer be
 * mistaken for an encoded newline and corrupt or break the JSON.
 *
 * @param {*} value - String, null, undefined, array or plain object.
 * @returns {*} Cleaned copy of `value`.
 */
function cleanScrapedValue(value) {
    if (typeof value === 'string') {
        return stripSpacesAndNewlines(value);
    }
    if (Array.isArray(value)) {
        return value.map(function (item) {
            const cleanedItem = cleanScrapedValue(item);
            return cleanedItem === undefined ? null : cleanedItem;
        });
    }
    if (value !== null && typeof value === 'object') {
        const cleanedObject = {};
        for (const [key, child] of Object.entries(value)) {
            const cleanedChild = cleanScrapedValue(child);
            if (cleanedChild !== undefined) {
                cleanedObject[key] = cleanedChild;
            }
        }
        return cleanedObject;
    }
    return value;
}

/**
 * Extracts the fields of a condo listing from a loaded page.
 *
 * @param {Function} $ - Selector function for the listing page; the caller
 *   passes the result of `cheerio.load(html)`.
 * @returns {Object} Plain object with `images`, `name`, `description`,
 *   `lastUpdate`, `address`, `rentInfo`, `features` and `contact`, with
 *   spaces and line feeds removed from every string value.
 */
module.exports.condo = function($) {
    const result = {
        // One { src } entry per image container found on the page.
        images: $('.aspectRatioImage').map(function(){
            return { src : $(this).find('img').attr("src") };
        }).get(),
        name: $('#propertyName').text(),
        description: $('#descriptionSection>p').html(),
        lastUpdate : $('.freshnessUserActionsContainer .lastUpdated>span').text(),
        address: {
            street: $('.propertyAddressContainer h2>span:nth-child(1)').text(),
            city: $('.propertyAddressContainer h2>span:nth-child(2)').text(),
            zip: {
                state: $('.propertyAddressContainer .stateZipContainer>span:nth-child(1)').text(),
                code: $('.propertyAddressContainer .stateZipContainer>span:nth-child(2)').text(),
            }
        },
        // The first column is read as the rent; the remaining columns are
        // collected as features.
        rentInfo : $('#priceBedBathAreaInfoWrapper .column:eq(0)').find(".rentInfoDetail").html(),
        features: $('#priceBedBathAreaInfoWrapper .column:not(:first)').map(function () {
            return $(this).find('.rentInfoDetail').text();
        }).get(),
        contact: {
            phone: $('.contactInfo .phoneNumber span').html(),
            agentFullName: $('.agentFullName').text(),
        },
    };
    return cleanScrapedValue(result);
}
| module.exports.house = function($) { | module.exports.house = function($) { | ||||
| var result = { | var result = { | ||||
| images: $('.aspectRatioImage').map(function(){ | |||||
| return { src : $(this).find('img').attr("src") }; | |||||
| }).get(), | |||||
| name: $('#propertyName').text(), | name: $('#propertyName').text(), | ||||
| description: $('#descriptionSection>p').html(), | description: $('#descriptionSection>p').html(), | ||||
| lastUpdate : $('.freshnessUserActionsContainer .lastUpdated>span').text(), | |||||
| address: { | address: { | ||||
| street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | street: $('.propertyAddressContainer h2>span:nth-child(1)').text(), | ||||
| city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), | city: $('.propertyAddressContainer h2>span:nth-child(2)').text(), | ||||
| code: $('.propertyAddressContainer .stateZipContainer>span:nth-child(2)').text(), | code: $('.propertyAddressContainer .stateZipContainer>span:nth-child(2)').text(), | ||||
| } | } | ||||
| }, | }, | ||||
| rentInfo : $('#priceBedBathAreaInfoWrapper .column:eq(0)').find(".rentInfoDetail").html(), | |||||
| features: $('#priceBedBathAreaInfoWrapper .column:not(:first)').map(function () { | |||||
| return $(this).find('.rentInfoDetail').text(); | |||||
| }).get(), | |||||
| contact: { | contact: { | ||||
| phone: $('.contactInfo .phoneNumber span').html(), | phone: $('.contactInfo .phoneNumber span').html(), | ||||
| //language: $('.contactInfo .languages span').html(), | |||||
| url: $('.contactInfo .mortar-wrapper a').attr('href'), | |||||
| // officeHours: $('.daysHoursContainer').map(function () { | |||||
| // return { | |||||
| // days: $(this).find('.days').text(), | |||||
| // hours: $(this).find('.hours').text(), | |||||
| // }; | |||||
| // }).get() | |||||
| agentFullName: $('.agentFullName').text(), | |||||
| }, | }, | ||||
| }; | }; | ||||
| var data = JSON.parse(JSON.stringify(result) | var data = JSON.parse(JSON.stringify(result) |