| let collection = dbo.collection('scrapes'); | let collection = dbo.collection('scrapes'); | ||||
| let scrape = await collection.findOne({ _id: _id }); | let scrape = await collection.findOne({ _id: _id }); | ||||
| const filterPage = await axios(scrape.sourceUrl); | |||||
| const html = filterPage.data; | |||||
| const $ = cheerio.load(html); | |||||
| const propertyLins = $('#placardContainer .property-link').map(function () { | |||||
| return $(this).attr('href'); | |||||
| }).get(); | |||||
| var properties = []; | |||||
| for (const link of propertyLins){ | |||||
| var response = await axios(link); | |||||
| var property = apartments.apartment(cheerio.load(response.data)); | |||||
| properties.push(property); | |||||
| console.log(`${link} scraped.`); | |||||
| for (var page = 1; page <= scrape.pageCount; page++) { | |||||
| console.log("scrapping page " + page) | |||||
| const filterPage = await axios(scrape.sourceUrl + `/${page}`); | |||||
| const html = filterPage.data; | |||||
| const $ = cheerio.load(html); | |||||
| const propertyLinks = $('#placardContainer .property-link').map(function () { | |||||
| return $(this).attr('href'); | |||||
| }).get(); | |||||
| var properties = []; | |||||
| for (const link of propertyLinks) { | |||||
| try { | |||||
| var response = await axios(link); | |||||
| var property = apartments.apartment(cheerio.load(response.data)); | |||||
| properties.push(property); | |||||
| console.log(`${link} scraped.`); | |||||
| }catch(err){ | |||||
| console.error(`${link} scrape failed.`); | |||||
| } | |||||
| } | |||||
| } | } | ||||
| await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } }); | await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } }); | ||||
| }); | }); | ||||
| }); | }); | ||||
| // app.set('json spaces', 2); | |||||
| // const axios = require('axios'); | |||||
| // const cheerio = require('cheerio'); | |||||
| // const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/'; | |||||
| // app.get('/', (req, res) => { | |||||
| // axios(url).then(response => { | |||||
| // const html = response.data; | |||||
| // const $ = cheerio.load(html); | |||||
| // var data = apartments.apartment($); | |||||
| // res.json(data); | |||||
| // }); | |||||
| // }); | |||||
| // app.get('/houses/*', (req, res) => { | |||||
| // var url = req.params[0]; | |||||
| // axios(url).then(response => { | |||||
| // const html = response.data; | |||||
| // const $ = cheerio.load(html); | |||||
| // var data = houses.house($); | |||||
| // res.json(data); | |||||
| // }); | |||||
| // }); | |||||
| // app.get('/filters/*', async (req, res) => { | |||||
| // var url = req.params[0]; | |||||
| // const filterPage = await axios(url); | |||||
| // const html = filterPage.data; | |||||
| // const $ = cheerio.load(html); | |||||
| // const propertyLins = $('#placardContainer .property-link').map(function () { | |||||
| // return $(this).attr('href'); | |||||
| // }).get(); | |||||
| // var properties = []; | |||||
| // for (const link of propertyLins){ | |||||
| // var response = await axios(link); | |||||
| // var property = apartments.apartment(cheerio.load(response.data)); | |||||
| // properties.push(property); | |||||
| // } | |||||
| // res.json(properties); | |||||
| // }); | |||||
| // app.get('/apartments/*', (req, res) => { | |||||
| // var url = req.params[0]; | |||||
| // axios(url).then(response => { | |||||
| // const html = response.data; | |||||
| // const $ = cheerio.load(html); | |||||
| // var data = apartments.apartment($); | |||||
| // createListing(client, data); | |||||
| // res.json(data); | |||||
| // }); | |||||
| // }); | |||||
| app.get("/scrapes", async (req, res) => { | app.get("/scrapes", async (req, res) => { | ||||
| try { | try { | ||||
| const dbo = client.db(database); | const dbo = client.db(database); | ||||
| // query builder | // query builder | ||||
| var query = `https://www.apartments.com`; | var query = `https://www.apartments.com`; | ||||
| if(type){ | |||||
| if (type) { | |||||
| query += `/${type}`; | query += `/${type}`; | ||||
| } | } | ||||
| if(location){ | |||||
| if (location) { | |||||
| var locationQuery = location.replace(", ", "-").replace(" ", "-").toLowerCase(); | var locationQuery = location.replace(", ", "-").replace(" ", "-").toLowerCase(); | ||||
| query += `/${locationQuery}`; | query += `/${locationQuery}`; | ||||
| } | } | ||||
| if(beds){ | |||||
| if (beds) { | |||||
| query += `/${beds}-bedrooms`; | query += `/${beds}-bedrooms`; | ||||
| } | } | ||||
| if(price){ | |||||
| if(beds){ | |||||
| if (price) { | |||||
| if (beds) { | |||||
| query += `-over-${price}`; | query += `-over-${price}`; | ||||
| }else{ | |||||
| } else { | |||||
| query += `/over-${price}`; | query += `/over-${price}`; | ||||
| } | } | ||||
| } | } | ||||
| if(lifestyle){ | |||||
| if (lifestyle) { | |||||
| query += `/${lifestyle}`; | query += `/${lifestyle}`; | ||||
| } | } | ||||
| console.log(query); | console.log(query); | ||||
| //todo: save data into the database | |||||
| const filterPage = await axios(query); | |||||
| const html = filterPage.data; | |||||
| const $ = cheerio.load(html); | |||||
| var $pageRange = $(".pageRange"); | |||||
| var pagesCount = 0; | |||||
| var resultCount = 0; | |||||
| if (!$pageRange.length) { | |||||
| let propertyLinks = $('#placardContainer .property-link').map(function () { | |||||
| return $(this).attr('href'); | |||||
| }).get(); | |||||
| if (!propertyLinks.length) { | |||||
| console.error("No results"); | |||||
| return res.status(404).json(); | |||||
| } | |||||
| resultCount = propertyLinks.length; | |||||
| } else { | |||||
| pagesCount = $pageRange.text().slice($pageRange.text().lastIndexOf("of ") + 3); | |||||
| resultCount = pagesCount * 25; | |||||
| } | |||||
| try { | try { | ||||
| const dbo = client.db(database); | const dbo = client.db(database); | ||||
| let collection = dbo.collection('scrapes'); | let collection = dbo.collection('scrapes'); | ||||
| const dt = new Date(); | |||||
| dt.setSeconds( dt.getSeconds() + resultCount ); | |||||
| let res = await collection.insertOne({ | let res = await collection.insertOne({ | ||||
| count: 21, | |||||
| estimate: Date.now(), | |||||
| count: resultCount, | |||||
| pageCount: pagesCount, | |||||
| estimate: dt, | |||||
| sourceUrl: query, | sourceUrl: query, | ||||
| location: location, | location: location, | ||||
| filters: [ | filters: [ |