|
|
|
@@ -1,243 +0,0 @@ |
|
|
|
const config = require('config'); |
|
|
|
const axios = require("axios"); |
|
|
|
const cheerio = require("cheerio"); |
|
|
|
const express = require('express'); |
|
|
|
const cors = require('cors'); |
|
|
|
const MongoClient = require('mongodb').MongoClient; |
|
|
|
const ObjectID = require('mongodb').ObjectID; |
|
|
|
|
|
|
|
var apartments = require('./apartments.js'); |
|
|
|
var houses = require('./houses.js'); |
|
|
|
var condos = require('./condos.js'); |
|
|
|
|
|
|
|
// jobs |
|
|
|
// `var` is deliberate: the database-setup section later in this file declares
// `mongoUrl` again with `var`, and a `let`/`const` binding here would collide
// with that redeclaration.
var mongoUrl = config.get("mongo");

// Value of the "agenda" config key; handed to Agenda below as its db address.
const agendaDb = config.get("agenda");

const { Agenda } = require('agenda');

// Single scheduler instance used by the job definition, the routes and shutdown.
const agenda = new Agenda({ db: { address: agendaDb } });
|
|
|
/**
 * Agenda job "scrape".
 *
 * Loads the scrape document whose `_id` is carried in the job data, fetches a
 * listing page, follows every `.property-link` found inside
 * `#placardContainer`, parses each linked page with `condos.condo`, and stores
 * the parsed results on the document together with status "done".
 *
 * The handler is registered with the two-argument `(job, done)` signature, so
 * `done` has to be invoked on every path: `done()` on success and `done(err)`
 * on failure. Otherwise Agenda is never told that the job has finished.
 */
agenda.define('scrape', async function (job, done) {
  const { _id } = job.attrs.data;

  try {
    const collection = client.db(database).collection('scrapes');

    const scrape = await collection.findOne({ _id: _id });
    if (!scrape) {
      // Fail with a descriptive error instead of a TypeError on the next line.
      throw new Error(`Scrape ${_id} not found.`);
    }

    // NOTE(review): the stored sourceUrl is overwritten with a fixed listing
    // page here, so the document's own value is never used - confirm whether
    // this override is still intended.
    scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/";

    const filterPage = await axios(scrape.sourceUrl);
    const $ = cheerio.load(filterPage.data);

    const propertyLinks = $('#placardContainer .property-link')
      .map(function () {
        return $(this).attr('href');
      })
      .get();

    // Pages are fetched one at a time, in the order the links appear.
    const properties = [];
    for (const link of propertyLinks) {
      const response = await axios(link);

      // Alternative parsers for other listing types: apartments.apartment and
      // houses.house.
      properties.push(condos.condo(cheerio.load(response.data)));

      console.log(`${link} scraped.`);
    }

    await collection.updateOne(
      { _id: _id },
      { $set: { status: "done", result: properties } }
    );

    console.log(`${_id} scraped.`);

    return done();
  } catch (err) {
    console.log(err);

    // Report the failure so the job is marked as failed rather than left open.
    return done(err);
  }
});
|
|
|
// Start Agenda's job processing as soon as the module loads. The promise is
// neither awaited by a caller nor given a rejection handler here.
(async () => {
  await agenda.start();
})();
|
|
|
|
|
|
|
// express application |
|
|
|
// HTTP application shared by all routes below.
const app = express();

// Middleware is registered in this order: JSON body parsing, then CORS.
app.use(express.json(), cors());
|
|
|
|
|
|
|
// database setup |
|
|
|
// `var` (not `const`) because the jobs section earlier in this file already
// declares `mongoUrl` with `var`; a block-scoped redeclaration would be a
// SyntaxError.
var mongoUrl = config.get("mongo");

// Name of the database that holds the "scrapes" collection.
const database = config.get("database");

// Connected MongoClient. Stays undefined until the connect callback below has
// run, so code that uses it must only execute after the connection is up.
let client;

MongoClient.connect(mongoUrl, function (err, db) {
  // Without a database connection the service cannot work; fail fast.
  if (err) throw err;

  // The connection string is intentionally not logged: it may embed
  // credentials.
  console.log("Database created!");

  client = db;

  // Make sure the "scrapes" collection exists before the routes use it.
  db.db(database).createCollection("scrapes", function (createErr) {
    if (!createErr) {
      console.log("Collection created!");
      return;
    }

    // Code 48 / NamespaceExists is the server's "collection already exists"
    // answer; only that case is harmless.
    if (createErr.codeName === 'NamespaceExists' || createErr.code === 48) {
      console.log("Collection already created!");
      return;
    }

    // Anything else is a real failure and must not be reported as success.
    console.error(createErr);
  });
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// app.set('json spaces', 2); |
|
|
|
|
|
|
|
// const axios = require('axios'); |
|
|
|
// const cheerio = require('cheerio'); |
|
|
|
|
|
|
|
|
|
|
|
// const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/'; |
|
|
|
// app.get('/', (req, res) => { |
|
|
|
// axios(url).then(response => { |
|
|
|
// const html = response.data; |
|
|
|
// const $ = cheerio.load(html); |
|
|
|
|
|
|
|
// var data = apartments.apartment($); |
|
|
|
// res.json(data); |
|
|
|
// }); |
|
|
|
// }); |
|
|
|
// app.get('/houses/*', (req, res) => { |
|
|
|
// var url = req.params[0]; |
|
|
|
// axios(url).then(response => { |
|
|
|
// const html = response.data; |
|
|
|
// const $ = cheerio.load(html); |
|
|
|
|
|
|
|
// var data = houses.house($); |
|
|
|
// res.json(data); |
|
|
|
// }); |
|
|
|
// }); |
|
|
|
// app.get('/filters/*', async (req, res) => { |
|
|
|
// var url = req.params[0]; |
|
|
|
// const filterPage = await axios(url); |
|
|
|
// const html = filterPage.data; |
|
|
|
// const $ = cheerio.load(html); |
|
|
|
|
|
|
|
// const propertyLins = $('#placardContainer .property-link').map(function () { |
|
|
|
// return $(this).attr('href'); |
|
|
|
// }).get(); |
|
|
|
|
|
|
|
// var properties = []; |
|
|
|
// for (const link of propertyLins){ |
|
|
|
// var response = await axios(link); |
|
|
|
|
|
|
|
// var property = apartments.apartment(cheerio.load(response.data)); |
|
|
|
// properties.push(property); |
|
|
|
// } |
|
|
|
|
|
|
|
// res.json(properties); |
|
|
|
// }); |
|
|
|
// app.get('/apartments/*', (req, res) => { |
|
|
|
// var url = req.params[0]; |
|
|
|
// axios(url).then(response => { |
|
|
|
// const html = response.data; |
|
|
|
// const $ = cheerio.load(html); |
|
|
|
|
|
|
|
// var data = apartments.apartment($); |
|
|
|
|
|
|
|
// createListing(client, data); |
|
|
|
// res.json(data); |
|
|
|
|
|
|
|
|
|
|
|
// }); |
|
|
|
// }); |
|
|
|
|
|
|
|
// GET /scrapes - responds with every document in the "scrapes" collection as
// a JSON array, or with an empty 500 response when the lookup throws.
app.get("/scrapes", async (req, res) => {
  try {
    const scrapes = await client
      .db(database)
      .collection('scrapes')
      .find({})
      .toArray();

    return res.json(scrapes);
  } catch (err) {
    console.log(err);

    return res.status(500).json();
  }
});
|
|
|
// GET /scrapes/:id - responds with the scrape document whose _id equals :id.
//
// Responses:
//   200 + document  the scrape exists
//   400             :id is not a valid ObjectID
//   404             no scrape has that id
//   500             the lookup itself failed
app.get("/scrapes/:id", async (req, res) => {
  const id = req.params.id;

  // The ObjectID constructor throws on malformed input; that is a client
  // error, not a server error, so reject it before touching the database.
  if (!ObjectID.isValid(id)) {
    return res.status(400).json();
  }

  try {
    const collection = client.db(database).collection('scrapes');

    const data = await collection.findOne({ _id: new ObjectID(id) });

    // findOne yields null when nothing matches.
    if (data == null) {
      return res.status(404).json();
    }

    return res.json(data);
  } catch (err) {
    console.log(err);

    return res.status(500).json();
  }
});
|
|
|
// POST /scrapes/ - stores a new scrape request with status "requested".
//
// Reads `location`, `price`, `beds`, `type` and `lifestyle` from the JSON
// body. On success responds with `{ _id }` of the inserted document, so the
// caller can address it through /scrapes/:id and /scrapes/:id/execute.
// Responds with an empty 500 when the insert fails.
app.post("/scrapes/", async (req, res) => {
  const { location, price, beds, type, lifestyle } = req.body;

  // query builder

  // TODO: save data into the database

  let insertedId;

  try {
    const collection = client.db(database).collection('scrapes');

    // Named `result` so it does not shadow the Express response object `res`.
    const result = await collection.insertOne({
      // NOTE(review): `count` and `estimate` look like placeholder values -
      // confirm what they are meant to hold.
      count: 21,
      estimate: Date.now(),
      sourceUrl: "https://www.apartments.com",
      location: location,
      filters: [
        { name: 'price', value: price },
        { name: 'beds', value: beds },
        { name: 'type', value: type },
        { name: 'lifestyle', value: lifestyle },
      ],
      status: "requested"
    });

    console.log(result);

    insertedId = result.insertedId;
  } catch (err) {
    console.log(err);

    return res.status(500).json();
  }

  return res.json({ _id: insertedId });
});
|
|
|
// PATCH /scrapes/:id/execute - marks the scrape as "pending" and queues the
// "scrape" Agenda job for it.
//
// Responses:
//   204  the scrape was updated and the job was queued
//   400  :id is not a valid ObjectID
//   404  no scrape has that id (no job is queued)
//   500  the update or the scheduling failed
app.patch("/scrapes/:id/execute", async (req, res) => {
  const id = req.params.id;

  // The ObjectID constructor throws on malformed input; answer that as a
  // client error instead of a 500.
  if (!ObjectID.isValid(id)) {
    return res.status(400).json();
  }

  try {
    const collection = client.db(database).collection('scrapes');

    const o_id = new ObjectID(id);

    const { matchedCount } = await collection.updateOne(
      { _id: o_id },
      { $set: { status: "pending" } }
    );

    // Do not queue a job for a document that does not exist.
    if (matchedCount === 0) {
      return res.status(404).json();
    }

    // Awaited so that a scheduling failure reaches the catch block below
    // instead of becoming an unhandled rejection after the 204 was sent.
    await agenda.now('scrape', { _id: o_id });

    return res.status(204).json();
  } catch (err) {
    console.log(err);

    return res.status(500).json();
  }
});
|
|
|
|
|
|
|
// TCP port the HTTP server binds to.
const port = 3333;

// Begin accepting requests and announce the address once the server listens.
app.listen(port, function onListening() {
  console.log(`Example app listening at http://localhost:${port}`);
});
|
|
|
|
|
|
|
// Handles graceful stopping of jobs |
|
|
|
/**
 * Shuts the service down: stops Agenda, closes the MongoDB client if one was
 * ever connected, then exits the process with code 0.
 *
 * `agenda.stop()` is awaited as a promise rather than given a callback. Each
 * step is guarded separately so that a failure in one is logged and does not
 * prevent the next step or the final exit.
 *
 * @returns {Promise<void>} settles only if `process.exit` returns (e.g. when
 *   it is stubbed); otherwise the process ends first.
 */
async function graceful() {
  try {
    await agenda.stop();
  } catch (e) {
    console.error(e);
  }

  try {
    // `client` is still undefined when the signal arrives before the
    // database connection has been established.
    if (client) {
      await client.close();
    }
  } catch (e) {
    console.error(e);
  }

  process.exit(0);
}
|
|
|
|
|
|
|
// Run the same shutdown routine for both termination signals.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, graceful);
}