瀏覽代碼

Added houses....

newChanges
Nikola Ignjatovic 4 年之前
父節點
當前提交
16d23e8d6b
共有 2 個文件被更改，包括 8 次插入和 247 次删除
  1. 8
    4
      apartments.js
  2. 0
    243
      app.js

+ 8
- 4
apartments.js 查看文件

@@ -28,9 +28,12 @@ module.exports.apartment = function($) {
};
}).get()
},
features: $('#uniqueFeatures .uniqueAmenity').map(function () {
return $(this).find('span').text();
}).get(),
// features: $('#uniqueFeatures .uniqueAmenity').map(function () {
// return $(this).find('span').text();
// }).get(),
features: $('.amenitiesSection .amenityCard').map(function () {
return $(this).find('.amenityLabel').text();
}).get(),
prices: $('[data-tab-content-id=all]>.pricingGridItem').add('[data-tab-content-id=all]>.jsAvailableModels').map(function () {
return {
name: $(this).find('.modelName').text(),
@@ -42,8 +45,9 @@ module.exports.apartment = function($) {
area: $(this).find('.detailsTextWrapper>span:nth-child(3)').html(),
available: $(this).find('.availabilityInfo').html(),
},
url: $(this).find('.floorPlanButtonImage').attr('data-background-image'),
plan: $(this).find('.allAmenities>li').map(function () {
return {
return {
title: $(this).find('.topAmenity').html(),
amenities: $(this).find('.amenity').map(function () {
return $(this).text();

+ 0
- 243
app.js 查看文件

@@ -1,243 +0,0 @@
const config = require('config');
const axios = require("axios");
const cheerio = require("cheerio");
const express = require('express');
const cors = require('cors');
const MongoClient = require('mongodb').MongoClient;
const ObjectID = require('mongodb').ObjectID;

var apartments = require('./apartments.js');
var houses = require('./houses.js');
var condos = require('./condos.js');

// jobs
// Both connection strings are read from the `config` package.
// `mongoUrl` stays a `var` on purpose: the database setup section further down
// declares it again, which is only legal for `var` bindings.
var mongoUrl = config.get("mongo");
const agendaDb = config.get("agenda");

// Agenda keeps its job queue in the MongoDB instance addressed by `agendaDb`.
const { Agenda } = require('agenda');
const agenda = new Agenda({
  db: { address: agendaDb },
});
/**
 * Agenda job "scrape".
 *
 * Loads the scrape record identified by `job.attrs.data._id`, downloads the
 * listing page, collects the `href` of every `#placardContainer .property-link`
 * element, downloads and parses each linked page one after another, then
 * stores the parsed properties on the record together with `status: "done"`.
 *
 * The processor is declared with two parameters, so Agenda waits for `done`
 * to be called; every exit path below therefore calls it exactly once.
 *
 * @param {object} job - Agenda job; `job.attrs.data._id` is the scrape record id.
 * @param {Function} done - Completion callback; receives the error on failure.
 */
agenda.define('scrape', async function (job, done) {
  const { _id } = job.attrs.data;
  try {
    const collection = client.db(database).collection('scrapes');
    const scrape = await collection.findOne({ _id: _id });
    if (!scrape) {
      // Fail with a readable message instead of a TypeError on the next line.
      throw new Error(`Scrape ${_id} not found.`);
    }
    // NOTE(review): this overrides whatever sourceUrl the record was stored
    // with; it looks like a leftover from manual testing - confirm before
    // relying on the stored value.
    scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/";

    const filterPage = await axios(scrape.sourceUrl);
    const $ = cheerio.load(filterPage.data);

    const propertyLinks = $('#placardContainer .property-link').map(function () {
      return $(this).attr('href');
    }).get();

    const properties = [];
    // Pages are fetched sequentially, one request at a time.
    for (const link of propertyLinks) {
      const response = await axios(link);

      // The parser is selected by hand. Alternatives that take the same
      // cheerio document: apartments.apartment(...) and houses.house(...).
      const property = condos.condo(cheerio.load(response.data));
      properties.push(property);
      console.log(`${link} scraped.`);
    }

    await collection.updateOne({ _id: _id }, { $set: { status: "done", result: properties } });
    console.log(`${_id} scraped.`);
  } catch (err) {
    console.log(err);
    // Report the failure to Agenda; without this call the job never completes.
    return done(err);
  }
  return done();
});
// Start Agenda's job processing at module load. The returned promise is
// intentionally not awaited, so the rest of the module keeps initialising.
const startAgenda = async () => {
  await agenda.start();
};
startAgenda();

// express application
const app = express();

// Global middleware, registered in this order: JSON body parsing, then CORS.
for (const middleware of [express.json(), cors()]) {
  app.use(middleware);
}

// database setup
// `mongoUrl` is also declared in the jobs section above, so it must remain `var`.
var mongoUrl = config.get("mongo");
const database = config.get("database");

// Shared MongoDB client handle used by the routes and the scrape job.
// It stays undefined until the connect callback below has run.
let client;

/**
 * Logs the outcome of the "scrapes" collection creation.
 * Any error is reported as the collection already existing.
 */
function reportScrapesCollection(createError) {
  if (!createError) {
    console.log("Collection created!");
    return;
  }
  console.log("Collection already created!");
}

MongoClient.connect(mongoUrl, (connectError, connection) => {
  // A failed connection is rethrown from the callback.
  if (connectError) throw connectError;

  console.log("Database created!");
  console.log(mongoUrl);
  client = connection;

  connection.db(database).createCollection("scrapes", reportScrapesCollection);
});




// app.set('json spaces', 2);

// const axios = require('axios');
// const cheerio = require('cheerio');


// const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/';
// app.get('/', (req, res) => {
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = apartments.apartment($);
// res.json(data);
// });
// });
// app.get('/houses/*', (req, res) => {
// var url = req.params[0];
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = houses.house($);
// res.json(data);
// });
// });
// app.get('/filters/*', async (req, res) => {
// var url = req.params[0];
// const filterPage = await axios(url);
// const html = filterPage.data;
// const $ = cheerio.load(html);

// const propertyLins = $('#placardContainer .property-link').map(function () {
// return $(this).attr('href');
// }).get();

// var properties = [];
// for (const link of propertyLins){
// var response = await axios(link);

// var property = apartments.apartment(cheerio.load(response.data));
// properties.push(property);
// }

// res.json(properties);
// });
// app.get('/apartments/*', (req, res) => {
// var url = req.params[0];
// axios(url).then(response => {
// const html = response.data;
// const $ = cheerio.load(html);

// var data = apartments.apartment($);

// createListing(client, data);
// res.json(data);


// });
// });

/**
 * GET /scrapes
 * Responds with every document of the "scrapes" collection as a JSON array,
 * or with an empty 500 response when the query fails.
 */
app.get("/scrapes", async (req, res) => {
  try {
    const scrapes = await client
      .db(database)
      .collection('scrapes')
      .find({})
      .toArray();
    return res.json(scrapes);
  } catch (error) {
    console.log(error);
    return res.status(500).json();
  }
});
/**
 * GET /scrapes/:id
 * Responds with the scrape document whose `_id` equals the `:id` path
 * parameter (JSON `null` when no document matches).
 *
 * Status codes:
 *   400 - `:id` cannot be converted to an ObjectID
 *   500 - the database lookup failed
 */
app.get("/scrapes/:id", async (req, res) => {
  let scrapeId;
  try {
    // The ObjectID constructor throws on malformed input; that is a client
    // error, not a server failure, so it is answered with 400.
    scrapeId = new ObjectID(req.params.id);
  } catch (err) {
    console.log(err);
    return res.status(400).json();
  }

  try {
    const collection = client.db(database).collection('scrapes');
    const data = await collection.findOne({ _id: scrapeId });
    return res.json(data);
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }
});
/**
 * POST /scrapes/
 * Stores a new scrape request built from the JSON body fields `location`,
 * `price`, `beds`, `type` and `lifestyle`, with status "requested".
 * Responds with an empty 200 on success and an empty 500 when the insert fails.
 */
app.post("/scrapes/", async (req, res) => {
  const { location, price, beds, type, lifestyle } = req.body;

  // query builder

  //todo: save data into the database
  try {
    const collection = client.db(database).collection('scrapes');

    // One { name, value } entry per filter, in the order price, beds, type, lifestyle.
    const filters = Object.entries({ price, beds, type, lifestyle })
      .map(([name, value]) => ({ name, value }));

    // Named `insertResult` so it cannot be confused with the Express response `res`.
    const insertResult = await collection.insertOne({
      count: 21,
      estimate: Date.now(),
      sourceUrl: "https://www.apartments.com",
      location,
      filters,
      status: "requested",
    });
    console.log(insertResult);
  } catch (error) {
    console.log(error);
    return res.status(500).json();
  }
  return res.json();
});
/**
 * PATCH /scrapes/:id/execute
 * Marks the scrape document as "pending" and queues the "scrape" Agenda job
 * for it.
 *
 * Status codes:
 *   204 - status updated and job queued
 *   400 - `:id` cannot be converted to an ObjectID
 *   404 - no scrape document has that id (no job is queued)
 *   500 - the database update or the job scheduling failed
 */
app.patch("/scrapes/:id/execute", async (req, res) => {
  let scrapeId;
  try {
    // The ObjectID constructor throws on malformed input; answer with 400.
    scrapeId = new ObjectID(req.params.id);
  } catch (err) {
    console.log(err);
    return res.status(400).json();
  }

  try {
    const collection = client.db(database).collection('scrapes');
    const updateResult = await collection.updateOne(
      { _id: scrapeId },
      { $set: { status: "pending" } }
    );
    if (updateResult.matchedCount === 0) {
      // Nothing to scrape: do not queue a job that could only fail.
      return res.status(404).json();
    }

    // Awaited so a scheduling failure reaches the catch block below instead
    // of becoming an unhandled rejection after the response was sent.
    await agenda.now('scrape', { _id: scrapeId });
    return res.status(204).json();
  } catch (err) {
    console.log(err);
    return res.status(500).json();
  }
});

// Fixed TCP port the HTTP API listens on.
const port = 3333;

// Logs the local URL once the server is accepting connections.
function announceListening() {
  console.log(`Example app listening at http://localhost:${port}`);
}

app.listen(port, announceListening);

/**
 * Handles graceful stopping of jobs.
 *
 * Stops Agenda, closes the MongoDB client if one was ever connected, then
 * exits the process with code 0. Errors from either step are logged and do
 * not prevent the exit.
 *
 * NOTE(review): this relies on `agenda.stop()` returning a promise, matching
 * the `await agenda.start()` usage elsewhere in this file - confirm against
 * the installed Agenda version.
 */
async function graceful() {
  try {
    await agenda.stop();
  } catch (e) {
    console.error(e);
  }

  try {
    // `client` is still undefined when a signal arrives before the connect
    // callback has run.
    if (client) {
      await client.close();
    }
  } catch (e) {
    console.error(e);
  }

  process.exit(0);
}

process.on('SIGTERM', graceful);
process.on('SIGINT', graceful);

Loading…
取消
儲存