Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder mit einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. const config = require('config');
  2. const axios = require("axios");
  3. const cheerio = require("cheerio");
  4. const express = require('express');
  5. const cors = require('cors');
  6. const MongoClient = require('mongodb').MongoClient;
  7. const ObjectID = require('mongodb').ObjectID;
  8. var apartments = require('./apartments.js');
  9. var houses = require('./houses.js');
  10. var condos = require('./condos.js');
  // Job scheduler: read the connection settings from config, then build the
  // Agenda instance that the job definition and routes below share.
  const { Agenda } = require('agenda');
  // `var` is kept on purpose: mongoUrl is declared again with `var` further down.
  var mongoUrl = config.get("mongo");
  var agendaDb = config.get("agenda");
  const agenda = new Agenda({
    db: { address: agendaDb },
  });
  /**
   * Agenda job "scrape": crawls a listing index page and stores every property
   * found on it in the matching document of the `scrapes` collection.
   *
   * Reads `_id` from the job data, loads that document, fetches the index page,
   * follows each `#placardContainer .property-link` href one at a time, parses
   * every page with `condos.condo`, then writes `status: "done"` and the
   * collected results back to the document.
   *
   * @param {object} job - Agenda job; `job.attrs.data._id` identifies the scrape document.
   * @param {Function} done - Completion callback; called with the error when the job fails.
   */
  agenda.define('scrape', async function (job, done) {
    const { _id } = job.attrs.data;
    try {
      const collection = client.db(database).collection('scrapes');
      const scrape = await collection.findOne({ _id });
      if (!scrape) {
        // Fail with a clear message instead of a TypeError on the next line.
        throw new Error(`Scrape ${_id} not found.`);
      }
      // NOTE(review): the stored sourceUrl is overridden with a fixed condo
      // listing page — confirm this is still intended.
      scrape.sourceUrl = "https://www.apartments.com/condos/chicago-il/";
      const filterPage = await axios(scrape.sourceUrl);
      const $ = cheerio.load(filterPage.data);
      const propertyLinks = $('#placardContainer .property-link')
        .map(function () {
          return $(this).attr('href');
        })
        .get();
      const properties = [];
      for (const link of propertyLinks) {
        const response = await axios(link);
        // Other listing types: apartments.apartment(...) or houses.house(...).
        properties.push(condos.condo(cheerio.load(response.data)));
        console.log(`${link} scraped.`);
      }
      await collection.updateOne({ _id }, { $set: { status: "done", result: properties } });
      console.log(`${_id} scraped.`);
      return done();
    } catch (err) {
      console.log(err);
      // Always report completion; without this call a failed run never
      // signals Agenda that the job has ended.
      return done(err);
    }
  });
  // Begin job processing; the async arrow function is invoked immediately.
  (async () => {
    await agenda.start();
  })();
  // Express application: JSON body parsing first, then CORS handling.
  const app = express();
  app
    .use(express.json())
    .use(cors());
  // Database setup: connect once and keep the connected client in the
  // module-level `client` variable that the routes and the scrape job read.
  // `var` is kept because mongoUrl is already declared earlier in this file.
  var mongoUrl = config.get("mongo");
  var database = config.get("database");
  var client;
  MongoClient.connect(mongoUrl, (connectError, connection) => {
    if (connectError) throw connectError;
    console.log("Database created!");
    console.log(mongoUrl);
    client = connection;
    // Any error from createCollection is reported as "already created".
    connection.db(database).createCollection("scrapes", (createError) => {
      console.log(createError ? "Collection already created!" : "Collection created!");
    });
  });
  73. // app.set('json spaces', 2);
  74. // const axios = require('axios');
  75. // const cheerio = require('cheerio');
  76. // const url = 'https://www.apartments.com/two-west-chicago-il/jqn1nf6/';
  77. // app.get('/', (req, res) => {
  78. // axios(url).then(response => {
  79. // const html = response.data;
  80. // const $ = cheerio.load(html);
  81. // var data = apartments.apartment($);
  82. // res.json(data);
  83. // });
  84. // });
  85. // app.get('/houses/*', (req, res) => {
  86. // var url = req.params[0];
  87. // axios(url).then(response => {
  88. // const html = response.data;
  89. // const $ = cheerio.load(html);
  90. // var data = houses.house($);
  91. // res.json(data);
  92. // });
  93. // });
  94. // app.get('/filters/*', async (req, res) => {
  95. // var url = req.params[0];
  96. // const filterPage = await axios(url);
  97. // const html = filterPage.data;
  98. // const $ = cheerio.load(html);
  99. // const propertyLins = $('#placardContainer .property-link').map(function () {
  100. // return $(this).attr('href');
  101. // }).get();
  102. // var properties = [];
  103. // for (const link of propertyLins){
  104. // var response = await axios(link);
  105. // var property = apartments.apartment(cheerio.load(response.data));
  106. // properties.push(property);
  107. // }
  108. // res.json(properties);
  109. // });
  110. // app.get('/apartments/*', (req, res) => {
  111. // var url = req.params[0];
  112. // axios(url).then(response => {
  113. // const html = response.data;
  114. // const $ = cheerio.load(html);
  115. // var data = apartments.apartment($);
  116. // createListing(client, data);
  117. // res.json(data);
  118. // });
  119. // });
  // GET /scrapes — responds with every document in the `scrapes` collection,
  // or with an empty 500 response when the query fails.
  app.get("/scrapes", async (req, res) => {
    try {
      const scrapes = client.db(database).collection('scrapes');
      const documents = await scrapes.find({}).toArray();
      return res.json(documents);
    } catch (failure) {
      console.log(failure);
      return res.status(500).json();
    }
  });
  /**
   * GET /scrapes/:id — responds with the scrape document whose `_id` matches `:id`.
   *
   * Responds 400 when `:id` is not a valid ObjectID and 500 when the lookup fails.
   */
  app.get("/scrapes/:id", async (req, res) => {
    const { id } = req.params;
    // A malformed id is a client error; reject it before the ObjectID
    // constructor throws and it gets reported as a server error.
    if (!ObjectID.isValid(id)) {
      return res.status(400).json();
    }
    try {
      const collection = client.db(database).collection('scrapes');
      const data = await collection.findOne({ _id: new ObjectID(id) });
      return res.json(data);
    } catch (err) {
      console.log(err);
      return res.status(500).json();
    }
  });
  // POST /scrapes/ — stores a new scrape request built from the posted
  // location and filter values, with status "requested".
  app.post("/scrapes/", async (req, res) => {
    const { location, price, beds, type, lifestyle } = req.body;
    // TODO: build the query from the filters and save real data into the database
    // (count, estimate and sourceUrl below are placeholder values).
    try {
      const scrapes = client.db(database).collection('scrapes');
      const filters = [
        ['price', price],
        ['beds', beds],
        ['type', type],
        ['lifestyle', lifestyle],
      ].map(([name, value]) => ({ name, value }));
      const inserted = await scrapes.insertOne({
        count: 21,
        estimate: Date.now(),
        sourceUrl: "https://www.apartments.com",
        location,
        filters,
        status: "requested",
      });
      console.log(inserted);
    } catch (failure) {
      console.log(failure);
      return res.status(500).json();
    }
    return res.json();
  });
  /**
   * PATCH /scrapes/:id/execute — marks a scrape as "pending" and queues the
   * `scrape` job for it.
   *
   * Responds 204 once the job is queued, 400 when `:id` is not a valid
   * ObjectID, 404 when no document matches, and 500 on any other failure.
   */
  app.patch("/scrapes/:id/execute", async (req, res) => {
    const { id } = req.params;
    // Reject malformed ids up front instead of reporting them as server errors.
    if (!ObjectID.isValid(id)) {
      return res.status(400).json();
    }
    try {
      const collection = client.db(database).collection('scrapes');
      const o_id = new ObjectID(id);
      const { matchedCount } = await collection.updateOne(
        { _id: o_id },
        { $set: { status: "pending" } }
      );
      // Do not queue a job for a document that does not exist.
      if (matchedCount === 0) {
        return res.status(404).json();
      }
      // Await scheduling so a failure reaches the catch block below instead
      // of becoming an unhandled rejection after the response was sent.
      await agenda.now('scrape', { _id: o_id });
      return res.status(204).json();
    } catch (err) {
      console.log(err);
      return res.status(500).json();
    }
  });
  // Start the HTTP server on a fixed port and log the local address.
  const port = 3333;
  app.listen(port, function onListening() {
    console.log(`Example app listening at http://localhost:${port}`);
  });
  /**
   * Handles graceful stopping of jobs: stops the Agenda scheduler, closes the
   * MongoDB client if one was connected, then exits the process with code 0.
   * Registered below as the SIGTERM and SIGINT handler.
   *
   * @returns {Promise<void>}
   */
  async function graceful() {
    try {
      // stop() and close() are awaited as promises rather than given
      // callbacks — TODO confirm against the installed agenda/mongodb versions.
      await agenda.stop();
      // `client` stays undefined until the database connection succeeds.
      if (client) {
        await client.close();
      }
    } catch (e) {
      // No `logger` is defined in this file; report through the console.
      console.error(e);
    } finally {
      process.exit(0);
    }
  }
  process.on('SIGTERM', graceful);
  process.on('SIGINT', graceful);