aboutsummaryrefslogtreecommitdiff
path: root/rssfilter/rssfilter.php
blob: ab794b487edfeacdd47c12648d10f2c33233bfad (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php

// A script to remove unwanted articles from an RSS feed
// Code based on and inspired by https://github.com/fuzzy76/rssfilter/blob/master/rssfilter.php

// Configuration
// $feed            - URL of the RSS feed to download and filter
// $stopwords_title - an article is dropped when its title contains any of these
// $stopwords_url   - an article is dropped when its link contains any of these
$feed = 'https://www.example.com/feed.rss';
$stopwords_title = ['Item one', 'Item two'];
$stopwords_url = ['/one/', '/two/'];


// Set proper MIME type and encoding
header('Content-Type: application/rss+xml; charset=utf-8');


// Initialize SQLite database
// The "articles" table stores the title and URL of articles that were let
// through, so a later article with the same title but a different URL can be
// detected as a duplicate. Rows older than three days are purged on every run.
$sqlite = new SQLite3('articles.sqlite', SQLITE3_OPEN_CREATE | SQLITE3_OPEN_READWRITE);
// exec() is meant for result-less statements; query() would hand back a result object nobody reads
$sqlite->exec('CREATE TABLE IF NOT EXISTS "articles" ("title" VARCHAR, "url" VARCHAR, "created" DATETIME DEFAULT CURRENT_TIMESTAMP)');
$sqlite->exec('DELETE FROM "articles" WHERE "created" < DATETIME(\'now\', \'-3 days\')');
$select = $sqlite->prepare('SELECT "url" FROM "articles" WHERE "title" = ?');
$insert = $sqlite->prepare('INSERT INTO "articles" ("title", "url") VALUES (?, ?)');


// Process the original feed
$xml = simplexml_load_file($feed);
if ($xml === false) {
  // The feed could not be fetched or is not well-formed XML. Answer with a
  // gateway error instead of failing further down on a non-object.
  $sqlite->close();
  http_response_code(502);
  header('Content-Type: text/plain; charset=utf-8');
  exit('Unable to load or parse the upstream feed.');
}
$ix = 0;
// All inserts performed by filter() are grouped into a single transaction
$sqlite->exec('BEGIN');
while ($ix < count($xml->channel->item)) {
  // Title and link are cast to plain strings before being handed to filter()
  if (filter((string) $xml->channel->item[$ix]->title, (string) $xml->channel->item[$ix]->link, $stopwords_title, $stopwords_url, $select, $insert)) {
    // The index is deliberately not advanced after a removal: the next item
    // is then found at the same position
    unset($xml->channel->item[$ix]);
  } else {
    $ix++;
  }
}
$sqlite->exec('COMMIT');
$sqlite->close();
echo $xml->asXML();


/**
 * Filter article: decide whether an article has to be removed from the feed.
 *
 * An article is removed when its title contains one of the title stopwords,
 * when its URL contains one of the URL stopwords, or when the same title has
 * already been recorded with a different URL (duplicate). A title that has not
 * been recorded yet is stored through $insert. Empty stopwords are ignored.
 *
 * @param mixed       $title           Article title; any value castable to string (e.g. SimpleXMLElement)
 * @param mixed       $url             Article link; any value castable to string
 * @param array       $stopwords_title Substrings that disqualify a title
 * @param array       $stopwords_url   Substrings that disqualify a URL
 * @param SQLite3Stmt $select          Prepared statement returning "url" for the title bound at position 1
 * @param SQLite3Stmt $insert          Prepared statement storing title (position 1) and url (position 2)
 *
 * @return bool true when the article must be dropped, false when it is kept
 */
function filter($title, $url, $stopwords_title, $stopwords_url, $select, $insert) {

  // Normalise to plain strings so every comparison below is string-to-string
  $title = (string) $title;
  $url = (string) $url;

  // Filter by title
  foreach ($stopwords_title as $stopword) {
    $stopword = (string) $stopword;
    // An empty needle would match every title, so it is skipped
    if ($stopword !== '' && strpos($title, $stopword) !== false) {
      return true;
    }
  }

  // Filter by URL
  foreach ($stopwords_url as $stopword) {
    $stopword = (string) $stopword;
    if ($stopword !== '' && strpos($url, $stopword) !== false) {
      return true;
    }
  }

  // Remove duplicates
  $select->bindValue(1, $title);
  $result = $select->execute();
  if ($result === false) {
    // The lookup failed; keep the article rather than abort on a non-object
    return false;
  }
  $row = $result->fetchArray(SQLITE3_ASSOC);
  $result->finalize();

  if ($row !== false) {
    // Known title: it is a duplicate only when it was recorded under another URL
    return (string) $row['url'] !== $url;
  }

  // First time this title is seen: remember it (best effort, the result is not checked)
  $insert->bindValue(1, $title);
  $insert->bindValue(2, $url);
  $insert->execute();

  return false;
}

?>