1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
<?php
// A script to remove unwanted articles from an RSS feed
// Code based on and inspired by https://github.com/fuzzy76/rssfilter/blob/master/rssfilter.php
// Configuration
// URL of the original RSS feed that is fetched and filtered
$feed = 'https://www.example.com/feed.rss';
// An article is dropped when its title contains any of these substrings
$stopwords_title = array('Item one', 'Item two');
// An article is dropped when its link contains any of these substrings
$stopwords_url = array('/one/', '/two/');

// Load the original feed before any header is sent, so that a download or
// parse failure can be reported with a proper status code instead of
// crashing on a boolean while claiming to deliver RSS
$xml = simplexml_load_file($feed);
if ($xml === false) {
    http_response_code(502);
    header('Content-Type: text/plain; charset=utf-8');
    echo 'Unable to load or parse the source feed.';
    exit(1);
}

// Set proper MIME type and encoding
header('Content-Type: application/rss+xml; charset=utf-8');

// Initialize SQLite database
$sqlite = new SQLite3('articles.sqlite', SQLITE3_OPEN_CREATE | SQLITE3_OPEN_READWRITE);
// exec() is the call meant for statements that do not return rows
$sqlite->exec('CREATE TABLE IF NOT EXISTS "articles" ("title" VARCHAR, "url" VARCHAR, "created" DATETIME DEFAULT CURRENT_TIMESTAMP)');
// Forget articles that were recorded more than three days ago
$sqlite->exec('DELETE FROM "articles" WHERE "created" < DATETIME(\'now\', \'-3 days\')');
$select = $sqlite->prepare('SELECT "url" FROM "articles" WHERE "title" = ?');
$insert = $sqlite->prepare('INSERT INTO "articles" ("title", "url") VALUES (?, ?)');

// Process the original feed
$ix = 0;
// Group all inserts made by filter() into a single transaction
$sqlite->exec('BEGIN');
while ($ix < count($xml->channel->item)) {
    if (filter($xml->channel->item[$ix]->title, $xml->channel->item[$ix]->link, $stopwords_title, $stopwords_url, $select, $insert)) {
        // Removing the node shifts the following items down by one, so the
        // index must not advance here
        unset($xml->channel->item[$ix]);
    } else {
        $ix++;
    }
}
$sqlite->exec('COMMIT');
$sqlite->close();

// Emit the filtered feed
echo $xml->asXML();
/**
 * Decide whether an article must be removed from the feed.
 *
 * An article is removed when its title contains one of the title stopwords,
 * when its URL contains one of the URL stopwords, or when an article with the
 * same title has already been recorded under a different URL. A title that
 * has not been recorded yet is stored together with its URL and kept.
 *
 * @param mixed $title           Article title; anything castable to string
 *                               (the caller passes SimpleXMLElement nodes).
 * @param mixed $url             Article link; anything castable to string.
 * @param array $stopwords_title Substrings that disqualify a title.
 * @param array $stopwords_url   Substrings that disqualify a URL.
 * @param SQLite3Stmt $select    Prepared statement selecting "url" by title
 *                               (one positional parameter).
 * @param SQLite3Stmt $insert    Prepared statement inserting (title, url)
 *                               (two positional parameters).
 * @return bool true when the article must be dropped, false when it is kept.
 */
function filter($title, $url, $stopwords_title, $stopwords_url, $select, $insert) {
    // Normalise once: every later check then works on plain strings rather
    // than relying on implicit object-to-string conversion
    $title = (string) $title;
    $url = (string) $url;

    // Filter by title
    foreach ($stopwords_title as $stopword) {
        if (strpos($title, $stopword) !== false) {
            return true;
        }
    }

    // Filter by URL
    foreach ($stopwords_url as $stopword) {
        if (strpos($url, $stopword) !== false) {
            return true;
        }
    }

    // Remove duplicates: look up the URL previously recorded for this title
    $select->bindValue(1, $title);
    $result = $select->execute();
    $row = $result->fetchArray(SQLITE3_ASSOC);
    $result->finalize();

    if ($row === false) {
        // First time this title is seen: remember it and keep the article
        $insert->bindValue(1, $title);
        $insert->bindValue(2, $url);
        $insert->execute();
        return false;
    }

    // Same title under another URL is a duplicate. The comparison is strict
    // so that numeric-looking links such as '010' and '10' are not treated
    // as the same URL
    return $row['url'] !== $url;
}
?>
|