diff options
author | Thorsten Ortlepp <post@ortlepp.eu> | 2021-04-11 16:19:22 +0200 |
---|---|---|
committer | Thorsten Ortlepp <post@ortlepp.eu> | 2021-04-11 16:19:22 +0200 |
commit | 6e680ee71b37490f4c448de6d95f86f147037559 (patch) | |
tree | 8c8cdf5096aaa9e431e3c3fffdac8ccae290e39c | |
parent | ab7a3c3344fe4e993299a8b6bceecd4932921be5 (diff) | |
download | php-stuff-6e680ee71b37490f4c448de6d95f86f147037559.zip |
Added new script feed2podcast
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | feed2podcast/feed2podcast.php | 187 |
2 files changed, 194 insertions, 0 deletions
<?php
/**
 * feed2podcast - a simple script to convert an RSS feed into a podcast.
 *
 * Designed for Deutschlandfunk Kalenderblatt, which is only available online
 * and as an RSS feed but not as a podcast.
 *
 * For every item of the source feed the script fetches the linked episode
 * page, reads the playlist URL from the embedded player, downloads the media
 * segments, joins them with ffmpeg into one file below $downloads and prints
 * a podcast feed whose enclosures point at those files. Media files that no
 * longer belong to an item of the feed are removed afterwards.
 */

// Configuration
$feed = 'https://www.deutschlandfunk.de/kalenderblatt.870.de.rss';
$downloads = 'audio';
$temp = 'temp';


// Setup
foreach ([$downloads, $temp] as $directory) {
    // The second is_dir() covers a parallel request creating the directory.
    if (!is_dir($directory) && !mkdir($directory) && !is_dir($directory)) {
        http_response_code(500);
        header('Content-Type: text/plain; charset=utf-8');
        exit('Could not create directory '.$directory);
    }
}


// Get the original feed. Stop before anything is printed or cleaned up when
// it cannot be loaded: continuing would emit an empty feed and the cleanup
// below would delete every media file that has already been downloaded.
$xml = simplexml_load_file($feed);
if ($xml === false || !isset($xml->channel)) {
    http_response_code(502);
    header('Content-Type: text/plain; charset=utf-8');
    exit('Could not load source feed');
}


// Set proper MIME type and encoding
header('Content-Type: application/rss+xml; charset=utf-8');


// Host and request URI are supplied by the client, so they are escaped like
// every other value before being written into the XML document.
$host = $_SERVER['HTTP_HOST'] ?? '';
$requestUri = $_SERVER['REQUEST_URI'] ?? '';
$baseUrl = xmlEscape('https://'.$host);
$selfUrl = xmlEscape('https://'.$host.$requestUri);
$channel = $xml->channel;


// Write podcast feed intro
echo '<?xml version="1.0" encoding="utf-8"?>';
echo '<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">';
echo '<channel>';
echo '<title>'.xmlEscape($channel->title).'</title>';
echo '<link>https://www.deutschlandfunk.de/kalenderblatt.870.de.html</link>';
echo '<description>Das Kalenderblatt stellt historische Ereignisse von Bedeutung oder von Relevanz in anschaulicher Weise dar.</description>';
echo '<category>'.xmlEscape($channel->category).'</category>';
echo '<copyright>'.xmlEscape($channel->copyright).'</copyright>';
echo '<language>'.xmlEscape($channel->language).'</language>';
echo '<pubDate>'.xmlEscape($channel->pubDate).'</pubDate>';
echo '<lastBuildDate>'.xmlEscape($channel->lastBuildDate).'</lastBuildDate>';
echo '<ttl>'.xmlEscape($channel->ttl).'</ttl>';
echo '<image>';
echo ' <url>'.$baseUrl.'/image.png</url>';
echo ' <title>'.xmlEscape($channel->image->title).'</title>';
echo ' <link>'.xmlEscape($channel->image->link).'</link>';
echo ' <description>'.xmlEscape($channel->image->description).'</description>';
echo '</image>';
echo '<atom:link rel="self" type="application/rss+xml" href="'.$selfUrl.'" />';
echo '<itunes:subtitle>Die Beiträge zur Sendung</itunes:subtitle>';
echo '<itunes:image href="'.$baseUrl.'/image.png"/>';
echo '<itunes:new-feed-url>'.$selfUrl.'</itunes:new-feed-url>';
echo '<itunes:owner>';
echo ' <itunes:name>Redaktion deutschlandradio.de</itunes:name>';
echo ' <itunes:email>podcast@deutschlandradio.de</itunes:email>';
echo '</itunes:owner>';
echo '<itunes:author>Deutschlandfunk</itunes:author>';
echo '<itunes:explicit>No</itunes:explicit>';
echo '<itunes:category text="History" />';


// Write podcast episodes
$containing = [];
foreach ($channel->item as $item) {
    $content = getPageContent((string) $item->link);

    // Only add episode to feed if media file is available
    if ($content[0] === 'XXX') {
        continue;
    }

    $id = md5((string) $item->guid);
    $filename = $id.'.aac';
    $path = $downloads.'/'.$filename;

    // Download episode if not yet done
    if (!file_exists($path)) {
        // Start from an empty temp directory so segments of a previous
        // episode cannot end up in this one.
        $leftovers = glob($temp.'/*');
        foreach ($leftovers === false ? [] : $leftovers as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }
        downloadMediaFile($content[1], $id, $temp, $downloads);
    }

    // Skip the episode when the download did not produce a media file;
    // otherwise the enclosure would point at a missing file.
    clearstatcache(true, $path);
    if (!is_file($path)) {
        continue;
    }

    $containing[] = $path;
    $fileUrl = $baseUrl.'/'.xmlEscape($path);

    // Write episode to podcast feed
    echo '<item>';
    echo ' <title>'.xmlEscape($item->title).'</title>';
    echo ' <link>'.$fileUrl.'</link>';
    echo ' <description>'.xmlEscape($item->description).'</description>';
    echo ' <pubDate>'.xmlEscape($item->pubDate).'</pubDate>';
    echo ' <guid>'.xmlEscape($item->guid).'</guid>';
    echo ' <enclosure url="'.$fileUrl.'" length="'.filesize($path).'" type="audio/aac"/>';
    echo ' <itunes:author>'.xmlEscape($content[0]).'</itunes:author>';
    echo ' <itunes:duration>'.xmlEscape($content[2]).'</itunes:duration>';
    echo '</item>';
}

// Remove unused episode media files
$stored = glob($downloads.'/*');
foreach ($stored === false ? [] : $stored as $file) {
    if (is_file($file) && !in_array($file, $containing, true)) {
        unlink($file);
    }
}


// Write podcast feed end
echo '</channel>';
echo '</rss>';


///// --- FUNCTIONS --- \\\\\


/**
 * Escape a value for use as XML text or as an XML attribute value.
 *
 * @param mixed $value Anything that can be cast to string (e.g. a SimpleXMLElement).
 * @return string The escaped text; invalid UTF-8 sequences are substituted.
 */
function xmlEscape($value) {
    return htmlspecialchars((string) $value, ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}


/**
 * Get selected content from episode website.
 *
 * Looks for the first <a class="player-embed"> element and the first
 * <p class="author"> element of the page.
 *
 * @param string $site URL of the episode page.
 * @return array array('XXX') when the page could not be read or contains no
 *               usable player; otherwise
 *               [episode author name | media file url | episode duration as m:ss].
 */
function getPageContent($site) {
    $html = download($site);
    if (!is_string($html) || $html === '') {
        return ['XXX'];
    }

    // Collect libxml parser messages internally instead of letting them be
    // printed as warnings into the middle of the feed.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return ['XXX'];
    }

    $xpath = new DOMXPath($dom);
    $player = $xpath->query("//a[@class='player-embed']");
    if ($player === false || $player->length === 0) {
        return ['XXX'];
    }
    $embed = $player->item(0);

    $source = $embed->getAttribute('data-audio-src');
    if ($source === '') {
        return ['XXX'];
    }

    // The author paragraph is optional; only a leading "Von " is removed so
    // that the same text inside a name stays untouched.
    $authorname = '';
    $author = $xpath->query("//p[@class='author']");
    if ($author !== false && $author->length > 0) {
        $authorname = trim($author->item(0)->nodeValue);
        if (strncmp($authorname, 'Von ', 4) === 0) {
            $authorname = substr($authorname, 4);
        }
    }

    // data-audio-duration is read as a number of seconds
    $total = intval($embed->getAttribute('data-audio-duration'));
    $duration = intdiv($total, 60).':'.sprintf('%02d', $total % 60);

    // Returns array [episode author name | media file url | episode duration]
    return [$authorname, $source, $duration];
}


/**
 * Extract the URL lines from the text of a playlist.
 *
 * @param mixed $playlist Playlist text as returned by download(), or false.
 * @return array List (re-indexed from 0) of trimmed lines accepted by isUrl().
 */
function playlistUrls($playlist) {
    if (!is_string($playlist)) {
        return [];
    }

    preg_match_all("/([^\n\r]+)/m", $playlist, $lines);

    return array_values(array_map('trim', array_filter($lines[1], 'isUrl')));
}


/**
 * Download episode media file.
 *
 * Reads the playlist at $url, follows the first URL found in it to the
 * "inner" playlist, stores every URL of that playlist as a numbered .ts file
 * in $temp and concatenates the segments with ./ffmpeg into
 * $downloads/$id.aac.
 *
 * @param string $url       URL of the outer playlist.
 * @param string $id        Base name (without extension) of the media file.
 * @param string $temp      Directory for segments and the ffmpeg list file.
 * @param string $downloads Directory the finished media file is written to.
 * @return bool True when the media file was created, false when a step failed.
 */
function downloadMediaFile($url, $id, $temp, $downloads) {
    // Download first playlist and get "inner" playlist
    $playlists = playlistUrls(download($url));
    if (count($playlists) === 0) {
        return false;
    }

    // Download contents of "inner" playlist
    $segments = playlistUrls(download($playlists[0]));
    if (count($segments) === 0) {
        return false;
    }

    // Download all media file segments
    $list = '';
    foreach ($segments as $counter => $segment) {
        $name = $counter.'.ts';
        $outfile = fopen($temp.'/'.$name, 'wb');
        if ($outfile === false) {
            return false;
        }

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_FILE, $outfile);
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Treat HTTP errors as failures so an error page is never stored as audio
        curl_setopt($curl, CURLOPT_FAILONERROR, true);
        curl_setopt($curl, CURLOPT_URL, $segment);
        $success = curl_exec($curl);
        curl_close($curl);

        fclose($outfile);

        // A missing segment would yield a truncated episode, so give up
        if ($success === false) {
            return false;
        }

        $list .= 'file '.$name."\n";
    }

    // The list is written in one go, replacing a list of an earlier run
    if (file_put_contents($temp.'/list.txt', $list) === false) {
        return false;
    }

    // Concatenate segments to media file
    $target = $downloads.'/'.$id.'.aac';
    $ffmpeg = './ffmpeg -f concat -i '.escapeshellarg($temp.'/list.txt')
        .' -c copy -bsf:a aac_adtstoasc '.escapeshellarg($target);
    exec($ffmpeg, $output, $status);

    if ($status !== 0) {
        // Remove a partial result so it is not mistaken for a finished download
        if (is_file($target)) {
            unlink($target);
        }
        return false;
    }

    return is_file($target);
}


/**
 * Download URL and return content. Redirects are followed.
 *
 * @param string $url URL to fetch.
 * @return string|false The response body, or false when the transfer failed.
 */
function download($url) {
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    $download = curl_exec($curl);
    curl_close($curl);
    return $download;
}


/**
 * Check if a string looks like a url.
 *
 * Only strings that start (after optional whitespace) with http:// or
 * https:// are accepted, so playlist tag lines that merely contain a URL
 * are not treated as URLs.
 *
 * @param mixed $var Value to check; it is cast to string.
 * @return bool True when the value starts with an http(s) scheme.
 */
function isUrl($var) {
    return preg_match('#^\s*https?://#i', (string) $var) === 1;
}