From 6e680ee71b37490f4c448de6d95f86f147037559 Mon Sep 17 00:00:00 2001
From: Thorsten Ortlepp
Date: Sun, 11 Apr 2021 16:19:22 +0200
Subject: Added new script feed2podcast

---
Review notes (not part of the commit message):

- This copy of the patch appears to have lost everything that was written
  in angle brackets: the opening of feed2podcast.php (everything before the
  first "';" line of the hunk) and the markup inside the echo string
  literals. Those lines are kept exactly as found and have to be restored
  from the repository before the patch can be applied.
- The diffstat and the hunk line counts are those of the original commit;
  they were not recalculated.
- Only the helper functions at the end of feed2podcast.php were revised:
  getPageContent() no longer fails on pages without author paragraph or on
  failed downloads and keeps HTML parser warnings out of the feed,
  downloadMediaFile() gives up instead of storing an incomplete episode and
  escapes the paths handed to ffmpeg, isUrl() only accepts lines that start
  with http:// or https://.

 README                        |   7 ++
 feed2podcast/feed2podcast.php | 187 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+)
 create mode 100644 feed2podcast/feed2podcast.php

diff --git a/README b/README
index 09ab543..b644639 100644
--- a/README
+++ b/README
@@ -2,6 +2,13 @@
 PHP stuff
 =========
 
+feed2podcast
+---------
+A simple script to convert an RSS feed into a podcast. Designed for
+Deutschlandfunk Kalenderblatt which is only available online and as
+RSS feed but not as podcast.
+
+
 rssfilter
 ---------
 A simple script to remove unwanted articles from an RSS feed, either
diff --git a/feed2podcast/feed2podcast.php b/feed2podcast/feed2podcast.php
new file mode 100644
index 0000000..0c16481
--- /dev/null
+++ b/feed2podcast/feed2podcast.php
@@ -0,0 +1,187 @@
+';
+echo '';
+echo '';
+echo '';
+echo ''.$xml->channel->title.'';
+echo 'https://www.deutschlandfunk.de/kalenderblatt.870.de.html';
+echo 'Das Kalenderblatt stellt historische Ereignisse von Bedeutung oder von Relevanz in anschaulicher Weise dar.';
+echo ''.$xml->channel->category.'';
+echo ''.$xml->channel->copyright.'';
+echo ''.$xml->channel->language.'';
+echo ''.$xml->channel->pubDate.'';
+echo ''.$xml->channel->lastBuildDate.'';
+echo ''.$xml->channel->ttl.'';
+echo '';
+echo ' https://'.$_SERVER['HTTP_HOST'].'/image.png';
+echo ' '.$xml->channel->image->title.'';
+echo ' '.$xml->channel->image->link.'';
+echo ' '.$xml->channel->image->description.'';
+echo '';
+echo '';
+echo 'Die Beiträge zur Sendung';
+echo '';
+echo 'https://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'].'';
+echo '';
+echo ' Redaktion deutschlandradio.de';
+echo ' podcast@deutschlandradio.de';
+echo '';
+echo 'Deutschlandfunk';
+echo 'No';
+echo '';
+
+
+// Write podcast episodes
+$containing = array();
+foreach ($xml->channel->item as $item) {
+    $content = getPageContent($item->link);
+
+    // Only add episode to feed if media file is available
+    if ($content[0] != 'XXX') {
+        $id = md5($item->guid);
+        $filename = $id.'.aac';
+
+        // Download episode if not yet done
+        if (!file_exists($downloads.'/'.$filename)) {
+            foreach (glob($temp.'/*') as $file) {
+                if (is_file($file)) {
+                    unlink($file);
+                }
+            }
+            downloadMediaFile($content[1], $id, $temp, $downloads);
+        }
+
+        array_push($containing, $downloads.'/'.$filename);
+
+        // Write episode to podcast feed
+        echo '';
+        echo ' '.$item->title.'';
+        echo ' https://'.$_SERVER['HTTP_HOST'].'/'.$downloads.'/'.$filename.'';
+        echo ' '.$item->description.'';
+        echo ' '.$item->pubDate.'';
+        echo ' '.$item->guid.'';
+        echo ' ';
+        echo ' '.$content[0].'';
+        echo ' '.$content[2].'';
+        echo '';
+    }
+}
+
+// Remove unused episode media files
+foreach (glob($downloads.'/*') as $file) {
+    if(is_file($file) && !in_array($file, $containing)) {
+        unlink($file);
+    }
+}
+
+
+// Write podcast feed end
+echo '';
+echo '';
+
+
+///// --- FUNCTIONS --- \\\\\
+
+
+/**
+ * Get selected content from episode website.
+ *
+ * The page is fetched with download() and searched for the embedded audio
+ * player link (a.player-embed) and the author paragraph (p.author).
+ *
+ * @param mixed $site URL of the episode page
+ * @return array array('XXX') if the page could not be loaded or contains
+ *               no audio player, otherwise
+ *               [episode author name | media file url | episode duration]
+ *               with the duration formatted as "m:ss"
+ */
+function getPageContent($site) {
+    $html = download($site);
+
+    // A failed or empty download cannot contain a player, and loadHTML()
+    // does not accept empty input: handle it like a page without media
+    if (!is_string($html) || $html === '') {
+        return array('XXX');
+    }
+
+    // Collect markup errors internally, otherwise parser warnings about
+    // the page's HTML could be printed into the podcast feed
+    $previous = libxml_use_internal_errors(true);
+    $dom = new DOMDocument();
+    $dom->loadHTML($html);
+    libxml_clear_errors();
+    libxml_use_internal_errors($previous);
+
+    $xpath = new DOMXPath($dom);
+    $player = $xpath->query("//a[@class='player-embed']");
+    $author = $xpath->query("//p[@class='author']");
+
+    if ($player->length == 0) {
+        return array('XXX');
+    }
+
+    // The author paragraph may be missing, use an empty name in that case
+    $authorname = '';
+    if ($author->length > 0) {
+        $authorname = str_replace('Von ', '', $author->item(0)->nodeValue);
+    }
+
+    // data-audio-duration is read as whole seconds
+    $link = $player->item(0);
+    $total = intval($link->getAttribute('data-audio-duration'));
+    $duration = intdiv($total, 60).':'.sprintf('%02d', $total % 60);
+
+    // Returns array [episode author name | media file url | episode duration]
+    return array($authorname, $link->getAttribute('data-audio-src'), $duration);
+}
+
+
+/**
+ * Download episode media file.
+ *
+ * $url is treated as a playlist whose first URL line points to an "inner"
+ * playlist; every URL line of the inner playlist is downloaded as a segment
+ * to $temp (0.ts, 1.ts, ...) and the segments are concatenated by ./ffmpeg
+ * into $downloads/$id.aac.
+ *
+ * Nothing is written to $downloads if a playlist contains no URL or if a
+ * segment cannot be transferred. The script only downloads an episode
+ * while its file is missing, so an incomplete file would never be replaced.
+ *
+ * @param mixed  $url       URL of the outer playlist
+ * @param string $id        Name of the media file without extension
+ * @param string $temp      Directory for segments and the segment list
+ * @param string $downloads Directory for the finished media file
+ */
+function downloadMediaFile($url, $id, $temp, $downloads) {
+    $regex = "/([^\n\r]+)/m";
+
+    // Download first playlist and get "inner" playlist
+    preg_match_all($regex, (string) download($url), $lines);
+    $playlists = array_values(array_filter($lines[1], "isUrl"));
+    if (count($playlists) == 0) {
+        return;
+    }
+
+    // Download contents of "inner" playlist
+    preg_match_all($regex, (string) download($playlists[0]), $urls);
+    $segments = array_values(array_filter($urls[1], "isUrl"));
+    if (count($segments) == 0) {
+        return;
+    }
+
+    // Download all media file segments
+    $list = '';
+    foreach ($segments as $counter => $segment) {
+        $outfile = fopen($temp.'/'.$counter.'.ts', 'wb') or exit('File open failed');
+
+        $curl = curl_init();
+        curl_setopt($curl, CURLOPT_FILE, $outfile);
+        curl_setopt($curl, CURLOPT_HEADER, 0);
+        curl_setopt($curl, CURLOPT_URL, $segment);
+        $transferred = curl_exec($curl);
+        curl_close($curl);
+
+        fclose($outfile);
+
+        // Do not build a media file from an incomplete set of segments
+        if ($transferred === false) {
+            return;
+        }
+
+        $list .= 'file '.$counter.'.ts'."\n";
+    }
+
+    // Write the segment list at once, this also replaces an older list
+    file_put_contents($temp.'/list.txt', $list);
+
+    // Concatenate segments to media file
+    $ffmpeg = './ffmpeg -f concat -i '.escapeshellarg($temp.'/list.txt')
+        .' -c copy -bsf:a aac_adtstoasc '.escapeshellarg($downloads.'/'.$id.'.aac');
+    exec($ffmpeg);
+}
+
+
+/**
+ * Download URL and return content.
+ *
+ * Redirects are followed.
+ *
+ * @param mixed $url URL to download
+ * @return string|false Result of curl_exec(): the response body, or false
+ *                      if the transfer failed
+ */
+function download($url) {
+    $curl = curl_init();
+    curl_setopt($curl, CURLOPT_URL, $url);
+    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+    $download = curl_exec($curl);
+    curl_close($curl);
+    return $download;
+}
+
+
+/**
+ * Check if a string looks like a url.
+ *
+ * The string has to start with http:// or https://. Looking for "http"
+ * anywhere is not sufficient for playlist lines: tag lines (starting
+ * with "#") can carry a URI="http..." attribute and are not entries.
+ *
+ * @param string $var Line to check
+ * @return bool true if the line starts with an http(s) scheme
+ */
+function isUrl($var) {
+    return preg_match('#^https?://#i', $var) === 1;
+}
+
+?>
-- 
cgit v1.2.3