import org.serviio.library.metadata.*
import org.serviio.library.online.*
import org.serviio.util.*
import groovy.json.JsonSlurper
import java.util.zip.ZipEntry
import java.util.zip.ZipInputStream

/**
 * WebResource extractor plugin for Pluzz.fr
 *
 * Items are listed from the JSON files contained in the catch-up ZIP archive
 * (ARCHIVEURL). Depending on the resource URL the listing is restricted to a
 * channel, a genre or a single program.
 *
 * @author Illico
 * @version 1.7
 *
 */
class Pluzz extends WebResourceUrlExtractor {

    final int VERSION = 17

    final VALID_WEBRESOURCE_URL = '(^http://www.pluzz.fr.*$)|(^http://pluzz.francetv.fr.*$)|(^http://pluzz.francetv.fr.*$)|(^http://www.la1ere.fr.*)|(^http://.*.france[12345o].fr.*$)'
    final ARCHIVEURL = "http://webservices.francetelevisions.fr/catchup/flux/flux_main.zip"
    final THUMBURL = "http://www.pluzz.fr/layoutftv/arches/catchup/images/fb_pluzz.jpg"
    final URL_BASE_IMAGES = "http://www.pluzz.fr"
    final URL_BASE_VIDEOS = "http://medias2.francetv.fr/catchup-mobile"

    final GENRE_REGEX = '^http://pluzz.francetv.fr/a-z/(.*?)/*$'
    // Group 1 captures a "franceN" channel, group 2 captures the "la 1ere" variant.
    final CHANNEL_REGEX = '^http://.*(france[23345o]+?).*$|^http://.*(la.*1ere?).*$'
    final PROGRAM_REGEX = '^http://pluzz.francetv.fr/videos/.*.html$'
    // NOTE(review): both patterns below are empty in this file, so getProgram()
    // cannot read a capture group from them and falls into its error path.
    // Presumably the original patterns were lost - confirm against upstream.
    final PROGRAM_CODE_REGEX = ''
    final PROGRAM_CHAINE_REGEX = ''

    final URL_HIGH_REGEX = '(http:.*index_2_av.m3u8).*'
    final URL_MED_REGEX = '(http:.*index_1_av.m3u8).*'
    final URL_LOW_REGEX = '(http:.*index_0_av.m3u8).*'

    /** Returns the plugin name, which is the class name. */
    String getExtractorName() {
        return getClass().getName()
    }

    /** Returns the plugin version number. */
    int getVersion() {
        return VERSION
    }

    /** Returns true when the whole feed URL matches VALID_WEBRESOURCE_URL. */
    boolean extractorMatches(URL feedUrl) {
        return feedUrl ==~ VALID_WEBRESOURCE_URL
    }

    /**
     * Builds the list of items for the given resource URL.
     *
     * The URL is tried as a channel first, then as a genre, then as a program.
     *
     * @param resourceUrl URL configured by the user
     * @param maxItems    not used by this implementation
     * @return a container with the items sorted by title, or null when the
     *         extraction failed or the URL matched no selection mode
     */
    WebResourceContainer extractItems(URL resourceUrl, int maxItems) {
        try {
            loginfo("ResourceUrl : "+resourceUrl)
            List items = null
            String channel = getChannel(resourceUrl)
            String genre = getGenre(resourceUrl)
            List progparam = getProgram(resourceUrl)

            if ( channel != null ) {
                loginfo("Channel resquested : "+channel)
                items = getItemsByChannel(channel)
            } else if ( genre != null ) {
                loginfo("Genre resquested : "+genre)
                items = getItemsByGenre(genre)
            } else if ( progparam != null ) {
                loginfo("Program Code resquested : "+progparam[0]+", chaine : "+progparam[1])
                items = getItemsByProgram(progparam)
            }

            // An assert here would raise an AssertionError, which the catch
            // below does not handle; report the failure like any other one.
            if ( items == null ) {
                loginfo("Extract Items of "+resourceUrl+" failed : Error no items found")
                return null
            }

            items = items.sort{ it.title } // Sort the items by title
            return new WebResourceContainer(title: getClass().getName(), thumbnailUrl: THUMBURL, items: items)
        } catch (Exception e) {
            loginfo("Extract Items of "+resourceUrl+" failed : "+e)
            return null
        }
    }

    /**
     * Resolves the stream URL of an item for the requested quality.
     *
     * The master playlist of the item is downloaded and searched for the
     * variant playlist matching the quality (index_0 / index_1 / index_2).
     *
     * @param item             item produced by extractItems
     * @param requestedQuality LOW, MEDIUM or HIGH
     * @return the container describing the stream and its thumbnail
     * @throws IllegalArgumentException when the quality is not supported
     */
    ContentURLContainer extractUrl(WebResourceItem item, PreferredQuality requestedQuality) {
        String url = item.getAdditionalInfo()['WebResourceItemUrl']
        loginfo("Url : "+URL_BASE_VIDEOS+url)
        String thum = item.getAdditionalInfo()['WebResourceItemThumbnailUrl']
        String mediaThum = URL_BASE_IMAGES +thum
        loginfo("MediaThum: "+mediaThum)

        String master = openURL(new URL(URL_BASE_VIDEOS+url),"")

        String qualityRegex = null
        switch (requestedQuality) {
            case "LOW" : qualityRegex = URL_LOW_REGEX; break
            case "MEDIUM" : qualityRegex = URL_MED_REGEX; break
            case "HIGH" : qualityRegex = URL_HIGH_REGEX; break
            default : loginfo("Quality : ERROR")
        }
        if ( qualityRegex == null ) {
            // Fail explicitly instead of dereferencing a null matcher.
            throw new IllegalArgumentException("Unsupported quality : "+requestedQuality)
        }

        def matchMedia = master =~ qualityRegex
        assert matchMedia.getCount() > 0, "Error : Page content not compatible"
        String mediaUrl = matchMedia[0][1]
        loginfo("MediaUrl : "+mediaUrl)

        def cacheKey = getClass().getName() + "_${requestedQuality}"
        return new ContentURLContainer(fileType: MediaFileType.VIDEO, contentUrl: mediaUrl, thumbnailUrl: mediaThum, expiresImmediately: false, cacheKey : cacheKey)
    }

    /** Writes the text both to the plugin log and to standard output. */
    private void loginfo(String text) {
        log(text)
        println(text)
    }

    /**
     * Manual test helper: lists the items of a URL and resolves the second
     * item in the three qualities.
     *
     * @param url       resource URL to test
     * @param itemCount passed to extractItems as maxItems
     * @return the extracted container
     */
    static WebResourceContainer testURL(String url, int itemCount = 2) {
        Pluzz extractor = new Pluzz()
        URL resourceUrl = new URL(url)

        println "getExtractorName : " + extractor.getExtractorName()
        println "getVersion : " + extractor.getVersion()
        assert extractor.extractorMatches(resourceUrl), 'Url doesn\'t match for this WebResource plugin'
        println "extractorMatches : " + extractor.extractorMatches(resourceUrl)

        WebResourceContainer container = extractor.extractItems(resourceUrl, itemCount)
        assert container != null, 'Container is empty'
        assert container.items != null, 'Container contains no items'
        //assert container.items.size() == itemCount, 'Amount of items is invalid. Expected was ' + itemCount + ', result was ' + container.items.size()
        println "extractItems : " + container.items.size()

        println "***** HIGH *****"
        extractor.extractUrl(container.getItems()[1], PreferredQuality.HIGH)
        println "**** MEDIUM ****"
        extractor.extractUrl(container.getItems()[1], PreferredQuality.MEDIUM)
        println "***** LOW ******"
        extractor.extractUrl(container.getItems()[1], PreferredQuality.LOW)
        return container
    }

    /**
     * Downloads the catch-up archive and returns the UTF-8 text of every
     * entry whose name is accepted by the filter, in archive order.
     * Line terminators inside an entry are dropped. The archive stream is
     * always closed.
     *
     * @param acceptsEntryName closure taking the entry name, true to read it
     */
    private List<String> readArchiveEntries(Closure acceptsEntryName) {
        ZipInputStream zis = null
        try {
            URLConnection con = new URL(ARCHIVEURL).openConnection()
            InputStream is = con.getInputStream()
            loginfo("URL :"+ARCHIVEURL)
            zis = new ZipInputStream(new BufferedInputStream(is))

            List<String> texts = []
            ZipEntry ze
            while ((ze = zis.getNextEntry()) != null) {
                if ( acceptsEntryName.call(ze.getName()) ) {
                    loginfo("Unzipping : " + ze.getName())
                    // The reader is deliberately not closed: closing it would
                    // close the archive stream and end the iteration.
                    BufferedReader br = new BufferedReader(new InputStreamReader(zis, "UTF-8"))
                    StringBuilder content = new StringBuilder()
                    String ligne
                    while ((ligne = br.readLine()) != null) {
                        content.append(ligne)
                    }
                    texts << content.toString()
                }
            }
            return texts
        } finally {
            if ( zis != null ) {
                zis.close()
            }
        }
    }

    /**
     * Reads and parses the JSON file of one channel from the archive.
     *
     * @throws IllegalStateException when the archive has no file for the channel
     */
    private def readChannelJson(String channel) {
        String entryName = "catch_up_"+channel+".json"
        List<String> texts = readArchiveEntries { String name -> name == entryName }
        if ( texts.isEmpty() ) {
            throw new IllegalStateException(entryName+" not found in "+ARCHIVEURL)
        }
        return new JsonSlurper().parseText(texts.join(""))
    }

    /** Builds an item from one JSON program; titleSuffix is appended to the title. */
    private WebResourceItem toItem(def prog, String titleSuffix) {
        return new WebResourceItem(
            title: prog.titre+" "+prog.date+" "+CSA(prog.csa_code)+titleSuffix,
            additionalInfo: ['WebResourceItemUrl': prog.url_video, 'WebResourceItemThumbnailUrl': prog.url_image_racine+"."+prog.extension_image])
    }

    /** Returns the item list of the selected channel, or null on failure. */
    private List getItemsByChannel(String channel) {
        try {
            def json = readChannelJson(channel)
            loginfo(json.chaine.titre+" : "+json.chaine.nb_prog)

            List items = []
            for (prog in json.programmes) {
                items << toItem(prog, "")
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+channel+" channel failed : "+e)
            return null
        }
    }

    /**
     * Returns the items of every France channel whose OAS_sitepage contains
     * the selected genre (case-insensitive regex search), or null on failure.
     */
    private List getItemsByGenre(String genre) {
        try {
            List<String> texts = readArchiveEntries { String name -> name ==~ "catch_up_france[123345o]+.json" }
            String wanted = genre.toLowerCase()

            List items = []
            for (String text in texts) {
                def json = new JsonSlurper().parseText(text)
                String chaine = json.chaine.titre
                loginfo(chaine+" : "+json.chaine.nb_prog)
                for (prog in json.programmes) {
                    if ( prog.OAS_sitepage.toLowerCase() =~ wanted ) {
                        items << toItem(prog, " "+chaine)
                    }
                }
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+genre+" genre failed : "+e)
            return null
        }
    }

    /**
     * Returns the items of the selected program, or null on failure.
     *
     * @param progparam [program code, channel] as returned by getProgram
     */
    private List getItemsByProgram(List progparam) {
        // Declared outside the try so the catch block can report it.
        String channel = null
        try {
            channel = progparam[1]
            String code = progparam[0]
            String wanted = code.toLowerCase()

            def json = readChannelJson(channel)
            String chaine = json.chaine.titre
            loginfo(chaine+" : "+json.chaine.nb_prog)

            List items = []
            for (prog in json.programmes) {
                if ( prog.code_programme.toLowerCase() =~ wanted ) {
                    items << toItem(prog, " "+chaine)
                }
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+channel+" channel failed : "+e)
            return null
        }
    }

    /**
     * Returns the channel named by the resource URL, or null when the URL
     * does not select a channel. The "la 1ere" variant maps to "france1".
     */
    private String getChannel(URL resourceURL) {
        def match = resourceURL.toString() =~ CHANNEL_REGEX
        if ( !match.find() ) {
            loginfo("L'URL n'est pas valide pour le choix par chaine")
            return null
        }
        // Group 2 is only set when the "la 1ere" alternative matched.
        return ( match.group(2) == null ) ? match.group(1).trim() : "france1"
    }

    /** Returns the genre named by the resource URL, or null when there is none. */
    private String getGenre(URL resourceURL) {
        def match = resourceURL.toString() =~ GENRE_REGEX
        if ( !match.find() ) {
            loginfo("L'URL n'est pas valide pour le choix par genre")
            return null
        }
        return match.group(1).trim()
    }

    /**
     * Returns [program code, channel] read from the program page, or null
     * when the URL is not a program page or the page cannot be parsed.
     */
    private List getProgram(URL resourceURL) {
        try {
            if ( !(resourceURL ==~ PROGRAM_REGEX) ) {
                loginfo("L'URL n'est pas valide pour le choix par programme")
                return null
            }
            def webPage = resourceURL.getText()
            def matchcode = webPage =~ PROGRAM_CODE_REGEX
            def matchchaine = webPage =~ PROGRAM_CHAINE_REGEX
            String channel = ( matchchaine[0][1] == "la_1ere" ) ? "france1" : matchchaine[0][1].trim()
            String code = matchcode[0][1]
            return [code, channel]
        } catch (Exception e) {
            loginfo("L'URL n'est pas valide pour le choix par programme")
            return null
        }
    }

    /** Returns the CSA rating label for a code; unknown codes map to "|TP|". */
    private String CSA(String csacode) {
        switch (csacode) {
            case "10" : return "|-10|"
            case "12" : return "|-12|"
            case "16" : return "|-16|"
            default : return "|TP|"
        }
    }

    static void main(args) {
        // TEST BY CHANNEL
        // testURL("http://www.la1ere.fr",-1) // OK
        testURL("http://www.france2.fr",-1) // OK
        // testURL("http://www.france3.fr",-1) // OK
        // testURL("http://www.france4.fr",-1) // OK
        // testURL("http://www.france5.fr",-1) // OK
        // testURL("http://www.franceo.fr",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/la_1ere/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france2/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france3/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france4/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france5/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/franceo/",-1) // OK

        // TEST BY GENRE
        testURL("http://pluzz.francetv.fr/a-z/jeunesse/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/seriefiction/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/info/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/culture",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/documentaire/",-1) // OK

        // TEST BY PROGRAM
        // testURL("http://pluzz.francetv.fr/videos/echappees_belles.html",-1) // OK
        // testURL("http://pluzz.francetv.fr/videos/explo.html",-1) // OK
        // testURL("http://pluzz.francetv.fr/videos/plus_belle_la_vie.html",-1) // OK
        // testURL("http://pluzz.francetv.fr/videos/bien_chef.html",-1) // OK
        // testURL("http://pluzz.francetv.fr/videos/tout_le_sport.html",-1) // OK

        // TEST OTHERS
        // testURL("http://www.pluzz.fr",-1)
        // testURL("http://pluzz.francetv.fr/",-1)
        // testURL("http://pluzz.francetv.fr/enregion/",-1)
        // testURL("http://pays-de-la-loire.france3.fr/",-1)
        // testURL("http://bretagne.france3.fr/",-1)
    }
}