import org.serviio.library.metadata.*
import org.serviio.library.online.*
import groovy.json.JsonSlurper
import java.util.zip.ZipEntry
import java.util.zip.ZipInputStream
/**
* WebResource extractor plugin for Pluzz.fr
*
* @author Illico
* @version 1.5
*
*/
class Pluzz extends WebResourceUrlExtractor {

    /** Plugin version returned by {@link #getVersion()}. */
    final int VERSION = 15
    /** Full-match pattern for the resource URLs this plugin accepts. */
    final VALID_WEBRESOURCE_URL = '(^http://www.pluzz.fr.*$)|(^http://pluzz.francetv.fr.*$)|(^http://pluzz.francetv.fr.*$)|(^http://www.la1ere.fr.*)|(^http://.*.france[12345o].fr.*$)'
    /** Zip archive read for every listing; its entries are named catch_up_&lt;channel&gt;.json. */
    final ARCHIVEURL = "http://webservices.francetelevisions.fr/catchup/flux/flux_main.zip"
    /** Thumbnail used for the container returned by extractItems. */
    final THUMBURL = "http://www.pluzz.fr/layoutftv/arches/catchup/images/fb_pluzz.jpg"
    /** Prefix prepended to the image path stored on each item. */
    final URL_BASE_IMAGES = "http://www.pluzz.fr"
    /** Prefix prepended to the video path stored on each item. */
    final URL_BASE_VIDEOS = "http://medias2.francetv.fr/catchup-mobile"
    /** Group 1 captures the genre from an a-z listing URL. */
    final GENRE_REGEX = '^http://pluzz.francetv.fr/a-z/(.*?)/*$'
    /** Group 1 captures a franceN channel name; group 2 captures a "la 1ere" style name. */
    final CHANNEL_REGEX = '^http://.*(france[23345o]+?).*$|^http://.*(la.*1ere?).*$'
    /** Full-match pattern for a program page URL. */
    final PROGRAM_REGEX = '^http://pluzz.francetv.fr/videos/.*.html$'
    // NOTE(review): the two patterns below are empty, so getProgram() has no capture group to
    // read and always ends in its error path. TODO: supply the real patterns.
    final PROGRAM_CODE_REGEX = ''
    final PROGRAM_CHAINE_REGEX = ''

    /** @return the class name, used as the extractor name */
    String getExtractorName() {
        return getClass().getName()
    }

    /** @return the plugin version number */
    int getVersion() {
        return VERSION
    }

    /**
     * Tells whether this plugin handles the given resource URL.
     *
     * @param feedUrl URL to test
     * @return true when the whole URL matches VALID_WEBRESOURCE_URL
     */
    boolean extractorMatches(URL feedUrl) {
        return feedUrl ==~ VALID_WEBRESOURCE_URL
    }

    /**
     * Builds the item list for a resource URL. The URL is interpreted, in this order, as a
     * channel URL, a genre URL, then a program URL; the first interpretation that applies wins.
     *
     * @param resourceUrl URL of the resource to list
     * @param maxItems    maximum number of items to return; zero or a negative value means no limit
     * @return a container with the items sorted by title, or null when nothing could be extracted
     */
    WebResourceContainer extractItems(URL resourceUrl, int maxItems) {
        try {
            loginfo("ResourceUrl : "+resourceUrl)
            List items = resolveItems(resourceUrl)
            if (items == null) {
                // An assert here would raise an Error that the catch below cannot intercept.
                loginfo("Extract Items of "+resourceUrl+" failed")
                return null
            }
            items = items.sort { it.title } // Sort items by title
            if (maxItems > 0 && items.size() > maxItems) {
                items = new ArrayList(items.subList(0, maxItems))
            }
            return new WebResourceContainer(title: getClass().getName(), thumbnailUrl: THUMBURL, items: items)
        } catch (Exception e) {
            loginfo("Extract Items of "+resourceUrl+" failed : "+e)
            return null
        }
    }

    /**
     * Resolves the stream and thumbnail URLs of one item.
     *
     * @param item             item produced by extractItems
     * @param requestedQuality quality asked for; only used in the cache key, the "-840k" variant
     *                         of the playlist is always selected
     * @return the content container, or null when the item carries no video URL
     */
    ContentURLContainer extractUrl(WebResourceItem item, PreferredQuality requestedQuality) {
        String url = item.getAdditionalInfo()['WebResourceItemUrl']
        String thum = item.getAdditionalInfo()['WebResourceItemThumbnailUrl']
        if (url == null) {
            loginfo("Extract Url failed : no video url for item "+item.title)
            return null
        }
        // Escape the dot so only a real ".m3u8" extension is rewritten.
        String mediaUrl = URL_BASE_VIDEOS + url.replaceAll(/\.m3u8$/, "-840k.m3u8")
        String mediaThumb = (thum != null) ? URL_BASE_IMAGES + thum : null
        // The key has to differ per item; class name plus quality alone is shared by every item.
        String cacheKey = getClass().getName() + "_" + url + "_" + requestedQuality
        return new ContentURLContainer(fileType: MediaFileType.VIDEO, contentUrl: mediaUrl, thumbnailUrl: mediaThumb, expiresImmediately: false, cacheKey: cacheKey)
    }

    /** Writes a message to the plugin log and to standard output. */
    private void loginfo(String text) {
        log(text)
        println(text)
    }

    /**
     * Manual smoke test: runs the extractor against a URL and prints what it finds.
     *
     * @param url       resource URL to test
     * @param itemCount value passed as maxItems to extractItems
     * @return the extracted container
     */
    static WebResourceContainer testURL(String url, int itemCount = 2) {
        Pluzz extractor = new Pluzz()
        URL resourceUrl = new URL(url)
        println "getExtractorName : " + extractor.getExtractorName()
        println "getVersion : " + extractor.getVersion()
        assert extractor.extractorMatches(resourceUrl), 'Url doesn\'t match for this WebResource plugin'
        println "extractorMatches : " + extractor.extractorMatches(resourceUrl)
        WebResourceContainer container = extractor.extractItems(resourceUrl, itemCount)
        assert container != null, 'Container is empty'
        assert container.items != null, 'Container contains no items'
        println "extractItems : " + container.items.size()
        return container
    }

    /** Picks the listing strategy for the URL; returns null when none applies or the listing fails. */
    private List resolveItems(URL resourceUrl) {
        String channel = getChannel(resourceUrl)
        if (channel != null) {
            loginfo("Channel resquested : "+channel)
            return getItemsByChannel(channel)
        }
        String genre = getGenre(resourceUrl)
        if (genre != null) {
            loginfo("Genre resquested : "+genre)
            return getItemsByGenre(genre)
        }
        List progparam = getProgram(resourceUrl)
        if (progparam != null) {
            loginfo("Program Code resquested : "+progparam[0]+", chaine : "+progparam[1])
            return getItemsByProgram(progparam)
        }
        return null
    }

    /**
     * Downloads the archive and returns the text of every entry whose name is accepted.
     * Line breaks inside an entry are dropped. The stream is always closed.
     *
     * @param accepts closure receiving the entry name and returning true to keep the entry
     * @return the text of each accepted entry, in archive order
     */
    private List<String> readArchiveEntries(Closure accepts) {
        loginfo("URL :"+ARCHIVEURL)
        List<String> documents = []
        InputStream raw = new URL(ARCHIVEURL).openConnection().getInputStream()
        ZipInputStream zis = new ZipInputStream(new BufferedInputStream(raw))
        try {
            ZipEntry entry
            while ((entry = zis.getNextEntry()) != null) {
                if (!accepts(entry.getName())) {
                    continue
                }
                loginfo("Unzipping : " + entry.getName())
                // The reader is deliberately not closed: closing it would close the archive stream.
                BufferedReader reader = new BufferedReader(new InputStreamReader(zis, "UTF-8"))
                StringBuilder text = new StringBuilder()
                String line
                while ((line = reader.readLine()) != null) {
                    text.append(line)
                }
                documents << text.toString()
            }
        } finally {
            zis.close()
        }
        return documents
    }

    /** Reads and parses catch_up_&lt;channel&gt;.json; throws when the archive has no such entry. */
    private def loadChannelJson(String channel) {
        String wantedName = "catch_up_"+channel+".json"
        List<String> documents = readArchiveEntries { String name -> name == wantedName }
        if (documents.isEmpty()) {
            throw new IllegalStateException("No entry named "+wantedName+" in "+ARCHIVEURL)
        }
        return new JsonSlurper().parseText(documents[0])
    }

    /** Creates an item carrying the programme's video and image paths under the given title. */
    private WebResourceItem buildItem(def programme, String title) {
        return new WebResourceItem(title: title, additionalInfo: ['WebResourceItemUrl': programme.url_video, 'WebResourceItemThumbnailUrl': programme.url_image_racine+"."+programme.extension_image])
    }

    /** True when the field is present and its lower-cased text contains a match of the pattern. */
    private boolean mentions(Object field, def pattern) {
        return field != null && pattern.matcher(field.toString().toLowerCase()).find()
    }

    /**
     * Lists every programme of one channel.
     *
     * @return the items, or null when the archive could not be read or parsed
     */
    private List getItemsByChannel(String channel) {
        try {
            def json = loadChannelJson(channel)
            loginfo(json.chaine.titre+" : "+json.chaine.nb_prog)
            List items = []
            for (programme in json.programmes) {
                items << buildItem(programme, programme.titre+" "+CSA(programme.csa_code?.toString()))
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+channel+" channel failed : "+e)
            return null
        }
    }

    /**
     * Lists, across all franceN channel files, the programmes whose OAS_sitepage contains a
     * match of the genre (the genre is used as a regular expression, case-insensitively).
     *
     * @return the items, or null when the archive could not be read or parsed
     */
    private List getItemsByGenre(String genre) {
        try {
            def wanted = ~(genre.toLowerCase()) // compiled once, outside the loops
            List<String> documents = readArchiveEntries { String name -> name ==~ "catch_up_france[123345o]+.json" }
            List items = []
            for (String document in documents) {
                def json = new JsonSlurper().parseText(document)
                String chaine = json.chaine.titre
                loginfo(chaine+" : "+json.chaine.nb_prog)
                for (programme in json.programmes) {
                    if (mentions(programme.OAS_sitepage, wanted)) {
                        items << buildItem(programme, programme.titre+" "+CSA(programme.csa_code?.toString())+" "+chaine)
                    }
                }
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+genre+" genre failed : "+e)
            return null
        }
    }

    /**
     * Lists the programmes of one channel whose code_programme contains a match of the
     * requested code (used as a regular expression, case-insensitively).
     *
     * @param progparam two-element list: program code, then channel
     * @return the items, or null when the archive could not be read or parsed
     */
    private List getItemsByProgram(List progparam) {
        // Declared outside the try so the catch block can report the channel.
        String code = progparam[0]
        String channel = progparam[1]
        try {
            def wanted = ~(code.toLowerCase())
            def json = loadChannelJson(channel)
            String chaine = json.chaine.titre
            loginfo(chaine+" : "+json.chaine.nb_prog)
            List items = []
            for (programme in json.programmes) {
                if (mentions(programme.code_programme, wanted)) {
                    items << buildItem(programme, programme.titre+" "+programme.date+" "+CSA(programme.csa_code?.toString())+" "+chaine)
                }
            }
            return items
        } catch (Exception e) {
            loginfo("Extract Items of "+channel+" channel failed : "+e)
            return null
        }
    }

    /**
     * Extracts the channel from the resource URL.
     *
     * @return "france1" when the "la 1ere" alternative matched, the captured franceN name
     *         otherwise, or null when the URL is not a channel URL
     */
    private String getChannel(URL resourceURL) {
        def matcher = resourceURL.toString() =~ CHANNEL_REGEX
        if (!matcher.find()) {
            loginfo("L'URL n'est pas valide pour le choix par chaine")
            return null
        }
        // Group 2 is only set by the "la 1ere" alternative, which maps to the france1 file.
        return (matcher.group(2) != null) ? "france1" : matcher.group(1).trim()
    }

    /**
     * Extracts the genre from the resource URL.
     *
     * @return the trimmed genre, or null when the URL is not an a-z listing URL
     */
    private String getGenre(URL resourceURL) {
        def matcher = resourceURL.toString() =~ GENRE_REGEX
        if (!matcher.find()) {
            loginfo("L'URL n'est pas valide pour le choix par genre")
            return null
        }
        return matcher.group(1).trim()
    }

    /**
     * Extracts the program code and channel by downloading the program page and applying
     * PROGRAM_CODE_REGEX and PROGRAM_CHAINE_REGEX to it.
     *
     * @return [code, channel], or null when the URL is not a program URL or extraction fails
     */
    private List getProgram(URL resourceURL) {
        if (!(resourceURL ==~ PROGRAM_REGEX)) {
            loginfo("L'URL n'est pas valide pour le choix par programme")
            return null
        }
        try {
            String webPage = resourceURL.getText()
            def matchcode = webPage =~ PROGRAM_CODE_REGEX
            def matchchaine = webPage =~ PROGRAM_CHAINE_REGEX
            String rawChannel = matchchaine[0][1]
            String code = matchcode[0][1]
            String channel = (rawChannel == "la_1ere") ? "france1" : rawChannel.trim()
            return [ code, channel ]
        } catch (Exception e) {
            // Covers both download errors and a page the patterns do not match.
            loginfo("L'URL n'est pas valide pour le choix par programme")
            return null
        }
    }

    /**
     * Formats a CSA code as a title tag.
     *
     * @param csacode raw code from the programme data; may be null
     * @return "|-10|", "|-12|" or "|-16|" for those codes, "|TP|" for anything else
     */
    private String CSA(String csacode) {
        switch (csacode) {
            case "10": return "|-10|"
            case "12": return "|-12|"
            case "16": return "|-16|"
            default: return "|TP|" // includes the explicit "TP" code
        }
    }

    /** Manual test entry point; uncomment one of the calls below to try a URL. */
    static void main(args) {
        // TEST BY CHANNEL
        // testURL("http://www.la1ere.fr",-1) // OK
        // testURL("http://www.france2.fr",-1) // OK
        // testURL("http://www.france3.fr",-1) // OK
        // testURL("http://www.france4.fr",-1) // OK
        // testURL("http://www.france5.fr",-1) // OK
        // testURL("http://www.franceo.fr",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/la_1ere/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france2/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france3/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france4/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/france5/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/franceo/",-1) // OK
        // TEST BY GENRE
        // testURL("http://pluzz.francetv.fr/a-z/jeunesse/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/seriefiction/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/info/",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/culture",-1) // OK
        // testURL("http://pluzz.francetv.fr/a-z/documentaire/",-1) // OK
        // TEST BY PROGRAM
        // testURL("http://pluzz.francetv.fr/videos/echappees_belles.html",-1) //
        // testURL("http://pluzz.francetv.fr/videos/explo.html",-1) //
        // testURL("http://pluzz.francetv.fr/videos/plus_belle_la_vie.html",-1) // OK
        // testURL("http://pluzz.francetv.fr/videos/bien_chef.html",-1) //
        // testURL("http://pluzz.francetv.fr/videos/tout_le_sport.html",-1) //
        // TEST OTHERS
        // testURL("http://www.pluzz.fr",-1)
        // testURL("http://pluzz.francetv.fr/",-1)
        // testURL("http://pluzz.francetv.fr/enregion/",-1)
        // testURL("http://pays-de-la-loire.france3.fr/",-1)
        // testURL("http://bretagne.france3.fr/",-1)
    }
}