Monday, 29 February 2016

Python Project : Media Downloader

Project Description :

The script has to parse a configuration file called feeds.txt that contains four columns. The fourth column contains the URL of an XML file. The script should download this XML file, parse it, and find all media files such as .png, .jpg or .pdf files. The links to the media files are relative to the XML file.

Following is the sample feeds.txt file :

landau feed1 landaumedia http://www.ps.landaumedia.de/4cf36ec2503400027400014a/Medienlandschaft/xmlfeed.xml
landau feed2 landaumedia http://www.ps.landaumedia.de/4c8df1fe8feeb50d54000091/W%C3%B6chentlicher%20Pressespiegel/xmlfeed.xml
bahlsen  feed1 landaumedia http://www.ps.landaumedia.de/557e5bb721d6bc0e4c6572cc/T%C3%A4glicher%20Pressespiegel/xmlfeed.xml
block-house feed1 landaumedia http://www.ps.landaumedia.de/5065af429295461238cd388d/T%C3%A4glicher%20Pressespiegel/xmlfeed.xml
claas  feed1 landaumedia http://www.ps.landaumedia.de/55f9603c70e1df143858a56b/T%C3%A4glicher%20Pressespiegel/xmlfeed.xml
claas  feed2 meltwater https://app.meltwater.com/gyda/outputs/566fcddff1e0c684a2930228/rendering?apiKey=55944bdf484b6d23a61cd26e&type=xml

The script should read the above file with four columns. The first column contains the name of the client. The second column contains the name of the feed. The third column contains the format name of the feed, so that you can distinguish between the two types of feeds. The fourth and last column contains the actual feed URL.

The script has to create the following directory structure automatically for the above feeds.txt file.
├───bahlsen
│   └───feed1
│       └───landaumedia
│           └───2016-02-19
│               ├───article_1
│               ├───article_3
│               ├───article_4
│               ├───article_5
│               ├───article_6
│               ├───article_7
│               ├───article_8
│               └───article_9
├───block-house
│   └───feed1
│       └───landaumedia
│           └───2016-02-19
│               ├───article_1
│               ├───article_2
│               └───article_3
├───claas
│   ├───feed1
│   │   └───landaumedia
│   │       └───2016-02-19
│   │           ├───article_1
│   │           ├───article_2
│   │           ├───article_3
│   │           ├───article_4
│   └───feed2
│       └───meltwater
│           └───2016-02-19
└───landau
    ├───feed1
    │   └───landaumedia
    │       └───2016-02-19
    │           ├───article_1
    │           ├───article_2
    └───feed2
        └───landaumedia
            └───2016-02-19
                ├───article_1
                ├───article_2
                ├───article_3
                ├───article_4
                └───article_5

Note : article_* are the final directories where all media files like .pdf, .jpg, .png etc. will be stored, and the result will look like the format below.

./feeds/client1/feed1/2016-02-15/xmlfeed.xml
./feeds/client1/feed1/2016-02-15/article_1/die welt.png
./feeds/client1/feed1/2016-02-15/article_1/Welt_online_Die_Handelsblatt_plant_Klage_gegen_die_Lufthansa_1.pdf
./feeds/client1/feed1/2016-02-15/article_2/zzz_handelsblatt_com_530.png
./feeds/client1/feed1/2016-02-15/article_2/Handelsblatt_Online_Medienhaus_blaest_Wechsel_der_Rechtsform_ab_2.pdf
./feeds/client1/feed1/2016-02-15/article_3/frankfurter allgemeine zeitung online.png
Python Program for above problem


#!/usr/bin/python
# coding: UTF-8

#__author__ = 'Mukesh.Kumar' 

import os,time,subprocess
from xml.dom import minidom
import urllib,urlparse

def mediaExtractor(xmlUrl,directory):
    """Download the media files referenced by the feed XML saved in *directory*.

    The function reads ``xmlfeed.xml`` from *directory*, collects every
    reference that matches ``article_.*\\.[a-z]{2,4}`` (case-insensitive,
    matched line by line), creates one sub-directory per article under
    *directory* and downloads each referenced file into it.

    Args:
        xmlUrl: URL the feed XML was fetched from; media references are
            resolved relative to this URL.
        directory: Local directory that contains ``xmlfeed.xml`` and receives
            the ``article_*`` sub-directories.

    Returns:
        None.  An unreadable XML file or a failed download is reported on
        stdout and skipped; it does not abort the remaining downloads.
    """
    # Local import: the file's top-level imports do not include ``re``.
    import re

    xmlFile = os.path.join(directory, "xmlfeed.xml")

    # Scan the file in-process instead of building a "cat ... | grep ..."
    # shell string: a shell string breaks (or executes unintended commands)
    # when the path contains spaces or shell metacharacters.  The pattern is
    # the one the grep call used, so the same references are found.
    mediaPattern = re.compile(r'article_.*\.[a-z]{2,4}', re.IGNORECASE)
    try:
        with open(xmlFile, 'r') as xmlHandle:
            xmlLines = xmlHandle.readlines()
    except IOError as err:
        print("Cannot read " + xmlFile + " : " + str(err))
        return

    mediaList = []
    for xmlLine in xmlLines:
        mediaList.extend(found.group(0) for found in mediaPattern.finditer(xmlLine))

    for med in mediaList:
        if "/" not in med:
            # Without an "<article dir>/<file>" shape there is no file name
            # to save under, so the reference cannot be stored.
            continue

        med1 = med.split('/')
        artDir = med1[0]
        fileName = med1[1]
        finalDir = os.path.join(directory, artDir)
        if not os.path.exists(finalDir):
            os.makedirs(finalDir)

        # Media links are relative to the location of the feed XML.
        medURL = urlparse.urljoin(xmlUrl, med)
        print(artDir)
        print(medURL)
        try:
            urllib.urlretrieve(medURL, os.path.join(finalDir, fileName))
        except Exception as err:
            # Best effort: one broken link must not stop the other downloads,
            # but the failure is reported instead of being silently dropped.
            print("Failed to download " + medURL + " : " + str(err))
 

def feedProcessing(feedList):
    """Download the feed XML and its media for every entry in *feedList*.

    Each entry is one whitespace-separated line with four fields: client
    name, feed name, feed format name and feed URL.  For every entry the
    directory ``<cwd>/<client>/<feed>/<format>/<YYYY-MM-DD>`` is created if
    needed, the feed is saved there as ``xmlfeed.xml`` and
    ``mediaExtractor`` is called to fetch the media it references.

    Args:
        feedList: Iterable of feed lines as described above.

    Returns:
        None.  Entries with fewer than four fields are skipped; download and
        extraction failures are reported on stdout and processing continues
        with the next entry.
    """
    for feed in feedList:
        feedSplit = feed.split()
        if len(feedSplit) < 4:
            # Blank lines are ignored silently; anything else is reported so
            # a typo in feeds.txt does not go unnoticed.
            if feedSplit:
                print("Skipping malformed feed line : " + feed)
            continue

        print("Processing feed : "+feed)
        print("-----------------------------")
        dir1, dir2, dir3, xmlUrl = feedSplit[:4]
        dir4 = time.strftime("%Y-%m-%d")
        directory = os.path.join(os.getcwd(), dir1, dir2, dir3, dir4)
        if not os.path.exists(directory):
            os.makedirs(directory)

        # Downloading xml file
        print("------Downloading xml file feed -------------------")
        try:
            urllib.urlretrieve(xmlUrl, os.path.join(directory, "xmlfeed.xml"))
        except Exception:
            print("Some error while downloading xml file")
            # Without a freshly downloaded feed there is nothing reliable to
            # extract media from, so move on to the next feed.
            continue

        try:
            mediaExtractor(xmlUrl, directory)
        except Exception:
            print("Fail to extract media for : "+xmlUrl)


def main():
    """Read ``feeds.txt`` from the current directory and process its feeds.

    Every non-blank line is normalised so that its fields are separated by
    exactly one space (tabs, repeated blanks, surrounding whitespace and
    line endings are removed), echoed to stdout and collected.  The collected
    lines are then handed to ``feedProcessing``.

    Returns:
        None.  If ``feeds.txt`` does not exist a message is printed and
        nothing else happens.
    """
    feedFilepath = os.path.join(os.getcwd(), "feeds.txt")
    if not os.path.exists(feedFilepath):
        print("Feed file not found in current directory")
        return

    print("Feed file found and processing : "+feedFilepath)

    feedList = []
    # The with-block guarantees the file is closed even if reading fails.
    with open(feedFilepath, 'r') as feedFile:
        for rawLine in feedFile:
            # split()/join collapses every run of whitespace (including tabs
            # and a trailing "\r\n") into single spaces.
            line = " ".join(rawLine.split())
            if not line:
                continue
            feedList.append(line)
            print(line)

    print("------------------")
    feedProcessing(feedList)


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Tuesday, 23 February 2016

The VI Editor

The vi editor (vi stands for visual editor) is a screen editor which is available on almost all Unix systems. Once you have learned vi, you will find that it is a fast and powerful editor. There is also an improved version of vi called Vim.

The vi editor works on different UNIX flavors. It is fast and has powerful undo features. It also has a few limitations: it is less user friendly, it is highly case sensitive, and a keystroke can have more than one meaning.

Working modes of VI editor :

Command Mode : Keys are interpreted as commands.
Insert Mode         : Keys are interpreted as data
Escape Mode       : Keys are interpreted for saving/exiting purposes





Whenever a particular file is opened using the vi editor, it opens up in the Command mode. In this mode, the keyboard-character represents command and not data. A user cannot enter the data in the Command mode. The user has to switch over to the insert mode to enter the data. This could be done using the letters like "a", "i", etc. Once the user is in insert mode, the data could be entered. The user could switch back to the Command mode. For saving or exiting the application the user has to get into the Escape mode by pressing "Esc" key.

General Startup
 
    To use vi: [vi filename] + Enter
    To exit vi and save changes:    Esc Key+[ ZZ   or  :wq ] + Enter
    To exit vi without saving changes:    [ Esc Key + :q! ] + Enter
    To enter vi command mode:    [Esc]

Cursor Movement

 
    h       move left (backspace)
    j       move down
    k       move up
    l       move right (space-bar)


    [return]   move to the beginning of the next line

    $       last column on the current line
    0       move cursor to the first column on the current line
    ^       move cursor to first non blank column on the current line
    w      move to the beginning of the next word or  punctuation mark
    W     move past the next space
    b       move to the beginning of the previous word  or punctuation mark
    B      move to the beginning of the previous word, ignores punctuation

        e       end of next word or punctuation mark
        E      end of next word, ignoring punctuation
        H      move cursor to the top of the screen
        M      move cursor to the middle of the screen
        L       move cursor to the bottom of the screen

Screen Movement
 
       G        move to the last line in the file
       gg        move to the first line in the file
       xG       move to line x


       z+       move current line to top of screen
       z.       move current line to the middle of screen
       z-       move current line to the bottom of screen

       ^F       move forward one screen
       ^B       move backward one screen

       ^D       move forward one half screen
       ^U       move backward one half screen

Inserting
 
       i        insert before cursor
       a        append after cursor

       A        append at end of line
       O        open line above cursor and enter append mode
       r        replace character under cursor with next character typed
       R        keep replacing character until [esc] is hit

Deleting
 
    x       delete character under cursor
    dd      delete line under cursor

    dw      delete word under cursor
    db      delete word before cursor

Copying Code
 
    yy      (yank)'copies' line which may then be put by the p(put) command. Precede with a count
              for multiple lines.

Put Command
 
        Brings back previous deletion or yank of lines,words, or characters
        P       bring back before cursor
        p       bring back after cursor

 Find Commands
 
    ?       finds a word going backwards
    /       finds a word going forwards


    f       finds a character on the line under the cursor going forward
    F       finds a character on the line under the cursor going backwards

    t       find a character on the current line going forward and stop one character before it
    T       find a character on the current line going backward and stop one character before it

    ;        repeat last f, F, t, T

Miscellaneous Commands
 
    ^G    display current line number
    .        repeat last command
    u       undoes last command issued
    U      undoes all commands on one line

Line Editor Mode
    Any command from the line editor ex can be issued upon entering line mode.

    To enter: type ':'
    To exit: press[return] or [esc]

READING FILES
    copies (reads) filename after cursor in file currently editing

    :r filename

WRITE FILE
    :w     saves the current file without quitting

MOVING
    :#    move to line #
    :$    move to last line of file

SHELL ESCAPE
    executes 'cmd' as a shell command.
    :!'cmd'






                         **********************END************************

Friday, 12 February 2016

Setting up syntax highlighter on Blogger


Before making any change to your blog, always take a backup of your template. To take a backup of your Blogger template, log in to your blog, go to Template, and click on the Backup/Restore button in the top right corner.

Click on Download full template button. You will get a .xml file of your template which you can save in your system.


  • After taking backup of your blogger click on "Edit HTML" button and copy all css given in this link and paste it before </b:skin> tag.

  • Paste the following code before </head> tag

<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shCore.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushCpp.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushCSharp.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushCss.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushDelphi.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushJava.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushJScript.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushPhp.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushPython.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushRuby.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushSql.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushVb.js' type='text/javascript'></script>
<script src='http://syntaxhighlighter.googlecode.com/svn/trunk/Scripts/shBrushXml.js' type='text/javascript'></script>

  • Now paste the following code before </body> tag.
<script language='javascript'>
dp.SyntaxHighlighter.BloggerMode();
dp.SyntaxHighlighter.HighlightAll('code');
</script>

  • Now hit "Save Template" button to save your changes.

  • Syntax highlighter is ready to use. You can place your code between <pre></pre> tag.
<pre name="code">
...Your html-escaped code goes here...
</pre>

<pre class="python" name="code" >
    print "Hello python! "
</pre>

  • You can Escape your code here.
Related Posts Plugin for WordPress, Blogger...