summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Yves Fischer <yvesf-git@xapek.org> 2015-07-05 02:42:04 +0200
committer Yves Fischer <yvesf-git@xapek.org> 2015-07-05 02:42:04 +0200
commit 059bed7574351310a022a8f3ae911bafe524380b (patch)
tree cafe817a0901b81d43e9ab9d13b14c9a9182e5d9
parent 5904c4f9a596dcf96d328914ad9b93be1a138c74 (diff)
download pyinflux-059bed7574351310a022a8f3ae911bafe524380b.tar.gz
pyinflux-059bed7574351310a022a8f3ae911bafe524380b.zip
with funcparserlib
-rw-r--r-- pyinfluxtools/__init__.py 162
-rw-r--r-- setup.py 2
2 files changed, 133 insertions, 31 deletions
diff --git a/pyinfluxtools/__init__.py b/pyinfluxtools/__init__.py
index 6e10914..7654051 100644
--- a/pyinfluxtools/__init__.py
+++ b/pyinfluxtools/__init__.py
@@ -1,5 +1,12 @@
#!/usr/bin/env python3
import re
+import sys
+
+from pprint import pprint
+from funcparserlib.lexer import make_tokenizer, Token, LexerError
+from funcparserlib.parser import (some, a, maybe, many, finished, skip)
+
+
class WriteRequest(object):
@@ -8,12 +15,12 @@ class WriteRequest(object):
"""
Parse multiple Write objects separeted by new-line character.
- >>> lines = []
- >>> lines += ['cpu']
- >>> lines += ['cpu,host=serverA,region=us-west']
- >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2']
- >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234']
- >>> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines)))))
+ >> lines = []
+ >> lines += ['cpu']
+ >> lines += ['cpu,host=serverA,region=us-west']
+ >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2']
+ >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234']
+ >> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines)))))
cpu
cpu,host="serverA",region="us-west"
cpu,host="serverA",region="us-west" field1=1,field2=2
@@ -42,50 +49,145 @@ class Write(object):
self.fields = self.fields.items()
@staticmethod
+ def tokenize(str):
+ specs = [
+ ('Comma', (r',',)),
+ ('Space', (r' ',)),
+ ('Equal', (r'=',)),
+ ('Quote', (r'"',)),
+ ('Escape', (r'\\',)),
+ ('Int', (r'[0-9]+',)),
+ ('Float', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
+ ('Text', (r'[A-Za-z\200-\377_0-9-\.]+',)),
+ ]
+ useless = [] #'Comma', 'NL', 'Space', 'Header', 'Footer']
+ t = make_tokenizer(specs)
+ return [x for x in t(str) if x.type not in useless]
+
+
+ @staticmethod
def parse(line):
"""
Parse a line from the POST request into a Write object.
- >>> Write.parse('cpu')
- <Write key=cpu tags=None fields=None timestamp=None>
- >>> print(Write.parse('cpu'))
- cpu
+ >>> Write.parse('cpu a=1')
+ <Write key=cpu tags=[] fields=[('a', 1)] timestamp=None>
- >>> Write.parse('cpu,host=serverA,region=us-west')
- <Write key=cpu tags=[('host', 'serverA'), ('region', 'us-west')] fields=None timestamp=None>
- >>> print(Write.parse('cpu,host=serverA,region=us-west'))
- cpu,host="serverA",region="us-west"
+ >>> print(Write.parse('cpu a=1'))
+ cpu a=1
+
+ >>> Write.parse('cpu,host=serverA,region=us-west foo=bar')
+ <Write key=cpu tags=[('host', 'serverA'), ('region', 'us-west')] fields=[('foo', 'bar')] timestamp=None>
+
+ >>> print(Write.parse('cpu host=serverA,region=us-west'))
+ cpu host="serverA",region="us-west"
+
+ >>> Write.parse('cpu\\,01 host=serverA,region=us-west')
+ <Write key=cpu,01 tags=[] fields=[('host', 'serverA'), ('region', 'us-west')] timestamp=None>
+
+ >>> print(Write.parse('cpu\,01 host=serverA,region=us-west'))
+ cpu\,01 host="serverA",region="us-west"
- >>> Write.parse('cpu\\,01,host=serverA,region=us-west')
- <Write key=cpu,01 tags=[('host', 'serverA'), ('region', 'us-west')] fields=None timestamp=None>
- >>> print(Write.parse('cpu\,01,host=serverA,region=us-west'))
- cpu\,01,host="serverA",region="us-west"
+ >>> Write.parse('cpu host=server\\ A,region=us\\ west')
+ <Write key=cpu tags=[] fields=[('host', 'server A'), ('region', 'us west')] timestamp=None>
- >>> Write.parse('cpu,host=server\\ A,region=us\\ west')
- <Write key=cpu tags=[('host', 'server A'), ('region', 'us west')] fields=None timestamp=None>
- >>> print(Write.parse('cpu,host=server\\ A,region=us\\ west'))
- cpu,host="server A",region="us west"
+ >>> Write.parse('cpu ho\=st=server\ A,region=us\ west')
+ <Write key=cpu tags=[] fields=[('ho=st', 'server A'), ('region', 'us west')] timestamp=None>
- >>> Write.parse('cpu,ho\=st=server\ A,region=us\ west')
- <Write key=cpu tags=[('ho=st', 'server A'), ('region', 'us west')] fields=None timestamp=None>
- >>> print(Write.parse('cpu,ho\=st=server\ A,region=us\ west'))
- cpu,ho\=st="server A",region="us west"
+ >>> print(Write.parse('cpu ho\=st=server\ A,region=us\ west'))
+ cpu ho\=st="server A",region="us west"
>>> print(Write.parse('cpu,ho\=st=server\ A field=123'))
cpu,ho\=st="server A" field=123
+
>>> print(Write.parse('cpu,foo=bar,foo=bar field=123,field=123')) # error: double name is accepted
cpu,foo="bar",foo="bar" field=123,field=123
+
>>> print(Write.parse('cpu field12=12'))
cpu field12=12
+
>>> print(Write.parse('cpu field12=12 123123123'))
cpu field12=12 123123123
- >>> print(Write.parse('cpu field12=12 1231abcdef123'))
+
+ >> print(Write.parse('cpu field12=12 1231abcdef123'))
Traceback (most recent call last):
...
- ValueError: invalid literal for int() with base 10: '1231abcdef123'
- >>> print(Write.parse('cpu field="hello World"'))
- null
+ funcparserlib.parser.NoParseError: should have reached <EOF>: 1,20-1,28: Text 'abcdef123'
+
+ >>> print(Write.parse("cpu,x=3,y=4,z=6 field\ name=\\"HH \\\\\\"World\\",x=asdf\\\\ foo"))
+ cpu,x=3,y=4,z=6 field\\ name="HH \\"World",x="asdf foo"
+
+ >>> print(Write.parse("cpu,x=3 field\ name=\\"HH \\\\\\"World\\",x=asdf\\\\ foo"))
+ cpu,x=3 field\\ name="HH \\"World",x="asdf foo"
+
+ >>> print(Write.parse("cpu foo=bar 12345"))
+ cpu foo="bar" 12345
+
+ >>> print(Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with field_key\\\\\\="string field value, only \\\\" need be quoted"'))
+ "measurement\ with\ quotes",tag\ key\ with\ spaces="tag,value,with" field_key\\\\="string field value, only \\" need be quoted"
+
+ >>> Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with"commas" field_key\\\\\\\\="string field value, only \\\\" need be quoted"')
+ <Write key="measurement with quotes" tags=[('tag key with spaces', 'tag,value,with"commas"')] fields=[('field_key\\\\', 'string field value, only " need be quoted')] timestamp=None>
+
+ #>>> Write.parse('disk_free value=442221834240,working\ directories="C:\My Documents\Stuff for examples,C:\My Documents"')
+ #Fails.... this format is just crazy
"""
+
+ tokval = lambda t: t.value
+ toksval = lambda x: "".join(x)
+ token = lambda type: some(lambda t: t.type == type)
+
+ space = token('Space') >> tokval
+ comma = token('Comma') >> tokval
+ quote = token('Quote') >> tokval
+ escape_space = token('Escape') + token('Space') >> (lambda x: " ")
+ escape_comma = token('Escape') + token('Comma') >> (lambda x: ",")
+ escape_equal = token('Escape') + token('Equal') >> (lambda x: "=")
+ escape_quote = token('Escape') + token('Quote') >> (lambda x: "\"")
+ escape_escape = token('Escape') + token('Escape') >> (lambda x: "\\")
+ plain_int = token('Int') >> (lambda t: int(tokval(t)))
+ plain_int_text = token('Int') >> tokval
+ plain_float = token('Float') >> (lambda t: float(tokval(t)))
+ plain_float_text = token('Float') >> tokval
+ plain_bool = some( lambda t: t.type == 'Text' and t.value.lower() in ["t", "true"]) >> (lambda t: True) | \
+ some( lambda t: t.type == 'Text' and t.value.lower() in ["f", "false"]) >> (lambda t: False)
+ plain_text = token("Text") >> tokval
+
+ identifier = many( plain_text | escape_space | escape_comma | escape_escape | plain_int_text | token('Quote') >> tokval ) >> toksval
+ quoted_text = many( escape_escape | escape_quote | plain_text | space | comma | plain_int_text | plain_float_text) >> (lambda x: "".join(x))
+ unquoted_text = many( escape_space | escape_comma | escape_equal | escape_escape | quote | plain_text | plain_int_text ) >> toksval
+ string_value = ( skip(token('Quote')) + quoted_text + skip(token('Quote')) ) | unquoted_text
+
+ kv_value = plain_int | plain_float | plain_bool | string_value
+ kv = string_value + skip(token('Equal')) + kv_value >> (lambda x: (x[0],x[1]))
+
+ def setter(obj, propert):
+ def r(val):
+ setattr(obj, propert, val)
+ return (propert, val)
+ return r
+
+ key = identifier
+ tags = many( skip(token('Comma')) + kv) >> (lambda x: x) # (lambda x: [x[0]] + x[1])
+ fields = ( kv + many( skip(token('Comma')) + kv ) ) >> (lambda x: [x[0]] + x[1])
+ timestamp = plain_int
+
+ write = Write(None, None, None, None)
+ toplevel = (key >> setter(write, "key")) + \
+ maybe( tags >> setter(write, "tags") ) + \
+ ( skip(token('Space')) + (fields >> setter(write, "fields")) ) + \
+ maybe( skip(token('Space')) + timestamp >> setter(write, "timestamp") ) + \
+ skip(finished) >> (lambda x: x)
+ try:
+ result = toplevel.parse(Write.tokenize(line))
+ except:
+ pprint(line, stream=sys.stderr)
+ pprint(write, stream=sys.stderr)
+ pprint(Write.tokenize(line), stream=sys.stderr)
+ raise
+ #pprint({line : result}, stream=sys.stderr)
+ return write
+
def unescape(string):
return re.sub(r'(?<!\\)([\\,=])', '', string)
diff --git a/setup.py b/setup.py
index a82b3c6..7126f2e 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ setup(name='pyinfluxtools',
packages = ['pyinfluxtools'],
# url='https://github.com/',
# scripts=[],
-# install_requires=[],
+ install_requires=['funcparserlib==0.3.6'],
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",