FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > ArchLinux > ArchLinux Pacman Development

 
 
LinkBack Thread Tools
 
Old 01-01-2012, 02:07 AM
Dan McGee
 
Default Only compile delta regex once

This reduces the number of regcomp() calls when parsing delta entries in
the database from once per entry to once for the entire context handle
by storing the compiled regex data on the handle itself. Just as we do
with the cURL handle, we initialize it the first time it is needed and
free it when releasing the handle.

A few other small tweaks to the parsing function also take place,
including using the stack to store the transient and short file size
string while parsing it.

When parsing a sync database with 1378 delta entries, this reduces the
time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s.

Signed-off-by: Dan McGee <dan@archlinux.org>
---

Test database, built from my local cache that was used for above numbers:
https://dev.archlinux.org/~dan/deltas.db

Should be useful for delta-related code in general.

lib/libalpm/delta.c | 33 +++++++++++++++++++--------------
lib/libalpm/delta.h | 2 +-
lib/libalpm/handle.c | 3 +++
lib/libalpm/handle.h | 5 +++++
4 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c
index 165cdef..726f03c 100644
--- a/lib/libalpm/delta.c
+++ b/lib/libalpm/delta.c
@@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg)
* This function assumes that the string is in the correct format.
* This format is as follows:
* $deltafile $deltamd5 $deltasize $oldfile $newfile
+ * @param handle the context handle
* @param line the string to parse
* @return A pointer to the new alpm_delta_t object
*/
-/* TODO this does not really belong here, but in a parsing lib */
-alpm_delta_t *_alpm_delta_parse(char *line)
+alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line)
{
alpm_delta_t *delta;
- char *tmp;
const int num_matches = 6;
size_t len;
- regex_t reg;
regmatch_t pmatch[num_matches];
+ char filesize[32];
+
+ /* this is so we only have to compile the pattern once */
+ if(!handle->delta_regex_compiled) {
+ /* $deltafile $deltamd5 $deltasize $oldfile $newfile*/
+ regcomp(&handle->delta_regex,
+ "^([^[:space:]]+) ([[:xdigit:]]{32}) ([[:digit:]]+)"
+ " ([^[:space:]]+) ([^[:space:]]+)$",
+ REG_EXTENDED | REG_NEWLINE);
+ handle->delta_regex_compiled = 1;
+ }

- regcomp(&reg,
- "^([^[:space:]]*) ([[:xdigit:]]{32}) ([[:digit:]]*)"
- " ([^[:space:]]*) ([^[:space:]]*)$",
- REG_EXTENDED | REG_NEWLINE);
- if(regexec(&reg, line, num_matches, pmatch, 0) != 0) {
+ if(regexec(&handle->delta_regex, line, num_matches, pmatch, 0) != 0) {
/* delta line is invalid, return NULL */
- regfree(&reg);
return NULL;
}
- regfree(&reg);

CALLOC(delta, 1, sizeof(alpm_delta_t), return NULL);

@@ -307,9 +310,11 @@ alpm_delta_t *_alpm_delta_parse(char *line)
STRNDUP(delta->delta_md5, &line[pmatch[2].rm_so], len, return NULL);

len = pmatch[3].rm_eo - pmatch[3].rm_so;
- STRNDUP(tmp, &line[pmatch[3].rm_so], len, return NULL);
- delta->delta_size = _alpm_strtoofft(tmp);
- free(tmp);
+ if(len < sizeof(filesize)) {
+ strncpy(filesize, &line[pmatch[3].rm_so], len);
+ filesize[len] = '';
+ delta->delta_size = _alpm_strtoofft(filesize);
+ }

len = pmatch[4].rm_eo - pmatch[4].rm_so;
STRNDUP(delta->from, &line[pmatch[4].rm_so], len, return NULL);
diff --git a/lib/libalpm/delta.h b/lib/libalpm/delta.h
index 42353f5..1173ddf 100644
--- a/lib/libalpm/delta.h
+++ b/lib/libalpm/delta.h
@@ -24,7 +24,7 @@

#include "alpm.h"

-alpm_delta_t *_alpm_delta_parse(char *line);
+alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line);
void _alpm_delta_free(alpm_delta_t *delta);
alpm_delta_t *_alpm_delta_dup(const alpm_delta_t *delta);
off_t _alpm_shortest_delta_path(alpm_handle_t *handle, alpm_list_t *deltas,
diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c
index 6518b7d..8e2e3c7 100644
--- a/lib/libalpm/handle.c
+++ b/lib/libalpm/handle.c
@@ -34,6 +34,7 @@
#include "alpm_list.h"
#include "util.h"
#include "log.h"
+#include "delta.h"
#include "trans.h"
#include "alpm.h"

@@ -67,6 +68,8 @@ void _alpm_handle_free(alpm_handle_t *handle)
curl_easy_cleanup(handle->curl);
#endif

+ regfree(&handle->delta_regex);
+
/* free memory */
_alpm_trans_free(handle->trans);
FREE(handle->root);
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h
index 1f147d6..a64c5c9 100644
--- a/lib/libalpm/handle.h
+++ b/lib/libalpm/handle.h
@@ -22,6 +22,7 @@

#include <stdio.h>
#include <sys/types.h>
+#include <regex.h>

#include "alpm_list.h"
#include "alpm.h"
@@ -94,6 +95,10 @@ struct __alpm_handle_t {

/* error code */
alpm_errno_t pm_errno;
+
+ /* for delta parsing efficiency */
+ int delta_regex_compiled;
+ regex_t delta_regex;
};

alpm_handle_t *_alpm_handle_new(void);
--
1.7.8.1
 
Old 01-02-2012, 03:49 AM
Allan McRae
 
Default Only compile delta regex once

On 01/01/12 13:07, Dan McGee wrote:
> This reduces the number of regcomp() calls when parsing delta entries in
> the database from once per entry to once for the entire context handle
> by storing the compiled regex data on the handle itself. Just as we do
> with the cURL handle, we initialize it the first time it is needed and
> free it when releasing the handle.
>
> A few other small tweaks to the parsing function also take place,
> including using the stack to store the transient and short file size
> string while parsing it.
>
> When parsing a sync database with 1378 delta entries, this reduces the
> time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s.
>
> Signed-off-by: Dan McGee <dan@archlinux.org>
> ---
>
> Test database, built from my local cache that was used for above numbers:
> https://dev.archlinux.org/~dan/deltas.db
>
> Should be useful for delta-related code in general.
>
> lib/libalpm/delta.c | 33 +++++++++++++++++++--------------
> lib/libalpm/delta.h | 2 +-
> lib/libalpm/handle.c | 3 +++
> lib/libalpm/handle.h | 5 +++++
> 4 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c
> index 165cdef..726f03c 100644
> --- a/lib/libalpm/delta.c
> +++ b/lib/libalpm/delta.c
> @@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg)
> * This function assumes that the string is in the correct format.
> * This format is as follows:
> * $deltafile $deltamd5 $deltasize $oldfile $newfile
> + * @param handle the context handle
> * @param line the string to parse
> * @return A pointer to the new alpm_delta_t object
> */
> -/* TODO this does not really belong here, but in a parsing lib */
> -alpm_delta_t *_alpm_delta_parse(char *line)
> +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line)


Changed function prototype so should also change the one usage of this
function in the codebase...

Fixup patch on my patchqueue branch.

Allan
 
Old 01-02-2012, 05:59 PM
Dan McGee
 
Default Only compile delta regex once

On Sun, Jan 1, 2012 at 10:49 PM, Allan McRae <allan@archlinux.org> wrote:
> On 01/01/12 13:07, Dan McGee wrote:
>> This reduces the number of regcomp() calls when parsing delta entries in
>> the database from once per entry to once for the entire context handle
>> by storing the compiled regex data on the handle itself. Just as we do
>> with the cURL handle, we initialize it the first time it is needed and
>> free it when releasing the handle.
>>
>> A few other small tweaks to the parsing function also take place,
>> including using the stack to store the transient and short file size
>> string while parsing it.
>>
>> When parsing a sync database with 1378 delta entries, this reduces the
>> time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s.
>>
>> Signed-off-by: Dan McGee <dan@archlinux.org>
>> ---
>>
>> Test database, built from my local cache that was used for above numbers:
>> * * https://dev.archlinux.org/~dan/deltas.db
>>
>> Should be useful for delta-related code in general.
>>
>> *lib/libalpm/delta.c *| * 33 +++++++++++++++++++--------------
>> *lib/libalpm/delta.h *| * *2 +-
>> *lib/libalpm/handle.c | * *3 +++
>> *lib/libalpm/handle.h | * *5 +++++
>> *4 files changed, 28 insertions(+), 15 deletions(-)
>>
>> diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c
>> index 165cdef..726f03c 100644
>> --- a/lib/libalpm/delta.c
>> +++ b/lib/libalpm/delta.c
>> @@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg)
>> * * This function assumes that the string is in the correct format.
>> * * This format is as follows:
>> * * $deltafile $deltamd5 $deltasize $oldfile $newfile
>> + * @param handle the context handle
>> * * @param line the string to parse
>> * * @return A pointer to the new alpm_delta_t object
>> * */
>> -/* TODO this does not really belong here, but in a parsing lib */
>> -alpm_delta_t *_alpm_delta_parse(char *line)
>> +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line)
>
>
> Changed function prototype so should also change the one usage of this
> function in the codebase...

Good catch of course, my rebase-foo was not working well and this got
swallowed into a different patch- thanks for actually testing these.


-Dan
 

Thread Tools




All times are GMT. The time now is 05:54 AM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org